jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from ..compat import functools
	21	from ..jsinterp import JSInterpreter
	22	from ..utils import (
	23	NO_DEFAULT,
	24	ExtractorError,
	25	UserNotLive,
	26	bug_reports_message,
	27	classproperty,
	28	clean_html,
	29	datetime_from_str,
	30	dict_get,
	31	float_or_none,
	32	format_field,
	33	get_first,
	34	int_or_none,
	35	is_html,
	36	join_nonempty,
	37	js_to_json,
	38	mimetype2ext,
	39	network_exceptions,
	40	orderedSet,
	41	parse_codecs,
	42	parse_count,
	43	parse_duration,
	44	parse_iso8601,
	45	parse_qs,
	46	qualities,
	47	remove_start,
	48	smuggle_url,
	49	str_or_none,
	50	str_to_int,
	51	strftime_or_none,
	52	traverse_obj,
	53	try_get,
	54	unescapeHTML,
	55	unified_strdate,
	56	unified_timestamp,
	57	unsmuggle_url,
	58	update_url_query,
	59	url_or_none,
	60	urljoin,
	61	variadic,
	62	)
	63
	64	# any clients starting with _ cannot be explicitly requested by the user
	65	INNERTUBE_CLIENTS = {
	66	'web': {
	67	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	68	'INNERTUBE_CONTEXT': {
	69	'client': {
	70	'clientName': 'WEB',
	71	'clientVersion': '2.20220801.00.00',
	72	}
	73	},
	74	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	75	},
	76	'web_embedded': {
	77	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	78	'INNERTUBE_CONTEXT': {
	79	'client': {
	80	'clientName': 'WEB_EMBEDDED_PLAYER',
	81	'clientVersion': '1.20220731.00.00',
	82	},
	83	},
	84	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	85	},
	86	'web_music': {
	87	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	88	'INNERTUBE_HOST': 'music.youtube.com',
	89	'INNERTUBE_CONTEXT': {
	90	'client': {
	91	'clientName': 'WEB_REMIX',
	92	'clientVersion': '1.20220727.01.00',
	93	}
	94	},
	95	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	96	},
	97	'web_creator': {
	98	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	99	'INNERTUBE_CONTEXT': {
	100	'client': {
	101	'clientName': 'WEB_CREATOR',
	102	'clientVersion': '1.20220726.00.00',
	103	}
	104	},
	105	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	106	},
	107	'android': {
	108	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	109	'INNERTUBE_CONTEXT': {
	110	'client': {
	111	'clientName': 'ANDROID',
	112	'clientVersion': '17.29.34',
	113	'androidSdkVersion': 30
	114	}
	115	},
	116	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	117	'REQUIRE_JS_PLAYER': False
	118	},
	119	'android_embedded': {
	120	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	121	'INNERTUBE_CONTEXT': {
	122	'client': {
	123	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	124	'clientVersion': '17.29.34',
	125	'androidSdkVersion': 30
	126	},
	127	},
	128	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	129	'REQUIRE_JS_PLAYER': False
	130	},
	131	'android_music': {
	132	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	133	'INNERTUBE_CONTEXT': {
	134	'client': {
	135	'clientName': 'ANDROID_MUSIC',
	136	'clientVersion': '5.16.51',
	137	'androidSdkVersion': 30
	138	}
	139	},
	140	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	141	'REQUIRE_JS_PLAYER': False
	142	},
	143	'android_creator': {
	144	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	145	'INNERTUBE_CONTEXT': {
	146	'client': {
	147	'clientName': 'ANDROID_CREATOR',
	148	'clientVersion': '22.28.100',
	149	'androidSdkVersion': 30
	150	},
	151	},
	152	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	153	'REQUIRE_JS_PLAYER': False
	154	},
	155	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	156	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	157	'ios': {
	158	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	159	'INNERTUBE_CONTEXT': {
	160	'client': {
	161	'clientName': 'IOS',
	162	'clientVersion': '17.30.1',
	163	'deviceModel': 'iPhone14,3',
	164	}
	165	},
	166	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	167	'REQUIRE_JS_PLAYER': False
	168	},
	169	'ios_embedded': {
	170	'INNERTUBE_CONTEXT': {
	171	'client': {
	172	'clientName': 'IOS_MESSAGES_EXTENSION',
	173	'clientVersion': '17.30.1',
	174	'deviceModel': 'iPhone14,3',
	175	},
	176	},
	177	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	178	'REQUIRE_JS_PLAYER': False
	179	},
	180	'ios_music': {
	181	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	182	'INNERTUBE_CONTEXT': {
	183	'client': {
	184	'clientName': 'IOS_MUSIC',
	185	'clientVersion': '5.18',
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_creator': {
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_CREATOR',
	195	'clientVersion': '22.29.101',
	196	},
	197	},
	198	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	199	'REQUIRE_JS_PLAYER': False
	200	},
	201	# mweb has 'ultralow' formats
	202	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	203	'mweb': {
	204	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	205	'INNERTUBE_CONTEXT': {
	206	'client': {
	207	'clientName': 'MWEB',
	208	'clientVersion': '2.20220801.00.00',
	209	}
	210	},
	211	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	212	},
	213	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	214	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	215	'tv_embedded': {
	216	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	217	'INNERTUBE_CONTEXT': {
	218	'client': {
	219	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	220	'clientVersion': '2.0',
	221	},
	222	},
	223	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	224	},
	225	}
	226
	227
	228	def _split_innertube_client(client_name):
	229	variant, *base = client_name.rsplit('.', 1)
	230	if base:
	231	return variant, base[0], variant
	232	base, *variant = client_name.split('_', 1)
	233	return client_name, base, variant[0] if variant else None
	234
	235
	236	def build_innertube_clients():
	237	THIRD_PARTY = {
	238	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	239	}
	240	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	241	priority = qualities(BASE_CLIENTS[::-1])
	242
	243	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	244	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	245	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	246	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	247	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	248
	249	_, base_client, variant = _split_innertube_client(client)
	250	ytcfg['priority'] = 10 * priority(base_client)
	251
	252	if not variant:
	253	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	254	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	255	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	256	embedscreen['priority'] -= 3
	257	elif variant == 'embedded':
	258	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	259	ytcfg['priority'] -= 2
	260	else:
	261	ytcfg['priority'] -= 3
	262
	263
	264	build_innertube_clients()
	265
	266
	267	class YoutubeBaseInfoExtractor(InfoExtractor):
	268	"""Provide base functions for Youtube extractors"""
	269
	270	_RESERVED_NAMES = (
	271	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	272	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	273	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	274	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	275
	276	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	277
	278	# _NETRC_MACHINE = 'youtube'
	279
	280	# If True it will raise an error if no login info is provided
	281	_LOGIN_REQUIRED = False
	282
	283	_INVIDIOUS_SITES = (
	284	# invidious-redirect websites
	285	r'(?:www\.)?redirect\.invidious\.io',
	286	r'(?:(?:www\|dev)\.)?invidio\.us',
	287	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	288	r'(?:www\.)?invidious\.pussthecat\.org',
	289	r'(?:www\.)?invidious\.zee\.li',
	290	r'(?:www\.)?invidious\.ethibox\.fr',
	291	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	292	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	293	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	294	# youtube-dl invidious instances list
	295	r'(?:(?:www\|no)\.)?invidiou\.sh',
	296	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	297	r'(?:www\.)?invidious\.kabi\.tk',
	298	r'(?:www\.)?invidious\.mastodon\.host',
	299	r'(?:www\.)?invidious\.zapashcanon\.fr',
	300	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	301	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	302	r'(?:www\.)?invidious\.himiko\.cloud',
	303	r'(?:www\.)?invidious\.reallyancient\.tech',
	304	r'(?:www\.)?invidious\.tube',
	305	r'(?:www\.)?invidiou\.site',
	306	r'(?:www\.)?invidious\.site',
	307	r'(?:www\.)?invidious\.xyz',
	308	r'(?:www\.)?invidious\.nixnet\.xyz',
	309	r'(?:www\.)?invidious\.048596\.xyz',
	310	r'(?:www\.)?invidious\.drycat\.fr',
	311	r'(?:www\.)?inv\.skyn3t\.in',
	312	r'(?:www\.)?tube\.poal\.co',
	313	r'(?:www\.)?tube\.connect\.cafe',
	314	r'(?:www\.)?vid\.wxzm\.sx',
	315	r'(?:www\.)?vid\.mint\.lgbt',
	316	r'(?:www\.)?vid\.puffyan\.us',
	317	r'(?:www\.)?yewtu\.be',
	318	r'(?:www\.)?yt\.elukerio\.org',
	319	r'(?:www\.)?yt\.lelux\.fi',
	320	r'(?:www\.)?invidious\.ggc-project\.de',
	321	r'(?:www\.)?yt\.maisputain\.ovh',
	322	r'(?:www\.)?ytprivate\.com',
	323	r'(?:www\.)?invidious\.13ad\.de',
	324	r'(?:www\.)?invidious\.toot\.koeln',
	325	r'(?:www\.)?invidious\.fdn\.fr',
	326	r'(?:www\.)?watch\.nettohikari\.com',
	327	r'(?:www\.)?invidious\.namazso\.eu',
	328	r'(?:www\.)?invidious\.silkky\.cloud',
	329	r'(?:www\.)?invidious\.exonip\.de',
	330	r'(?:www\.)?invidious\.riverside\.rocks',
	331	r'(?:www\.)?invidious\.blamefran\.net',
	332	r'(?:www\.)?invidious\.moomoo\.de',
	333	r'(?:www\.)?ytb\.trom\.tf',
	334	r'(?:www\.)?yt\.cyberhost\.uk',
	335	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	336	r'(?:www\.)?qklhadlycap4cnod\.onion',
	337	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	338	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	339	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	340	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	341	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	342	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	343	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	344	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	345	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	346	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	347	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	348	r'(?:www\.)?piped\.kavin\.rocks',
	349	r'(?:www\.)?piped\.silkky\.cloud',
	350	r'(?:www\.)?piped\.tokhmi\.xyz',
	351	r'(?:www\.)?piped\.moomoo\.me',
	352	r'(?:www\.)?il\.ax',
	353	r'(?:www\.)?piped\.syncpundit\.com',
	354	r'(?:www\.)?piped\.mha\.fi',
	355	r'(?:www\.)?piped\.mint\.lgbt',
	356	r'(?:www\.)?piped\.privacy\.com\.de',
	357	)
	358
	359	def _initialize_consent(self):
	360	cookies = self._get_cookies('https://www.youtube.com/')
	361	if cookies.get('__Secure-3PSID'):
	362	return
	363	consent_id = None
	364	consent = cookies.get('CONSENT')
	365	if consent:
	366	if 'YES' in consent.value:
	367	return
	368	consent_id = self._search_regex(
	369	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	370	if not consent_id:
	371	consent_id = random.randint(100, 999)
	372	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	373
	374	def _initialize_pref(self):
	375	cookies = self._get_cookies('https://www.youtube.com/')
	376	pref_cookie = cookies.get('PREF')
	377	pref = {}
	378	if pref_cookie:
	379	try:
	380	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	381	except ValueError:
	382	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	383	pref.update({'hl': 'en', 'tz': 'UTC'})
	384	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	385
	386	def _real_initialize(self):
	387	self._initialize_pref()
	388	self._initialize_consent()
	389	self._check_login_required()
	390
	391	def _check_login_required(self):
	392	if self._LOGIN_REQUIRED and not self._cookies_passed:
	393	self.raise_login_required('Login details are needed to download this content', method='cookies')
	394
	395	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	396	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	397
	398	def _get_default_ytcfg(self, client='web'):
	399	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	400
	401	def _get_innertube_host(self, client='web'):
	402	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	403
	404	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	405	# try_get but with fallback to default ytcfg client values when present
	406	_func = lambda y: try_get(y, getter, expected_type)
	407	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	408
	409	def _extract_client_name(self, ytcfg, default_client='web'):
	410	return self._ytcfg_get_safe(
	411	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	412	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	413
	414	def _extract_client_version(self, ytcfg, default_client='web'):
	415	return self._ytcfg_get_safe(
	416	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	417	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	418
	419	def _select_api_hostname(self, req_api_hostname, default_client=None):
	420	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	421	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	422
	423	def _extract_api_key(self, ytcfg=None, default_client='web'):
	424	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	425
	426	def _extract_context(self, ytcfg=None, default_client='web'):
	427	context = get_first(
	428	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	429	# Enforce language and tz for extraction
	430	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	431	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	432	return context
	433
    # Cached SAPISID cookie value. Stays None until _generate_sapisidhash_header
    # first looks the cookie up; afterwards it holds the cookie's value, or False
    # when no usable cookie was found.
    _SAPISID = None
	435
	436	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	437	time_now = round(time.time())
	438	if self._SAPISID is None:
	439	yt_cookies = self._get_cookies('https://www.youtube.com')
	440	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	441	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	442	sapisid_cookie = dict_get(
	443	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	444	if sapisid_cookie and sapisid_cookie.value:
	445	self._SAPISID = sapisid_cookie.value
	446	self.write_debug('Extracted SAPISID cookie')
	447	# SAPISID cookie is required if not already present
	448	if not yt_cookies.get('SAPISID'):
	449	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	450	self._set_cookie(
	451	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	452	else:
	453	self._SAPISID = False
	454	if not self._SAPISID:
	455	return None
	456	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	457	sapisidhash = hashlib.sha1(
	458	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	459	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	460
	461	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	462	note='Downloading API JSON', errnote='Unable to download API page',
	463	context=None, api_key=None, api_hostname=None, default_client='web'):
	464
	465	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	466	data.update(query)
	467	real_headers = self.generate_api_headers(default_client=default_client)
	468	real_headers.update({'content-type': 'application/json'})
	469	if headers:
	470	real_headers.update(headers)
	471	api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	472	or api_key or self._extract_api_key(default_client=default_client))
	473	return self._download_json(
	474	f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
	475	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	476	data=json.dumps(data).encode('utf8'), headers=real_headers,
	477	query={'key': api_key, 'prettyPrint': 'false'})
	478
	479	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	480	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
	481
	482	@staticmethod
	483	def _extract_session_index(*data):
	484	"""
	485	Index of current account in account list.
	486	See: https://github.com/yt-dlp/yt-dlp/pull/519
	487	"""
	488	for ytcfg in data:
	489	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	490	if session_index is not None:
	491	return session_index
	492
	493	# Deprecated?
	494	def _extract_identity_token(self, ytcfg=None, webpage=None):
	495	if ytcfg:
	496	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	497	if token:
	498	return token
	499	if webpage:
	500	return self._search_regex(

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from ..compat import functools

21

from ..jsinterp import JSInterpreter

22

from ..utils import (

NO_DEFAULT,

ExtractorError,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

65

INNERTUBE_CLIENTS = {

66

'web': {

67

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

68

'INNERTUBE_CONTEXT': {

69

'client': {

70

'clientName': 'WEB',

71

'clientVersion': '2.20220801.00.00',

72

}

73

},

74

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

75

},

76

'web_embedded': {

77

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

78

'INNERTUBE_CONTEXT': {

79

'client': {

80

'clientName': 'WEB_EMBEDDED_PLAYER',

81

'clientVersion': '1.20220731.00.00',

82

},

83

},

84

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

85

},

86

'web_music': {

87

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

88

'INNERTUBE_HOST': 'music.youtube.com',

89

'INNERTUBE_CONTEXT': {

90

'client': {

91

'clientName': 'WEB_REMIX',

92

'clientVersion': '1.20220727.01.00',

93

}

94

},

95

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

96

},

97

'web_creator': {

98

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

99

'INNERTUBE_CONTEXT': {

100

'client': {

101

'clientName': 'WEB_CREATOR',

102

'clientVersion': '1.20220726.00.00',

103

}

104

},

105

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

106

},

107

'android': {

108

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

109

'INNERTUBE_CONTEXT': {

110

'client': {

111

'clientName': 'ANDROID',

112

'clientVersion': '17.29.34',

113

'androidSdkVersion': 30

114

}

115

},

116

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

117

'REQUIRE_JS_PLAYER': False

118

},

119

'android_embedded': {

120

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID_EMBEDDED_PLAYER',

124

'clientVersion': '17.29.34',

125

'androidSdkVersion': 30

126

},

127

},

128

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

129

'REQUIRE_JS_PLAYER': False

130

},

131

'android_music': {

132

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

133

'INNERTUBE_CONTEXT': {

134

'client': {

135

'clientName': 'ANDROID_MUSIC',

136

'clientVersion': '5.16.51',

137

'androidSdkVersion': 30

138

}

139

},

140

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

141

'REQUIRE_JS_PLAYER': False

142

},

143

'android_creator': {

144

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

145

'INNERTUBE_CONTEXT': {

146

'client': {

147

'clientName': 'ANDROID_CREATOR',

148

'clientVersion': '22.28.100',

149

'androidSdkVersion': 30

150

},

151

},

152

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

153

'REQUIRE_JS_PLAYER': False

154

},

155

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

156

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

157

'ios': {

158

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

159

'INNERTUBE_CONTEXT': {

160

'client': {

161

'clientName': 'IOS',

162

'clientVersion': '17.30.1',

163

'deviceModel': 'iPhone14,3',

164

}

165

},

166

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

167

'REQUIRE_JS_PLAYER': False

168

},

169

'ios_embedded': {

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS_MESSAGES_EXTENSION',

173

'clientVersion': '17.30.1',

174

'deviceModel': 'iPhone14,3',

175

},

176

},

177

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

178

'REQUIRE_JS_PLAYER': False

179

},

180

'ios_music': {

181

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

182

'INNERTUBE_CONTEXT': {

183

'client': {

184

'clientName': 'IOS_MUSIC',

185

'clientVersion': '5.18',

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_creator': {

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_CREATOR',

195

'clientVersion': '22.29.101',

196

},

197

},

198

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

199

'REQUIRE_JS_PLAYER': False

200

},

201

# mweb has 'ultralow' formats

202

# See: https://github.com/yt-dlp/yt-dlp/pull/557

203

'mweb': {

204

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'MWEB',

208

'clientVersion': '2.20220801.00.00',

209

}

210

},

211

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

212

},

213

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

214

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

215

'tv_embedded': {

216

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

217

'INNERTUBE_CONTEXT': {

218

'client': {

219

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

220

'clientVersion': '2.0',

221

},

222

},

223

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

229

variant, *base = client_name.rsplit('.', 1)

230

if base:

231

return variant, base[0], variant

232

base, *variant = client_name.split('_', 1)

233

return client_name, base, variant[0] if variant else None

234

235

236

def build_innertube_clients():

237

THIRD_PARTY = {

238

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

239

}

240

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

241

priority = qualities(BASE_CLIENTS[::-1])

242

243

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

244

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

245

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

246

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

247

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

248

249

_, base_client, variant = _split_innertube_client(client)

250

ytcfg['priority'] = 10 * priority(base_client)

251

252

if not variant:

253

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

254

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

255

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

256

embedscreen['priority'] -= 3

257

elif variant == 'embedded':

258

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

259

ytcfg['priority'] -= 2

260

else:

261

ytcfg['priority'] -= 3

262

263

264

build_innertube_clients()

265

266

267

class YoutubeBaseInfoExtractor(InfoExtractor):

268

"""Provide base functions for Youtube extractors"""

269

270

_RESERVED_NAMES = (

271

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

277

278

# _NETRC_MACHINE = 'youtube'

279

280

# If True it will raise an error if no login info is provided

281

_LOGIN_REQUIRED = False

282

283

_INVIDIOUS_SITES = (

284

# invidious-redirect websites

285

r'(?:www\.)?redirect\.invidious\.io',

286

r'(?:(?:www|dev)\.)?invidio\.us',

287

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

288

r'(?:www\.)?invidious\.pussthecat\.org',

289

r'(?:www\.)?invidious\.zee\.li',

290

r'(?:www\.)?invidious\.ethibox\.fr',

291

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

292

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

293

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

294

# youtube-dl invidious instances list

295

r'(?:(?:www|no)\.)?invidiou\.sh',

296

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

297

r'(?:www\.)?invidious\.kabi\.tk',

298

r'(?:www\.)?invidious\.mastodon\.host',

299

r'(?:www\.)?invidious\.zapashcanon\.fr',

300

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

301

r'(?:www\.)?invidious\.tinfoil-hat\.net',

302

r'(?:www\.)?invidious\.himiko\.cloud',

303

r'(?:www\.)?invidious\.reallyancient\.tech',

304

r'(?:www\.)?invidious\.tube',

305

r'(?:www\.)?invidiou\.site',

306

r'(?:www\.)?invidious\.site',

307

r'(?:www\.)?invidious\.xyz',

308

r'(?:www\.)?invidious\.nixnet\.xyz',

309

r'(?:www\.)?invidious\.048596\.xyz',

310

r'(?:www\.)?invidious\.drycat\.fr',

311

r'(?:www\.)?inv\.skyn3t\.in',

312

r'(?:www\.)?tube\.poal\.co',

313

r'(?:www\.)?tube\.connect\.cafe',

314

r'(?:www\.)?vid\.wxzm\.sx',

315

r'(?:www\.)?vid\.mint\.lgbt',

316

r'(?:www\.)?vid\.puffyan\.us',

317

r'(?:www\.)?yewtu\.be',

318

r'(?:www\.)?yt\.elukerio\.org',

319

r'(?:www\.)?yt\.lelux\.fi',

320

r'(?:www\.)?invidious\.ggc-project\.de',

321

r'(?:www\.)?yt\.maisputain\.ovh',

322

r'(?:www\.)?ytprivate\.com',

323

r'(?:www\.)?invidious\.13ad\.de',

324

r'(?:www\.)?invidious\.toot\.koeln',

325

r'(?:www\.)?invidious\.fdn\.fr',

326

r'(?:www\.)?watch\.nettohikari\.com',

327

r'(?:www\.)?invidious\.namazso\.eu',

328

r'(?:www\.)?invidious\.silkky\.cloud',

329

r'(?:www\.)?invidious\.exonip\.de',

330

r'(?:www\.)?invidious\.riverside\.rocks',

331

r'(?:www\.)?invidious\.blamefran\.net',

332

r'(?:www\.)?invidious\.moomoo\.de',

333

r'(?:www\.)?ytb\.trom\.tf',

334

r'(?:www\.)?yt\.cyberhost\.uk',

335

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

336

r'(?:www\.)?qklhadlycap4cnod\.onion',

337

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

338

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

339

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

340

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

341

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

342

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

343

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

344

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

345

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

346

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

347

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

348

r'(?:www\.)?piped\.kavin\.rocks',

349

r'(?:www\.)?piped\.silkky\.cloud',

350

r'(?:www\.)?piped\.tokhmi\.xyz',

351

r'(?:www\.)?piped\.moomoo\.me',

352

r'(?:www\.)?il\.ax',

353

r'(?:www\.)?piped\.syncpundit\.com',

354

r'(?:www\.)?piped\.mha\.fi',

355

r'(?:www\.)?piped\.mint\.lgbt',

356

r'(?:www\.)?piped\.privacy\.com\.de',

357

)

358

359

def _initialize_consent(self):

360

cookies = self._get_cookies('https://www.youtube.com/')

361

if cookies.get('__Secure-3PSID'):

362

return

363

consent_id = None

364

consent = cookies.get('CONSENT')

365

if consent:

366

if 'YES' in consent.value:

367

return

368

consent_id = self._search_regex(

369

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

370

if not consent_id:

371

consent_id = random.randint(100, 999)

372

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

373

374

def _initialize_pref(self):

375

cookies = self._get_cookies('https://www.youtube.com/')

376

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

381

except ValueError:

382

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

383

pref.update({'hl': 'en', 'tz': 'UTC'})

384

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

385

386

def _real_initialize(self):

387

self._initialize_pref()

388

self._initialize_consent()

389

self._check_login_required()

390

391

def _check_login_required(self):

392

if self._LOGIN_REQUIRED and not self._cookies_passed:

393

self.raise_login_required('Login details are needed to download this content', method='cookies')

394

395

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

396

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

397

398

def _get_default_ytcfg(self, client='web'):

399

return copy.deepcopy(INNERTUBE_CLIENTS[client])

400

401

def _get_innertube_host(self, client='web'):

402

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

403

404

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

405

# try_get but with fallback to default ytcfg client values when present

406

_func = lambda y: try_get(y, getter, expected_type)

407

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

408

409

def _extract_client_name(self, ytcfg, default_client='web'):

410

return self._ytcfg_get_safe(

411

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

412

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

413

414

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from ``ytcfg``, falling back to the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)

418

419

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname to use.

    Precedence: the 'innertube_host' extractor argument, then
    ``req_api_hostname``, then the built-in host of ``default_client``
    ('web' when no client is given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

422

423

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from ``ytcfg``, falling back to the default client config."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)

425

426

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict with language and timezone enforced.

    The context is looked up in ``ytcfg`` first and then in the default
    ytcfg of ``default_client``. Its 'client' sub-dict is updated in place;
    when there is no 'client' dict, the update lands on a throwaway dict
    and the context is returned as found.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    enforced = {'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    traverse_obj(context, 'client', expected_type=dict, default={}).update(enforced)
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

437

time_now = round(time.time())

438

if self._SAPISID is None:

439

yt_cookies = self._get_cookies('https://www.youtube.com')

440

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

441

# See: https://github.com/yt-dlp/yt-dlp/issues/393

442

sapisid_cookie = dict_get(

443

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

444

if sapisid_cookie and sapisid_cookie.value:

445

self._SAPISID = sapisid_cookie.value

446

self.write_debug('Extracted SAPISID cookie')

447

# SAPISID cookie is required if not already present

448

if not yt_cookies.get('SAPISID'):

449

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

450

self._set_cookie(

451

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

452

else:

453

self._SAPISID = False

454

if not self._SAPISID:

455

return None

456

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

457

sapisidhash = hashlib.sha1(

458

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

459

return f'SAPISIDHASH {time_now}_{sapisidhash}'

460

461

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` to the ``/youtubei/v1/{ep}`` endpoint and return the parsed JSON.

    The request body is ``{'context': ...}`` merged with ``query``; a falsy
    ``context`` is replaced by the default context of ``default_client``.
    ``headers`` are applied on top of the generated API headers. The API key
    comes from the 'innertube_key' extractor argument, then ``api_key``, then
    the default client config.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    key = configured_key or api_key or self._extract_api_key(default_client=default_client)
    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON assigned to ytInitialData and return it parsed."""
    initial_data = self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
    return initial_data

481

482

@staticmethod

483

def _extract_session_index(*data):

484

"""

485

Index of current account in account list.

486

See: https://github.com/yt-dlp/yt-dlp/pull/519

487

"""

488

for ytcfg in data:

489

session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

490

if session_index is not None:

return session_index

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

495

if ytcfg:

496

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)

if token:

return token

if webpage:

return self._search_regex(

501

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

502

'identity token', default=None, fatal=False)

503

504

@staticmethod

505

def _extract_account_syncid(*args):

506

"""

507

Extract syncId required to download private playlists of secondary channels

508

@params response and/or ytcfg

509

"""

510

for data in args:

511

# ytcfg includes channel_syncid if on secondary channel

512

delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)

if delegated_sid:

return delegated_sid

sync_ids = (try_get(

data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],

517

lambda x: x['DATASYNC_ID']), str) or '').split('||')

518

if len(sync_ids) >= 2 and sync_ids[1]:

519

# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel

520

# and just "user_syncid||" for primary channel. We only want the channel_syncid

return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried against each argument
    visitor_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_paths, expected_type=str)

532

533

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; the result is cached by cached_property."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

536

537

def extract_ytcfg(self, video_id, webpage):
    """Extract the object passed to ``ytcfg.set({...});`` in ``webpage``.

    Returns the parsed dict, or an empty dict when ``webpage`` is empty,
    the call is not found, or its argument cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the `ytcfg.set(...)` call must be escaped literals;
    # `$` anchors in their place would stop the pattern from ever matching.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

544

545

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from ``ytcfg``.
    'X-Goog-AuthUser' is added when an account syncid or session index is
    known; 'Authorization' and 'X-Origin' are added when a SAPISIDHASH
    header can be generated. Headers whose value is None are dropped.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An account syncid without a known session index defaults to account 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

569

570

def _download_ytcfg(self, client, video_id):

571

url = {

572

'web': 'https://www.youtube.com',

573

'web_music': 'https://music.youtube.com',

574

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

579

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

580

return self.extract_ytcfg(video_id, webpage) or {}

581

582

@staticmethod

583

def _build_api_continuation_query(continuation, ctp=None):

584

query = {

585

'continuation': continuation

586

}

587

# TODO: Inconsistency with clickTrackingParams.

588

# Currently we have a fixed ctp contained within context (from ytcfg)

589

# and a ctp in root query for continuation.

590

if ctp:

591

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from nextContinuationData/reloadContinuationData in ``renderer``.

    Returns None when no such data or no 'continuation' token is present.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))

606

607

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or has no
    ``continuationCommand.token`` string.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

616

617

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None when there is none.

    Next/reload continuation data is preferred; otherwise the entries under
    'contents' and 'items' are scanned for a continuationItemRenderer.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Entries that are not dicts, that have no 'type', or that have no
    readable 'text' are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed responses may carry non-dict values here; skip them
            # instead of failing on `.get`
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

650

651

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

652

errors = []

653

warnings = []

654

for alert_type, alert_message in alerts:

655

if alert_type.lower() == 'error' and fatal:

656

errors.append([alert_type, alert_message])

657

else:

658

warnings.append([alert_type, alert_message])

659

660

for alert_type, alert_message in (warnings + errors[:-1]):

661

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

662

if errors:

663

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

664

665

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from ``data`` and hand them to _report_alerts with the given options."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

667

668

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or []
    )
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in ``data`` along any of ``path_list``.

    With no paths, ``data`` itself is inspected. For each candidate object
    the 'simpleText' string is preferred; otherwise the 'text' of its 'runs'
    (or of the object itself when it is a list) is joined, using at most
    ``max_runs`` runs. Returns None when nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only paths containing `...` or alternative keys are iterated as
            # multiple results; a plain path's single result is wrapped
            branching = any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Parse a count from the text found at ``path_list`` in ``data``.

    parse_count is tried first; if it yields None, the leading run of digits
    and commas (after stripping whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text has no recognizable relative time or when
    datetime_from_str rejects the parsed amount/unit.
    """
    # The match must supply the `start`, `time` and `unit` groups read below
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first parsed as a relative time, then as a date via
    unified_timestamp, and finally via a date-like substring of the
    lower-cased text. A warning is reported when non-empty text cannot
    be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative.timetuple())
        if isinstance(relative, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

764

765

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API with retries and return the response.

    Retries (through ``self.RetryManager()``) on non-HTTP network errors,
    on HTTP errors other than 403/429, on 'unknown error' alerts and on
    responses lacking ``check_get_keys``. Any other failure is passed to
    ``_error_or_warning`` with ``fatal`` and its result is returned.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            if not isinstance(err.cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(err.cause, urllib.error.HTTPError):
                retry.error = err
                continue

            head = err.cause.read(512)
            if not is_html(head):
                # A non-HTML error body may carry a JSON error message worth showing
                body = self._webpage_read_content(err.cause, None, item_id, prefix=head) or '{}'
                yt_error = try_get(
                    self._parse_json(body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if err.cause.code not in (403, 429):
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in err.msg.lower():
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

819

return re.match(r'https?://music\.youtube\.com/', url) is not None

820

821

def _extract_video(self, renderer):
    """Build a 'url'-type result dict for the video described by ``renderer``.

    The URL points to the shorts page when the overlay style is 'SHORTS' or
    the navigation URL contains '/shorts/', and to the watch page otherwise.
    'upload_date' is only filled when the 'approximate_date' extractor
    argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration spelled out in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

878

IE_DESC = 'YouTube'

879

_VALID_URL = r"""(?x)^

880

(

881

(?:https?://|//) # http(s):// or protocol-independent URL

882

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

883

(?:www\.)?deturl\.com/www\.youtube\.com|

884

(?:www\.)?pwnyoutube\.com|

885

(?:www\.)?hooktube\.com|

886

(?:www\.)?yourepeat\.com|

887

tube\.majestyc\.net|

888

%(invidious)s|

889

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

890

(?:.*?\#/)? # handle anchor (#/) redirect urls

891

(?: # the various things that can precede the ID:

892

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

893

|(?: # or the v= param in all its forms

894

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

895

(?:\?|\#!?) # the params delimiter ? or # or #!

896

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

902

vid\.plus| # or vid.plus/xxxx

903

zwearz\.com/watch| # or zwearz.com/watch/xxxx

904

%(invidious)s

905

)/

906

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

907

)

908

)? # all until now is optional -> you can pass the naked ID

909

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

910

(?(1).+)? # if we found the ID, everything can follow

911

(?:\#|$)""" % {

912

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

913

}

914

_EMBED_REGEX = [r'''(?x)

(?:

<iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

925

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

926

\1''']

927

_PLAYER_INFO_RE = (

928

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

929

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

930

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

931

)

932

_formats = {

933

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

934

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

935

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

936

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

937

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

938

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

939

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

940

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

941

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

942

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

943

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

944

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

945

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

946

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

947

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

948

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

949

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

950

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

955

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

956

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

957

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

958

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

959

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

960

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

961

962

# Apple HTTP Live Streaming

963

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

964

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

965

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

966

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

967

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

968

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

969

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

970

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

971

972

# DASH mp4 video

973

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

974

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

975

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

976

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

977

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

979

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

983

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

984

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

986

# Dash mp4 audio

987

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

988

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

989

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

990

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

991

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

992

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

993

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

994

995

# Dash webm

996

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

997

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

998

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

999

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1000

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1003

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1004

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1005

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1006

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1007

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1012

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1014

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1015

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1016

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1018

1019

# Dash webm audio

1020

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1021

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1022

1023

# Dash webm audio with opus inside

1024

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1025

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1026

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1027

1028

# RTMP (unnamed)

1029

'_rtmp': {'protocol': 'rtmp'},

1030

1031

# av01 video only formats sometimes served with "unknown" codecs

1032

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1033

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1034

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1035

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1036

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1037

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1038

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1039

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1040

}

1041

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1053

'uploader': 'Philipp Hagemeister',

1054

'uploader_id': 'phihag',

1055

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1056

'channel': 'Philipp Hagemeister',

1057

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1058

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1059

'upload_date': '20121002',

1060

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1061

'categories': ['Science & Technology'],

1062

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1067

'playable_in_embed': True,

1068

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1069

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1074

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1079

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1084

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1085

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1086

'uploader': 'SET India',

1087

'uploader_id': 'setindia',

1088

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1089

'age_limit': 18,

1090

},

1091

'skip': 'Private video',

1092

},

1093

{

1094

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1095

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1100

'uploader': 'Philipp Hagemeister',

1101

'uploader_id': 'phihag',

1102

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1103

'channel': 'Philipp Hagemeister',

1104

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1105

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1106

'upload_date': '20121002',

1107

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1108

'categories': ['Science & Technology'],

1109

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1114

'playable_in_embed': True,

1115

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1116

'live_status': 'not_live',

1117

'age_limit': 0,

1118

'comment_count': int,

1119

'channel_follower_count': int

1120

},

1121

'params': {

1122

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1127

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1132

'uploader_id': '8KVIDEO',

1133

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1134

'description': '',

1135

'uploader': '8KVIDEO',

1136

'title': 'UHDTV TEST 8K VIDEO.mp4'

1137

},

1138

'params': {

1139

'youtube_include_dash_manifest': True,

1140

'format': '141',

1141

},

1142

'skip': 'format 141 not served anymore',

1143

},

1144

# DASH manifest with encrypted signature

1145

{

1146

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1151

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1152

'duration': 244,

1153

'uploader': 'AfrojackVEVO',

1154

'uploader_id': 'AfrojackVEVO',

1155

'upload_date': '20131011',

1156

'abr': 129.495,

1157

'like_count': int,

1158

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1159

'playable_in_embed': True,

1160

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1161

'view_count': int,

1162

'track': 'The Spark',

1163

'live_status': 'not_live',

1164

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1165

'channel': 'Afrojack',

1166

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1167

'tags': 'count:19',

1168

'availability': 'public',

1169

'categories': ['Music'],

1170

'age_limit': 0,

1171

'alt_title': 'The Spark',

1172

'channel_follower_count': int

1173

},

1174

'params': {

1175

'youtube_include_dash_manifest': True,

1176

'format': '141/bestaudio[ext=m4a]',

1177

},

1178

},

1179

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1180

{

1181

'note': 'Embed allowed age-gate video',

1182

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1187

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1188

'duration': 142,

1189

'uploader': 'The Witcher',

1190

'uploader_id': 'WitcherGame',

1191

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1192

'upload_date': '20140605',

1193

'age_limit': 18,

1194

'categories': ['Gaming'],

1195

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1196

'availability': 'needs_auth',

1197

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1198

'like_count': int,

1199

'channel': 'The Witcher',

1200

'live_status': 'not_live',

1201

'tags': 'count:17',

1202

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1203

'playable_in_embed': True,

1204

'view_count': int,

1205

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1210

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1215

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1216

'upload_date': '20200408',

1217

'uploader_id': 'FlyingKitty900',

1218

'uploader': 'FlyingKitty',

1219

'age_limit': 18,

1220

'availability': 'needs_auth',

1221

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1222

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1223

'channel': 'FlyingKitty',

1224

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1225

'view_count': int,

1226

'categories': ['Entertainment'],

1227

'live_status': 'not_live',

1228

'tags': ['Flyingkitty', 'godzilla 2'],

1229

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1230

'like_count': int,

1231

'duration': 177,

1232

'playable_in_embed': True,

1233

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1238

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1239

'info_dict': {

1240

'id': 'Tq92D6wQ1mg',

1241

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1242

'ext': 'mp4',

1243

'upload_date': '20191228',

1244

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1245

'uploader': 'Projekt Melody',

1246

'description': 'md5:17eccca93a786d51bc67646756894066',

1247

'age_limit': 18,

1248

'like_count': int,

1249

'availability': 'needs_auth',

1250

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1251

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1252

'view_count': int,

1253

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1254

'channel': 'Projekt Melody',

1255

'live_status': 'not_live',

1256

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1257

'playable_in_embed': True,

1258

'categories': ['Entertainment'],

1259

'duration': 106,

1260

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'comment_count': int,

1262

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1267

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1272

'uploader': 'Herr Lurik',

1273

'uploader_id': 'st3in234',

1274

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1275

'upload_date': '20130730',

1276

'track': 'Such mich find mich',

1277

'age_limit': 0,

1278

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1279

'like_count': int,

1280

'playable_in_embed': False,

1281

'creator': 'OOMPH!',

1282

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1283

'view_count': int,

1284

'alt_title': 'Such mich find mich',

1285

'duration': 210,

1286

'channel': 'Herr Lurik',

1287

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1288

'categories': ['Music'],

1289

'availability': 'public',

1290

'uploader_url': 'http://www.youtube.com/user/st3in234',

1291

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1292

'live_status': 'not_live',

1293

'artist': 'OOMPH!',

1294

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1299

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1300

'only_matching': True,

1301

},

1302

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1303

# YouTube Red ad is not captured for creator

1304

{

1305

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1311

'uploader_id': 'deadmau5',

1312

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1313

'creator': 'deadmau5',

1314

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1315

'uploader': 'deadmau5',

1316

'title': 'Deadmau5 - Some Chords (HD)',

1317

'alt_title': 'Some Chords',

1318

'availability': 'public',

1319

'tags': 'count:14',

1320

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1321

'view_count': int,

1322

'live_status': 'not_live',

1323

'channel': 'deadmau5',

1324

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1325

'like_count': int,

1326

'track': 'Some Chords',

1327

'artist': 'deadmau5',

1328

'playable_in_embed': True,

1329

'age_limit': 0,

1330

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1331

'categories': ['Music'],

1332

'album': 'Some Chords',

1333

'channel_follower_count': int

1334

},

1335

'expected_warnings': [

1336

'DASH manifest missing',

1337

]

1338

},

1339

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1340

{

1341

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1347

'uploader_id': 'olympic',

1348

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1349

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1350

'uploader': 'Olympics',

1351

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1352

'like_count': int,

1353

'release_timestamp': 1343767800,

1354

'playable_in_embed': True,

1355

'categories': ['Sports'],

1356

'release_date': '20120731',

1357

'channel': 'Olympics',

1358

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1359

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1360

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1361

'age_limit': 0,

1362

'availability': 'public',

1363

'live_status': 'was_live',

1364

'view_count': int,

1365

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1366

'channel_follower_count': int

1367

},

1368

'params': {

1369

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1379

'duration': 85,

1380

'upload_date': '20110310',

1381

'uploader_id': 'AllenMeow',

1382

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1383

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1384

'uploader': '孫ᄋᄅ',

1385

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1386

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1391

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1392

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1393

'view_count': int,

1394

'categories': ['People & Blogs'],

1395

'like_count': int,

1396

'live_status': 'not_live',

1397

'availability': 'unlisted',

1398

'comment_count': int,

1399

'channel_follower_count': int

1400

},

1401

},

1402

# url_encoded_fmt_stream_map is empty string

1403

{

1404

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1409

'description': '',

1410

'upload_date': '20150404',

1411

'uploader_id': 'spbelect',

1412

'uploader': 'Наблюдатели Петербурга',

1413

},

1414

'params': {

1415

'skip_download': 'requires avconv',

1416

},

1417

'skip': 'This live event has ended.',

1418

},

1419

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1420

{

1421

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1426

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1427

'duration': 220,

1428

'upload_date': '20150625',

1429

'uploader_id': 'dorappi2000',

1430

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1431

'uploader': 'dorappi2000',

1432

'formats': 'mincount:31',

1433

},

1434

'skip': 'not actual anymore',

1435

},

1436

# DASH manifest with segment_list

1437

{

1438

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1439

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1444

'uploader': 'Airtek',

1445

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1446

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1447

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1448

},

1449

'params': {

1450

'youtube_include_dash_manifest': True,

1451

'format': '135', # bestvideo

1452

},

1453

'skip': 'This live event has ended.',

1454

},

1455

{

1456

# Multifeed videos (multiple cameras), URL is for Main Camera

1457

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1458

'info_dict': {

1459

'id': 'jvGDaLqkpTg',

1460

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1461

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

1469

'duration': 10643,

1470

'upload_date': '20161111',

1471

'uploader': 'Team PGP',

1472

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1473

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1480

'description': 'md5:e03b909557865076822aa169218d6a5d',

1481

'duration': 10991,

1482

'upload_date': '20161111',

1483

'uploader': 'Team PGP',

1484

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1485

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1492

'description': 'md5:e03b909557865076822aa169218d6a5d',

1493

'duration': 10995,

1494

'upload_date': '20161111',

1495

'uploader': 'Team PGP',

1496

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1504

'description': 'md5:e03b909557865076822aa169218d6a5d',

1505

'duration': 10990,

1506

'upload_date': '20161111',

1507

'uploader': 'Team PGP',

1508

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1514

},

1515

'skip': 'Not multifeed anymore',

1516

},

1517

{

1518

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1519

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1520

'info_dict': {

1521

'id': 'gVfLd0zydlo',

1522

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1523

},

1524

'playlist_count': 2,

1525

'skip': 'Not multifeed anymore',

1526

},

1527

{

1528

'url': 'https://vid.plus/FlRa-iH7PGw',

1529

'only_matching': True,

1530

},

1531

{

1532

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1533

'only_matching': True,

1534

},

1535

{

1536

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1537

# Also tests cut-off URL expansion in video description (see

1538

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1539

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1540

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1545

'alt_title': 'Dark Walk',

1546

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1547

'duration': 133,

1548

'upload_date': '20151119',

1549

'uploader_id': 'IronSoulElf',

1550

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1551

'uploader': 'IronSoulElf',

1552

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1553

'track': 'Dark Walk',

1554

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1555

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1556

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1557

'categories': ['Film & Animation'],

1558

'view_count': int,

1559

'live_status': 'not_live',

1560

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1561

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1562

'tags': 'count:13',

1563

'availability': 'public',

1564

'channel': 'IronSoulElf',

1565

'playable_in_embed': True,

1566

'like_count': int,

1567

'age_limit': 0,

1568

'channel_follower_count': int

1569

},

1570

'params': {

1571

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1576

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1577

'only_matching': True,

1578

},

1579

{

1580

# Video with yt:stretch=17:0

1581

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1586

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1587

'upload_date': '20151107',

1588

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1589

'uploader': 'CH GAMER DROID',

1590

},

1591

'params': {

1592

'skip_download': True,

1593

},

1594

'skip': 'This video does not exist.',

1595

},

1596

{

1597

# Video with incomplete 'yt:stretch=16:'

1598

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1599

'only_matching': True,

1600

},

1601

{

1602

# Video licensed under Creative Commons

1603

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1608

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1609

'duration': 721,

1610

'upload_date': '20150128',

1611

'uploader_id': 'BerkmanCenter',

1612

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1613

'uploader': 'The Berkman Klein Center for Internet & Society',

1614

'license': 'Creative Commons Attribution license (reuse allowed)',

1615

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1616

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1617

'like_count': int,

1618

'age_limit': 0,

1619

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1620

'channel': 'The Berkman Klein Center for Internet & Society',

1621

'availability': 'public',

1622

'view_count': int,

1623

'categories': ['Education'],

1624

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1625

'live_status': 'not_live',

1626

'playable_in_embed': True,

1627

'comment_count': int,

1628

'channel_follower_count': int

1629

},

1630

'params': {

1631

'skip_download': True,

},

},

{

# Channel-like uploader_url

1636

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1641

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1642

'duration': 4060,

1643

'upload_date': '20151120',

1644

'uploader': 'Bernie Sanders',

1645

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1646

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1647

'license': 'Creative Commons Attribution license (reuse allowed)',

1648

'playable_in_embed': True,

1649

'tags': 'count:12',

1650

'like_count': int,

1651

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'age_limit': 0,

1653

'availability': 'public',

1654

'categories': ['News & Politics'],

1655

'channel': 'Bernie Sanders',

1656

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1657

'view_count': int,

1658

'live_status': 'not_live',

1659

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1660

'comment_count': int,

1661

'channel_follower_count': int

1662

},

1663

'params': {

1664

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1669

'only_matching': True,

1670

},

1671

{

1672

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1673

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1674

'only_matching': True,

1675

},

1676

{

1677

# Rental video preview

1678

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1683

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1684

'upload_date': '20150811',

1685

'uploader': 'FlixMatrix',

1686

'uploader_id': 'FlixMatrixKaravan',

1687

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1688

'license': 'Standard YouTube License',

1689

},

1690

'params': {

1691

'skip_download': True,

1692

},

1693

'skip': 'This video is not available.',

1694

},

1695

{

1696

# YouTube Red video with episode data

1697

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1702

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1703

'duration': 2085,

1704

'upload_date': '20170118',

1705

'uploader': 'Vsauce',

1706

'uploader_id': 'Vsauce',

1707

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1708

'series': 'Mind Field',

1709

'season_number': 1,

1710

'episode_number': 1,

1711

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1712

'tags': 'count:12',

1713

'view_count': int,

1714

'availability': 'public',

1715

'age_limit': 0,

1716

'channel': 'Vsauce',

1717

'episode': 'Episode 1',

1718

'categories': ['Entertainment'],

1719

'season': 'Season 1',

1720

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1721

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1722

'like_count': int,

1723

'playable_in_embed': True,

1724

'live_status': 'not_live',

1725

'channel_follower_count': int

1726

},

1727

'params': {

1728

'skip_download': True,

1729

},

1730

'expected_warnings': [

1731

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1736

# as inappropriate or offensive to some audiences.

1737

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1742

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1743

'duration': 965,

1744

'upload_date': '20140124',

1745

'uploader': 'New Century Foundation',

1746

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1747

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1748

},

1749

'params': {

1750

'skip_download': True,

1751

},

1752

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1757

'only_matching': True,

1758

},

1759

{

1760

# geo restricted to JP

1761

'url': 'sJL6WA-aGkQ',

1762

'only_matching': True,

1763

},

1764

{

1765

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1766

'only_matching': True,

1767

},

1768

{

1769

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1770

'only_matching': True,

1771

},

1772

{

1773

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1774

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1775

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1780

'only_matching': True,

1781

},

1782

{

1783

# Video with unsupported adaptive stream type formats

1784

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1789

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1790

'duration': 433,

1791

'upload_date': '20130923',

1792

'uploader': 'Amelia Putri Harwita',

1793

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1794

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1795

'formats': 'maxcount:10',

1796

},

1797

'params': {

1798

'skip_download': True,

1799

'youtube_include_dash_manifest': False,

1800

},

1801

'skip': 'not actual anymore',

1802

},

1803

{

1804

# YouTube Music auto-generated description

1805

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1810

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1811

'upload_date': '20190312',

1812

'uploader': 'Stephen - Topic',

1813

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1814

'artist': 'Stephen',

1815

'track': 'Voyeur Girl',

1816

'album': 'it\'s too much love to know my dear',

1817

'release_date': '20190313',

1818

'release_year': 2019,

1819

'alt_title': 'Voyeur Girl',

1820

'view_count': int,

1821

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1822

'playable_in_embed': True,

1823

'like_count': int,

1824

'categories': ['Music'],

1825

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1826

'channel': 'Stephen',

1827

'availability': 'public',

1828

'creator': 'Stephen',

1829

'duration': 169,

1830

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1831

'age_limit': 0,

1832

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1833

'tags': 'count:11',

1834

'live_status': 'not_live',

1835

'channel_follower_count': int

1836

},

1837

'params': {

1838

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1843

'only_matching': True,

1844

},

1845

{

1846

# invalid -> valid video id redirection

1847

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1852

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1853

'upload_date': '20090125',

1854

'uploader': 'Prochorowka',

1855

'uploader_id': 'Prochorowka',

1856

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1857

'artist': 'Panjabi MC',

1858

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1859

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1860

},

1861

'params': {

1862

'skip_download': True,

1863

},

1864

'skip': 'Video unavailable',

1865

},

1866

{

1867

# empty description results in an empty string

1868

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1875

'uploader_id': 'ElevageOrVert',

1876

'uploader': 'ElevageOrVert',

1877

'view_count': int,

1878

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1879

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1880

'like_count': int,

1881

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1882

'tags': [],

1883

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1884

'availability': 'public',

1885

'age_limit': 0,

1886

'categories': ['Pets & Animals'],

1887

'duration': 7,

1888

'playable_in_embed': True,

1889

'live_status': 'not_live',

1890

'channel': 'ElevageOrVert',

1891

'channel_follower_count': int

1892

},

1893

'params': {

1894

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1899

# see [2] for an example with '};' inside ytInitialPlayerResponse

1900

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1901

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1902

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1907

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1908

'upload_date': '20130831',

1909

'uploader_id': 'kudvenkat',

1910

'uploader': 'kudvenkat',

1911

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1912

'like_count': int,

1913

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1914

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1915

'live_status': 'not_live',

1916

'categories': ['Education'],

1917

'availability': 'public',

1918

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1919

'tags': 'count:12',

1920

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1925

'comment_count': int,

1926

'channel_follower_count': int

1927

},

1928

'params': {

1929

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1934

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1935

'only_matching': True,

1936

},

1937

{

1938

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1939

'only_matching': True,

1940

},

1941

{

1942

# https://github.com/ytdl-org/youtube-dl/pull/28094

1943

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1949

'upload_date': '20141120',

1950

'uploader': 'The Cinematic Orchestra - Topic',

1951

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1952

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1953

'artist': 'The Cinematic Orchestra',

1954

'track': 'Burn Out',

1955

'album': 'Every Day',

1956

'like_count': int,

1957

'live_status': 'not_live',

1958

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1963

'creator': 'The Cinematic Orchestra',

1964

'channel': 'The Cinematic Orchestra',

1965

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1966

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'availability': 'public',

1968

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1969

'categories': ['Music'],

1970

'playable_in_embed': True,

1971

'channel_follower_count': int

1972

},

1973

'params': {

1974

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1979

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1980

'only_matching': True,

1981

},

1982

{

1983

# controversial video, requires bpctr/contentCheckOk

1984

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1989

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1990

'uploader': 'CBS Mornings',

1991

'uploader_id': 'CBSThisMorning',

1992

'upload_date': '20140716',

1993

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1994

'duration': 170,

1995

'categories': ['News & Politics'],

1996

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1997

'view_count': int,

1998

'channel': 'CBS Mornings',

1999

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2000

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2001

'age_limit': 18,

2002

'availability': 'needs_auth',

2003

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2004

'like_count': int,

2005

'live_status': 'not_live',

2006

'playable_in_embed': True,

2007

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2012

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2017

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2018

'upload_date': '20201120',

2019

'uploader': 'Walk around Japan',

2020

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2021

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2022

'duration': 1456,

2023

'categories': ['Travel & Events'],

2024

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'view_count': int,

2026

'channel': 'Walk around Japan',

2027

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2028

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2029

'age_limit': 0,

2030

'availability': 'public',

2031

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2032

'live_status': 'not_live',

2033

'playable_in_embed': True,

2034

'channel_follower_count': int

2035

},

2036

'params': {

2037

'skip_download': True,

2038

},

2039

}, {

2040

# Has multiple audio streams

2041

'url': 'WaOKSUlf4TM',

2042

'only_matching': True

2043

}, {

2044

# Requires Premium: has format 141 when requested using YTM url

2045

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2046

'only_matching': True

2047

}, {

2048

# multiple subtitles with same lang_code

2049

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2050

'only_matching': True,

2051

}, {

2052

# Force use android client fallback

2053

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2054

'info_dict': {

2055

'id': 'YOelRv7fMxY',

2056

'title': 'DIGGING A SECRET TUNNEL Part 1',

2057

'ext': '3gp',

2058

'upload_date': '20210624',

2059

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2060

'uploader': 'colinfurze',

2061

'uploader_id': 'colinfurze',

2062

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2063

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2064

'duration': 596,

2065

'categories': ['Entertainment'],

2066

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2067

'view_count': int,

2068

'channel': 'colinfurze',

2069

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2070

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2071

'age_limit': 0,

2072

'availability': 'public',

2073

'like_count': int,

2074

'live_status': 'not_live',

2075

'playable_in_embed': True,

2076

'channel_follower_count': int

2077

},

2078

'params': {

2079

'format': '17', # 3gp format available on android

2080

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2085

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2086

'only_matching': True,

2087

'params': {

2088

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2093

'only_matching': True,

2094

}, {

2095

'note': 'Storyboards',

2096

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2102

'uploader_id': 'scishow',

2103

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2104

'upload_date': '20140324',

2105

'uploader': 'SciShow',

2106

'like_count': int,

2107

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2108

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2109

'view_count': int,

2110

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2111

'playable_in_embed': True,

2112

'tags': 'count:12',

2113

'uploader_url': 'http://www.youtube.com/user/scishow',

2114

'availability': 'public',

2115

'channel': 'SciShow',

2116

'live_status': 'not_live',

2117

'duration': 248,

2118

'categories': ['Education'],

2119

'age_limit': 0,

2120

'channel_follower_count': int

2121

}, 'params': {'format': 'mhtml', 'skip_download': True}

2122

}, {

2123

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2124

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2129

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2130

'uploader': 'Leon Nguyen',

2131

'uploader_id': 'VNSXIII',

2132

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2133

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2134

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2139

'tags': 'count:23',

2140

'playable_in_embed': True,

2141

'live_status': 'not_live',

2142

'upload_date': '20220103',

2143

'like_count': int,

2144

'availability': 'public',

2145

'channel': 'Leon Nguyen',

2146

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2147

'comment_count': int,

2148

'channel_follower_count': int

2149

}

2150

}, {

2151

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2152

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2157

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2158

'uploader': 'Quackity',

2159

'uploader_id': 'QuackityHQ',

2160

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2161

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2162

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2167

'tags': 'count:26',

2168

'playable_in_embed': True,

2169

'live_status': 'not_live',

2170

'release_timestamp': 1641172509,

2171

'release_date': '20220103',

2172

'upload_date': '20220103',

2173

'like_count': int,

2174

'availability': 'public',

2175

'channel': 'Quackity',

2176

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2177

'channel_follower_count': int

2178

}

2179

},

2180

{ # continuous livestream. Microformat upload date should be preferred.

2181

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2182

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2183

'info_dict': {

2184

'id': 'kgx4WGK0oNU',

2185

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2186

'ext': 'mp4',

2187

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2188

'availability': 'public',

2189

'age_limit': 0,

2190

'release_timestamp': 1637975704,

2191

'upload_date': '20210619',

2192

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2193

'live_status': 'is_live',

2194

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2195

'uploader': '阿鲍Abao',

2196

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2197

'channel': 'Abao in Tokyo',

2198

'channel_follower_count': int,

2199

'release_date': '20211127',

2200

'tags': 'count:39',

2201

'categories': ['People & Blogs'],

2202

'like_count': int,

2203

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2204

'view_count': int,

2205

'playable_in_embed': True,

2206

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2207

},

2208

'params': {'skip_download': True}

2209

}, {

2210

# Story. Requires specific player params to work.

2211

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2216

'view_count': int,

2217

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2218

'upload_date': '20220526',

2219

'categories': ['Education'],

2220

'title': 'Story',

2221

'channel': 'IT\'S HISTORY',

2222

'description': '',

2223

'uploader_id': 'BlastfromthePast',

2224

'duration': 12,

2225

'uploader': 'IT\'S HISTORY',

2226

'playable_in_embed': True,

2227

'age_limit': 0,

2228

'live_status': 'not_live',

2229

'tags': [],

2230

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2231

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2232

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2233

},

2234

'skip': 'stories get removed after some period of time',

2235

}, {

2236

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2241

'upload_date': '20220323',

2242

'like_count': int,

2243

'availability': 'unlisted',

2244

'channel': 'nao20010128nao',

2245

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2246

'age_limit': 0,

2247

'uploader': 'nao20010128nao',

2248

'uploader_id': 'nao20010128nao',

2249

'categories': ['Music'],

2250

'view_count': int,

2251

'description': '',

2252

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2253

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2254

'live_status': 'not_live',

2255

'playable_in_embed': True,

2256

'channel_follower_count': int,

2257

'duration': 6,

2258

'tags': [],

2259

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2260

}

2261

}, {

2262

'note': '6 channel audio',

2263

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2264

'only_matching': True,

}

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2270

{

2271

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2272

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2277

'upload_date': '20080526',

2278

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2279

'uploader': 'Christopher Sykes',

2280

'uploader_id': 'ChristopherJSykes',

2281

'age_limit': 0,

2282

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2283

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2284

'playable_in_embed': True,

2285

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2286

'like_count': int,

2287

'comment_count': int,

2288

'channel': 'Christopher Sykes',

2289

'live_status': 'not_live',

2290

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2291

'availability': 'public',

2292

'duration': 195,

2293

'view_count': int,

2294

'categories': ['Science & Technology'],

2295

'channel_follower_count': int,

2296

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2297

},

2298

'params': {

2299

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` value is refused
    here; for every other URL the decision is left to the parent class.
    """
    # Kept as a function-level import, exactly like the original block.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2312

2313

def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialiser and create two empty caches."""
    super().__init__(*args, **kwargs)
    # _player_cache: written and read by _cached (cache-id tuple -> result or error)
    self._player_cache = {}
    # _code_cache: written and read by _load_player (player id -> downloaded code)
    self._code_cache = {}

2317

2318

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2319

lock = threading.Lock()

2320

2321

is_live = True

2322

start_time = time.time()

2323

formats = [f for f in formats if f.get('is_from_start')]

2324

2325

def refetch_manifest(format_id, delay):

2326

nonlocal formats, start_time, is_live

2327

if time.time() <= start_time + delay:

2328

return

2329

2330

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2331

video_details = traverse_obj(

2332

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2333

microformats = traverse_obj(

2334

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2335

expected_type=dict, default=[])

2336

_, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)

2337

start_time = time.time()

2338

2339

def mpd_feed(format_id, delay):

2340

"""

2341

@returns (manifest_url, manifest_stream_number, is_live) or None

2342

"""

2343

with lock:

2344

refetch_manifest(format_id, delay)

2345

2346

f = next((f for f in formats if f['format_id'] == format_id), None)

2347

if not f:

2348

if not is_live:

2349

self.to_screen(f'{video_id}: Video is no longer live')

2350

else:

2351

self.report_warning(

2352

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2353

return None

2354

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2359

f['fragments'] = functools.partial(

2360

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2361

2362

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2363

FETCH_SPAN, MAX_DURATION = 5, 432000

2364

2365

mpd_url, stream_number, is_live = None, None, True

2366

2367

begin_index = 0

2368

download_start_time = ctx.get('start') or time.time()

2369

2370

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2371

if lack_early_segments:

2372

self.report_warning(bug_reports_message(

2373

'Starting download from the last 120 hours of the live stream since '

2374

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2375

lack_early_segments = True

2376

2377

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2378

fragments, fragment_base_url = None, None

2379

2380

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2381

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2382

# Obtain from MPD's maximum seq value

2383

old_mpd_url = mpd_url

2384

last_error = ctx.pop('last_error', None)

2385

expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403

2386

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2387

or (mpd_url, stream_number, False))

2388

if not refresh_sequence:

2389

if expire_fast and not is_live:

2390

return False, last_seq

2391

elif old_mpd_url == mpd_url:

2392

return True, last_seq

2393

try:

2394

fmts, _ = self._extract_mpd_formats_and_subtitles(

2395

mpd_url, None, note=False, errnote=False, fatal=False)

2396

except ExtractorError:

2397

fmts = None

2398

if not fmts:

2399

no_fragment_score += 2

2400

return False, last_seq

2401

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2402

fragments = fmt_info['fragments']

2403

fragment_base_url = fmt_info['fragment_base_url']

2404

assert fragment_base_url

2405

2406

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2407

return True, _last_seq

2408

2409

while is_live:

2410

fetch_time = time.time()

2411

if no_fragment_score > 30:

2412

return

2413

if last_segment_url:

2414

# Obtain from "X-Head-Seqnum" header value from each segment

2415

try:

2416

urlh = self._request_webpage(

2417

last_segment_url, None, note=False, errnote=False, fatal=False)

2418

except ExtractorError:

2419

urlh = None

2420

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2421

if last_seq is None:

2422

no_fragment_score += 2

2423

last_segment_url = None

2424

continue

2425

else:

2426

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2427

no_fragment_score += 2

2428

if not should_continue:

2429

continue

2430

2431

if known_idx > last_seq:

2432

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2438

# skip from the start when it's negative value

2439

known_idx = last_seq + begin_index

2440

if lack_early_segments:

2441

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2442

try:

2443

for idx in range(known_idx, last_seq):

2444

# do not update sequence here or you'll get skipped some part of it

2445

should_continue, _ = _extract_sequence_from_mpd(False, False)

2446

if not should_continue:

2447

known_idx = idx - 1

2448

raise ExtractorError('breaking out of outer loop')

2449

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2450

yield {

2451

'url': last_segment_url,

2452

'fragment_count': last_seq,

2453

}

2454

if known_idx == last_seq:

2455

no_fragment_score += 5

2456

else:

2457

no_fragment_score = 0

2458

known_idx = last_seq

2459

except ExtractorError:

2460

continue

2461

2462

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2463

2464

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the player JS URL found in the given ytcfg objects, or None.

    'PLAYER_JS_URL' is looked up first, then 'jsUrl' under
    'WEB_PLAYER_CONTEXT_CONFIGS'. The value is joined onto
    https://www.youtube.com. ``webpage`` is accepted but not used here.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None

2471

2472

def _download_player_url(self, video_id, fatal=False):

2473

res = self._download_webpage(

2474

'https://www.youtube.com/iframe_api',

2475

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2476

if res:

2477

player_version = self._search_regex(

2478

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2479

if player_version:

2480

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2481

2482

def _signature_cache_id(self, example_sig):

2483

""" Return a string representation of a signature """

2484

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2485

2486

@classmethod

2487

def _extract_player_info(cls, player_url):

2488

for player_re in cls._PLAYER_INFO_RE:

2489

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2494

return id_m.group('id')

2495

2496

def _load_player(self, video_id, player_url, fatal=True):

2497

player_id = self._extract_player_info(player_url)

2498

if player_id not in self._code_cache:

2499

code = self._download_webpage(

2500

player_url, video_id, fatal=fatal,

2501

note='Downloading player ' + player_id,

2502

errnote='Download of %s failed' % player_url)

2503

if code:

2504

self._code_cache[player_id] = code

2505

return self._code_cache.get(player_id)

2506

2507

def _extract_signature_function(self, video_id, player_url, example_sig):

2508

player_id = self._extract_player_info(player_url)

2509

2510

# Read from filesystem cache

2511

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2512

assert os.path.basename(func_id) == func_id

2513

2514

self.write_debug(f'Extracting signature function {func_id}')

2515

cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

2516

2517

if not cache_spec:

2518

code = self._load_player(video_id, player_url)

2519

if code:

2520

res = self._parse_sig_js(code)

2521

test_string = ''.join(map(chr, range(len(example_sig))))

2522

cache_spec = [ord(c) for c in res(test_string)]

2523

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2524

2525

return lambda s: ''.join(s[i] for i in cache_spec)

2526

2527

def _print_sig_code(self, func, example_sig):

2528

if not self.get_param('youtube_print_sig_code'):

2529

return

2530

2531

def gen_sig_code(idxs):

2532

def _genslice(start, end, step):

2533

starts = '' if start == 0 else str(start)

2534

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2535

steps = '' if step == 1 else (':%d' % step)

2536

return f's[{starts}{ends}{steps}]'

2537

2538

step = None

2539

# Quelch pyflakes warnings - start will be set when step is set

2540

start = '(Never used)'

2541

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2546

step = None

2547

continue

2548

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2558

2559

test_string = ''.join(map(chr, range(len(example_sig))))

2560

cache_res = func(test_string)

2561

cache_spec = [ord(c) for c in cache_res]

2562

expr_code = ' + '.join(gen_sig_code(cache_spec))

2563

signature_id_tuple = '(%s)' % (

2564

', '.join(str(len(p)) for p in example_sig.split('.')))

2565

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2566

' return %s\n') % (signature_id_tuple, expr_code)

2567

self.to_screen('Extracted signature function:\n' + code)

2568

2569

def _parse_sig_js(self, jscode):

2570

funcname = self._search_regex(

2571

(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2572

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2573

r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})$decodeURIComponent\(h\.s$\)',

2574

r'\bc&&$c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c$\)',

2575

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}$a,\d+$',

2576

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2577

r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2578

# Obsolete patterns

2579

r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2580

r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',

2581

r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2582

r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2583

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2584

r'\bc\s*&&\s*a\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2585

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2586

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),

2587

jscode, 'Initial JS player signature function name', group='sig')

2588

2589

jsi = JSInterpreter(jscode)

2590

initial_function = jsi.extract_function(funcname)

2591

return lambda s: initial_function([s])

2592

2593

def _cached(self, func, *cache_id):

2594

def inner(*args, **kwargs):

2595

if cache_id not in self._player_cache:

2596

try:

2597

self._player_cache[cache_id] = func(*args, **kwargs)

2598

except ExtractorError as e:

2599

self._player_cache[cache_id] = e

2600

except Exception as e:

2601

self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)

2602

2603

ret = self._player_cache[cache_id]

2604

if isinstance(ret, Exception):

raise ret

return ret

return inner

def _decrypt_signature(self, s, video_id, player_url):

2610

"""Turn the encrypted s field into a working signature"""

2611

extract_sig = self._cached(

2612

self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))

2613

func = extract_sig(video_id, player_url, s)

2614

self._print_sig_code(func, s)

2615

return func(s)

2616

2617

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    # The built function is cached per player URL.
    build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
    nsig_func = build_nsig_func(jsi, func_code)
    ret = nsig_func(s)

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret

2632

2633

def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n-parameter function.

    ``func_code`` comes from the 'youtube-nsig' cache when present. Otherwise
    the function name is located in the player JS, its code is extracted with
    JSInterpreter, and the result is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)
    # NOTE(review): on a cache hit the interpreter is built from func_code
    # rather than from the player JS - confirm this is what callers expect.
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    # Literal parentheses must be written \( and \); a bare "$" would anchor at
    # end-of-string and the pattern could never match.
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if idx:
        # The matched name is an array of function names; take entry ``idx``
        funcname = json.loads(js_to_json(self._search_regex(
            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
            f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]

    func_code = jsi.extract_function_code(funcname)
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code

2653

2654

def _extract_n_function_from_code(self, jsi, func_code):

2655

func = jsi.extract_function_from_code(*func_code)

def extract_nsig(s):

ret = func([s])

if ret.startswith('enhanced_except_'):

2660

raise ExtractorError('Signature function returned an exception')

return ret

return extract_nsig

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2666

"""

2667

Extract signatureTimestamp (sts)

2668

Required to tell API what sig/player version is in use.

2669

"""

2670

sts = None

2671

if isinstance(ytcfg, dict):

2672

sts = int_or_none(ytcfg.get('STS'))

2673

2674

if not sts:

2675

# Attempt to extract from player

2676

if player_url is None:

2677

error_msg = 'Cannot extract signature timestamp without player_url.'

2678

if fatal:

2679

raise ExtractorError(error_msg)

2680

self.report_warning(error_msg)

2681

return

2682

code = self._load_player(video_id, player_url, fatal=fatal)

2683

if code:

2684

sts = int_or_none(self._search_regex(

2685

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2686

'JS player signature timestamp', group='sts', fatal=fatal))

2687

return sts

2688

2689

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watch-time tracking URLs found in ``player_responses``.

    'videostatsPlaybackUrl' is handled first, then 'videostatsWatchtimeUrl'.
    If either URL is missing a warning is issued and the method stops.
    """
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2729

2730

@classmethod

2731

def _extract_from_webpage(cls, url, webpage):

2732

# Invidious Instances

2733

# https://github.com/yt-dlp/yt-dlp/issues/195

2734

# https://github.com/iv-org/invidious/pull/1730

2735

mobj = re.search(

2736

r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',

2737

webpage)

2738

if mobj:

2739

yield cls.url_result(mobj.group('url'), cls)

2740

raise cls.StopExtraction()

2741

2742

yield from super()._extract_from_webpage(url, webpage)

2743

2744

# lazyYT YouTube embed

2745

for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):

2746

yield cls.url_result(unescapeHTML(id_), cls, id_)

2747

2748

# Wordpress "YouTube Video Importer" plugin

2749

for m in re.findall(r'''(?x)<div[^>]+

2750

class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+

2751

data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):

2752

yield cls.url_result(m[-1], cls, m[-1])

2753

2754

@classmethod
def extract_id(cls, url):
    """Return the id that ``cls.get_temp_id`` finds in ``url``; raise ExtractorError if it finds none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2760

2761

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in ``data``.

    Start times are read from 'timeRangeStartMillis' and scaled by 1000;
    titles are read from the chapter renderer's 'title' / 'simpleText'.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)

2775

2776

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return chapters from the first macro-markers list in ``data`` that yields any.

    Returns an empty list when no list produces chapters.
    """
    def marker_time(marker):
        return parse_duration(self._get_text(marker, 'timeDescription'))

    def marker_title(marker):
        return self._get_text(marker, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    # Lists are tried in order; later ones are not evaluated once one succeeds.
    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, marker_time, marker_title, duration)
        if chapters:
            return chapters
    return []

2788

2789

def _extract_chapters_from_description(self, description, duration):
    """Build chapters from "timestamp title" lines found in *description*.

    Matching lines are passed to ``_extract_chapters`` in non-strict mode,
    i.e. they get sorted by start time before validation.
    """
    # Each match is a (timestamp, title) tuple
    timestamp_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def line_start(match):
        return parse_duration(match[0])

    def line_title(match):
        return match[1]

    return self._extract_chapters(
        timestamp_lines, chapter_time=line_start, chapter_title=line_title,
        duration=duration, strict=False)

2794

2795

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2800

'title': chapter_title(chapter),

2801

} for chapter in chapter_list or []]

2802

if not strict:

2803

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2804

2805

chapters = [{'start_time': 0}]

2806

for idx, chapter in enumerate(chapter_list):

2807

if chapter['start_time'] is None:

2808

self.report_warning(f'Incomplete chapter {idx}')

2809

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2810

chapters.append(chapter)

2811

else:

2812

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2813

return chapters[1:]

2814

2815

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. *parent* is stored
    under 'parent'; a falsy value is recorded as 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    body = self._get_text(comment_renderer, 'contentText')
    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author_name = self._get_text(comment_renderer, 'authorText')

    # The vote count is looked up under 'voteCount' first, then 'likeCount'
    vote_text = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        str)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': body,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author_name,
        'author_id': try_get(
            comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str),
        'author_thumbnail': try_get(
            comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }

2849

2850

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts for *video_id*, following continuations page by page.

    With ``parent=None`` this walks the top-level comment section; it calls
    itself with *parent* set to a comment ID to fetch the replies of that
    comment. *tracker* is the dict of running counters shared between those
    recursive calls and is created here when not supplied.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        # Reports the expected comment count and returns the continuation of
        # the requested sort order (None if no usable one is found)
        sort_continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            sort_continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not sort_continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return sort_continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # An empty item tells the consumer below to stop altogether
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(
                reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize.
    # max_comments itself is not consulted in this function.
    limits = [
        int_or_none(limit, default=sys.maxsize)
        for limit in self._configuration_arg('max_comments', ) + [''] * 4]
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = limits

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only searched for the header/sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2995

2996

@staticmethod

2997

def _generate_comment_continuation(video_id):

2998

"""

2999

Generates initial comment section continuation token from given video id

3000

"""

3001

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3002

return base64.b64encode(token.encode()).decode()

3003

3004

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section' found
    in *contents*, truncated to the first value of the 'max_comments'
    extractor argument when that parses as an integer.
    """
    def _real_comment_extract(contents):
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    first_limit = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(first_limit)
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

3014

3015

@staticmethod

3016

def _get_checkok_params():

3017

return {'contentCheckOk': True, 'racyCheckOk': True}

3018

3019

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player query.

    *sts* is included as 'signatureTimestamp' only when it is not None. The
    flags from ``_get_checkok_params`` are merged into the top level.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether *player_response* indicates an age-gated video.

    True when the playability status carries a 'desktopLegacyAgeGateReason',
    or when its 'status' or 'reason' contains one of the known age-gate
    markers. The comparison is case-insensitive, because the markers are
    lowercase while status values are upper-case identifiers (compare
    'UNPLAYABLE' in ``_is_unplayable``).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Only strings can be searched for markers; anything else is dropped
    reasons = traverse_obj(
        player_response, ('playabilityStatus', ('status', 'reason')),
        expected_type=str, default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        for reason in reasons for expected in AGE_GATE_REASONS)

3044

3045

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of *player_response* is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3048

3049

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for *video_id* as the given *client*.

    The signature timestamp is only looked up when a *player_url* is given.
    Returns the response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3068

3069

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into a list of client names.

    Besides explicit client names, 'default' and 'all' are accepted; anything
    else is skipped with a warning. With no valid request the default clients
    are used. For music URLs, the '<client>_music' variant of every selected
    client is appended when such a client exists. Duplicates are removed by
    ``orderedSet``.
    """
    default = ['android', 'web']
    # Clients whose name starts with "_" cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the music variants before extending, so that the list is
        # never modified while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3092

3093

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for *video_id* from the given *clients*.

    Clients are tried in the order given; further clients may be queued while
    iterating when a response turns out to be unplayable or age-gated.
    Returns ``(player_responses, player_url)``. If any request failed and no
    player response at all was collected, the last error is raised; otherwise
    failures are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() hands out the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS:
                continue
            if actual_client in all_clients:
                continue
            clients.append(client_name)
            all_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the one it replaces is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3174

3175

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts for *video_id*, followed by one final subtitles dict.

    The formats listed directly in *streaming_data* are yielded first, then
    the formats of the HLS/DASH manifests it references (unless skipped). The
    very last item yielded is the subtitles dict merged from those manifests,
    so callers have to split it off from the formats.

    @param streaming_data  List of 'streamingData' dicts of the player responses
    @param player_url      URL of the JS player; needed to decrypt signatures
    @param is_live         Whether the video is currently live
    @param duration        Video duration, used to detect damaged formats
    """
    itags, stream_ids = {}, []
    # Both map to quality *names*; q() below turns a name into its numeric rank
    itag_qualities, res_qualities = {}, {0: -1}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # An explicit null 'audioQuality' must not break extraction
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None when the format declares neither
                # 'quality' nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id and quality to manifest format *f*; return False for duplicates"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # itag_qualities holds quality names, so the looked-up value has to go
        # through q() to become a numeric rank like everywhere else.
        # q() gives -1 for anything it does not know, including the -1 default
        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the closest known resolution
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per storyboard level in the spec.

    The spec string is split on '|': its first field is the URL template and
    every following field describes one level as '#'-separated values.
    Malformed levels are skipped with a warning. Nothing is yielded when there
    is no usable base URL, or when *duration* is unknown, since the frame
    rate and fragment durations are derived from it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the URL template off the front of the spec
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Dividing by a missing/zero duration below would abort the whole extraction
        self.write_debug('Skipping storyboards: video duration is unknown')
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment only covers what remains of the duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3399

3400

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns ``(webpage, master_ytcfg, player_responses, player_url)``;
    *webpage* is None when 'webpage' is listed in the 'player_skip'
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3413

3414

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract the formats of *video_id*.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats,
    subtitles)``. 'isLive' from *video_details* takes precedence; only when
    it is None is 'isLiveNow' from the live broadcast details consulted.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles dict last
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles

3424

3425

def _real_extract(self, url):

3426

url, smuggled_data = unsmuggle_url(url, {})

3427

video_id = self._match_id(url)

3428

3429

base_url = self.http_scheme() + '//www.youtube.com/'

3430

webpage_url = base_url + 'watch?v=' + video_id

3431

3432

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3433

3434

playability_statuses = traverse_obj(

3435

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3436

3437

trailer_video_id = get_first(

3438

playability_statuses,

3439

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3440

expected_type=str)

3441

if trailer_video_id:

3442

return self.url_result(

3443

trailer_video_id, self.ie_key(), trailer_video_id)

3444

3445

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3446

if webpage else (lambda x: None))

3447

3448

video_details = traverse_obj(

3449

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3450

microformats = traverse_obj(

3451

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3452

expected_type=dict, default=[])

3453

video_title = (

3454

get_first(video_details, 'title')

3455

or self._get_text(microformats, (..., 'title'))

3456

or search_meta(['og:title', 'twitter:title', 'title']))

3457

video_description = get_first(video_details, 'shortDescription')

3458

3459

multifeed_metadata_list = get_first(

3460

player_responses,

3461

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3462

expected_type=str)

3463

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3464

if self.get_param('noplaylist'):

3465

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3470

# Unquote should take place before split on comma (,) since textual

3471

# fields may contain comma as well (see

3472

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3473

feed_data = urllib.parse.parse_qs(

3474

urllib.parse.unquote_plus(feed))

3475

3476

def feed_entry(name):

3477

return try_get(

3478

feed_data, lambda x: x[name][0], str)

3479

3480

feed_id = feed_entry('id')

3481

if not feed_id:

3482

continue

3483

feed_title = feed_entry('title')

3484

title = video_title

3485

if feed_title:

3486

title += ' (%s)' % feed_title

3487

entries.append({

3488

'_type': 'url_transparent',

3489

'ie_key': 'Youtube',

3490

'url': smuggle_url(

3491

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3492

{'force_singlefeed': True}),

3493

'title': title,

3494

})

3495

feed_ids.append(feed_id)

3496

self.to_screen(

3497

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3498

% (', '.join(feed_ids), video_id))

3499

return self.playlist_result(

3500

entries, video_id, video_title, video_description)

3501

3502

duration = int_or_none(

3503

get_first(video_details, 'lengthSeconds')

3504

or get_first(microformats, 'lengthSeconds')

3505

or parse_duration(search_meta('duration'))) or None

3506

3507

live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \

3508

self._list_formats(video_id, microformats, video_details, player_responses, player_url)

3509

3510

if not formats:

3511

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3512

self.report_drm(video_id)

3513

pemr = get_first(

3514

playability_statuses,

3515

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3516

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3517

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3518

if subreason:

3519

if subreason == 'The uploader has not made this video available in your country.':

3520

countries = get_first(microformats, 'availableCountries')

3521

if not countries:

3522

regions_allowed = search_meta('regionsAllowed')

3523

countries = regions_allowed.split(',') if regions_allowed else None

3524

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3525

reason += f'. {subreason}'

3526

if reason:

3527

self.raise_no_formats(reason, expected=True)

3528

3529

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3530

if not keywords and webpage:

3531

keywords = [

3532

unescapeHTML(m.group('content'))

3533

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3534

for keyword in keywords:

3535

if keyword.startswith('yt:stretch='):

3536

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3537

if mobj:

3538

# NB: float is intentional for forcing float division

3539

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3544

f['stretched_ratio'] = ratio

3545

break

3546

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3547

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3548

if thumbnail_url:

3549

thumbnails.append({

3550

'url': thumbnail_url,

3551

})

3552

original_thumbnails = thumbnails.copy()

3553

3554

# The best resolution thumbnails sometimes does not appear in the webpage

3555

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3556

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3557

thumbnail_names = [

3558

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3559

# in resolution, these are not the custom thumbnail. So de-prioritize them

3560

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3561

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3562

]

3563

n_thumbnail_names = len(thumbnail_names)

3564

thumbnails.extend({

3565

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3566

video_id=video_id, name=name, ext=ext,

3567

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3568

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3569

for thumb in thumbnails:

3570

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3571

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3572

self._remove_duplicate_formats(thumbnails)

3573

self._downloader._sort_thumbnails(original_thumbnails)

3574

3575

category = get_first(microformats, 'category') or search_meta('genre')

3576

channel_id = str_or_none(

3577

get_first(video_details, 'channelId')

3578

or get_first(microformats, 'externalChannelId')

3579

or search_meta('channelId'))

3580

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3581

3582

live_content = get_first(video_details, 'isLiveContent')

3583

is_upcoming = get_first(video_details, 'isUpcoming')

3584

if is_live is None:

3585

if is_upcoming or live_content is False:

3586

is_live = False

3587

if is_upcoming is None and (live_content or is_live):

3588

is_upcoming = False

3589

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3590

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3591

if not duration and live_end_time and live_start_time:

3592

duration = live_end_time - live_start_time

3593

3594

if is_live and self.get_param('live_from_start'):

3595

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3596

3597

formats.extend(self._extract_storyboard(player_responses, duration))

3598

3599

# source_preference is lower for throttled/potentially damaged formats

3600

self._sort_formats(formats, (

3601

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3606

'formats': formats,

3607

'thumbnails': thumbnails,

3608

# The best thumbnail that we are sure exists. Prevents unnecessary

3609

# URL checking if user don't care about getting the best possible thumbnail

3610

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3611

'description': video_description,

3612

'uploader': get_first(video_details, 'author'),

3613

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3614

'uploader_url': owner_profile_url,

3615

'channel_id': channel_id,

3616

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3617

'duration': duration,

3618

'view_count': int_or_none(

3619

get_first((video_details, microformats), (..., 'viewCount'))

3620

or search_meta('interactionCount')),

3621

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3622

'age_limit': 18 if (

3623

get_first(microformats, 'isFamilySafe') is False

3624

or search_meta('isFamilyFriendly') == 'false'

3625

or search_meta('og:restrictions:age') == '18+') else 0,

3626

'webpage_url': webpage_url,

3627

'categories': [category] if category else None,

3628

'tags': keywords,

3629

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3630

'is_live': is_live,

3631

'was_live': (False if is_live or is_upcoming or live_content is False

3632

else None if is_live is None or is_upcoming is None

3633

else live_content),

3634

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3635

'release_timestamp': live_start_time,

3636

}

3637

3638

if get_first(video_details, 'isPostLiveDvr'):

3639

self.write_debug('Video is in Post-Live Manifestless mode')

3640

info['live_status'] = 'post_live'

3641

if (duration or 0) > 4 * 3600:

3642

self.report_warning(

3643

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3644

'This is a known issue and patches are welcome')

3645

3646

subtitles = {}

3647

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3648

if pctr:

3649

def get_lang_code(track):

3650

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3651

or track.get('languageCode'))

3652

3653

# Converted into dicts to remove duplicates

3654

captions = {

3655

get_lang_code(sub): sub

3656

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3657

translation_languages = {

3658

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3659

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3660

3661

def process_language(container, base_url, lang_code, sub_name, query):

3662

lang_subs = container.setdefault(lang_code, [])

3663

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3674

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3675

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3676

for lang_code, caption_track in captions.items():

3677

base_url = caption_track.get('baseUrl')

3678

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3679

if not base_url:

3680

continue

3681

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3682

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3687

if not caption_track.get('isTranslatable'):

3688

continue

3689

for trans_code, trans_name in translation_languages.items():

3690

if not trans_code:

3691

continue

3692

orig_trans_code = trans_code

3693

if caption_track.get('kind') != 'asr':

3694

if not get_translated_subs:

3695

continue

3696

trans_code += f'-{lang_code}'

3697

trans_name += format_field(lang_name, None, ' from %s')

3698

# Add an "-orig" label to the original language so that it can be distinguished.

3699

# The subs are returned without "-orig" as well for compatibility

3700

if lang_code == f'a-{orig_trans_code}':

3701

process_language(

3702

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3703

# Setting tlang=lang returns damaged subtitles.

3704

process_language(automatic_captions, base_url, trans_code, trans_name,

3705

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3706

3707

info['automatic_captions'] = automatic_captions

3708

info['subtitles'] = subtitles

3709

3710

parsed_url = urllib.parse.urlparse(url)

3711

for component in [parsed_url.fragment, parsed_url.query]:

3712

query = urllib.parse.parse_qs(component)

3713

for k, v in query.items():

3714

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3715

d_k += '_time'

3716

if d_k not in info and k in s_ks:

3717

info[d_k] = parse_duration(query[k][0])

3718

3719

# Youtube Music Auto-generated description

3720

if video_description:

3721

mobj = re.search(

3722

r'''(?xs)

3723

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3724

(?P<album>[^\n]+)

3725

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3726

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3727

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3728

.+\nAuto-generated\ by\ YouTube\.\s*$

3729

''', video_description)

3730

if mobj:

3731

release_year = mobj.group('release_year')

3732

release_date = mobj.group('release_date')

3733

if release_date:

3734

release_date = release_date.replace('-', '')

3735

if not release_year:

3736

release_year = release_date[:4]

3737

info.update({

3738

'album': mobj.group('album'.strip()),

3739

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3740

'track': mobj.group('track').strip(),

3741

'release_date': release_date,

3742

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3748

if not initial_data:

3749

query = {'videoId': video_id}

3750

query.update(self._get_checkok_params())

3751

initial_data = self._extract_response(

3752

item_id=video_id, ep='next', fatal=False,

3753

ytcfg=master_ytcfg, query=query,

3754

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3755

note='Downloading initial data API JSON')

3756

3757

info['comment_count'] = traverse_obj(initial_data, (

3758

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3759

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3760

), (

3761

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3762

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3763

), expected_type=int_or_none, get_all=False)

3764

3765

try: # This will error if there is no livechat

3766

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3767

except (KeyError, IndexError, TypeError):

3768

pass

3769

else:

3770

info.setdefault('subtitles', {})['live_chat'] = [{

3771

# url is needed to set cookies

3772

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3773

'video_id': video_id,

3774

'ext': 'json',

3775

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3781

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3782

or self._extract_chapters_from_description(video_description, duration)

3783

or None)

3784

3785

contents = traverse_obj(

3786

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3787

expected_type=list, default=[])

3788

3789

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3790

if vpir:

3791

stl = vpir.get('superTitleLink')

3792

if stl:

3793

stl = self._get_text(stl)

3794

if try_get(

3795

vpir,

3796

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3797

info['location'] = stl

3798

else:

3799

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3800

if mobj:

3801

info.update({

3802

'series': mobj.group(1),

3803

'season_number': int(mobj.group(2)),

3804

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3809

list) or []):

3810

tbr = tlb.get('toggleButtonRenderer') or {}

3811

for getter, regex in [(

3812

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3813

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3814

lambda x: x['accessibility'],

3815

lambda x: x['accessibilityData']['accessibilityData'],

3816

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3817

label = (try_get(tbr, getter, dict) or {}).get('label')

3818

if label:

3819

mobj = re.match(regex, label)

3820

if mobj:

3821

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3822

break

3823

sbr_tooltip = try_get(

3824

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3825

if sbr_tooltip:

3826

like_count, dislike_count = sbr_tooltip.split(' / ')

3827

info.update({

3828

'like_count': str_to_int(like_count),

3829

'dislike_count': str_to_int(dislike_count),

3830

})

3831

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3832

if vsir:

3833

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3834

info.update({

3835

'channel': self._get_text(vor, 'title'),

3836

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3841

list) or []

3842

multiple_songs = False

3843

for row in rows:

3844

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3845

multiple_songs = True

3846

break

3847

for row in rows:

3848

mrr = row.get('metadataRowRenderer') or {}

3849

mrr_title = mrr.get('title')

3850

if not mrr_title:

3851

continue

3852

mrr_title = self._get_text(mrr, 'title')

3853

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3854

if mrr_title == 'License':

3855

info['license'] = mrr_contents_text

3856

elif not multiple_songs:

3857

if mrr_title == 'Album':

3858

info['album'] = mrr_contents_text

3859

elif mrr_title == 'Artist':

3860

info['artist'] = mrr_contents_text

3861

elif mrr_title == 'Song':

3862

info['track'] = mrr_contents_text

3863

3864

fallbacks = {

3865

'channel': 'uploader',

3866

'channel_id': 'uploader_id',

3867

'channel_url': 'uploader_url',

3868

}

3869

3870

# The upload date for scheduled, live and past live streams / premieres in microformats

3871

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3872

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3873

upload_date = (

3874

unified_strdate(get_first(microformats, 'uploadDate'))

3875

or unified_strdate(search_meta('uploadDate')))

3876

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3877

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3878

info['upload_date'] = upload_date

3879

3880

for to, frm in fallbacks.items():

3881

if not info.get(to):

3882

info[to] = info.get(frm)

3883

3884

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3890

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3891

is_membersonly = None

3892

is_premium = None

3893

if initial_data and is_private is not None:

3894

is_membersonly = False

3895

is_premium = False

3896

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3897

badge_labels = set()

3898

for content in contents:

3899

if not isinstance(content, dict):

3900

continue

3901

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3902

for badge_label in badge_labels:

3903

if badge_label.lower() == 'members only':

3904

is_membersonly = True

3905

elif badge_label.lower() == 'premium':

3906

is_premium = True

3907

elif badge_label.lower() == 'unlisted':

3908

is_unlisted = True

3909

3910

info['availability'] = self._availability(

3911

is_private=is_private,

3912

needs_premium=is_premium,

3913

needs_subscription=is_membersonly,

3914

needs_auth=info['age_limit'] >= 18,

3915

is_unlisted=None if is_private is None else is_unlisted)

3916

3917

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3918

3919

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3925

3926

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is passed through.

    The returned wrapper takes ``(self, url)``, splits the URL with
    ``unsmuggle_url``, flags music URLs in the smuggled data, and calls
    ``func(self, url, smuggled_data)``. When there is smuggled data and the
    result has entries, each entry's ``url`` is re-smuggled lazily.
    """

    def _with_smuggled_urls(items, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True

        result = func(self, url, smuggled_data)
        # Only touch the result when there is something to pass through
        if smuggled_data:
            if result.get('entries'):
                result['entries'] = _with_smuggled_urls(result['entries'], smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3947

channel_id = self._html_search_meta(

3948

'channelId', webpage, 'channel id', default=None)

3949

if channel_id:

3950

return channel_id

3951

channel_url = self._html_search_meta(

3952

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3953

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3954

'twitter:app:url:googleplay'), webpage, 'channel url')

3955

return self._search_regex(

3956

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3957

channel_url, 'channel id')

3958

3959

@staticmethod

3960

def _extract_basic_item_renderer(item):

3961

# Modified from _extract_grid_item_renderer

3962

known_basic_renderers = (

3963

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3964

)

3965

for key, renderer in item.items():

3966

if not isinstance(renderer, dict):

3967

continue

3968

elif key in known_basic_renderers:

3969

return renderer

3970

elif key.startswith('grid') and key.endswith('Renderer'):

3971

return renderer

3972

3973

def _grid_entries(self, grid_renderer):

3974

for item in grid_renderer['items']:

3975

if not isinstance(item, dict):

3976

continue

3977

renderer = self._extract_basic_item_renderer(item)

3978

if not isinstance(renderer, dict):

3979

continue

3980

title = self._get_text(renderer, 'title')

3981

3982

# playlist

3983

playlist_id = renderer.get('playlistId')

3984

if playlist_id:

3985

yield self.url_result(

3986

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3987

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3992

if video_id:

3993

yield self._extract_video(renderer)

3994

continue

3995

# channel

3996

channel_id = renderer.get('channelId')

3997

if channel_id:

3998

yield self.url_result(

3999

'https://www.youtube.com/channel/%s' % channel_id,

4000

ie=YoutubeTabIE.ie_key(), video_title=title)

4001

continue

4002

# generic endpoint URL support

4003

ep_url = urljoin('https://www.youtube.com/', try_get(

4004

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4005

str))

4006

if ep_url:

4007

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4008

if ie.suitable(ep_url):

4009

yield self.url_result(

4010

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4011

break

4012

4013

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for *renderer*.

    Checked in order: a video ID under ``playlistItemData``, a playlist ID
    under the watch endpoint (combined with that endpoint's video ID when
    present), then a browse ID. Returns None when none of them is found.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            url = f'https://music.youtube.com/playlist?list={playlist_id}'
        # Both variants are reported under the playlist ID
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4030

4031

def _shelf_entries_from_content(self, shelf_renderer):

4032

content = shelf_renderer.get('content')

4033

if not isinstance(content, dict):

4034

return

4035

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4036

if renderer:

4037

# TODO: add support for nested playlists so each shelf is processed

4038

# as separate playlist

4039

# TODO: this includes only first N items

4040

yield from self._grid_entries(renderer)

4041

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then its content entries.

    When *skip_channels* is true and the shelf URL contains ``/channels?``,
    nothing at all is yielded for this shelf.
    """
    endpoint = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

4061

4062

def _playlist_entries(self, video_list_renderer):

4063

for content in video_list_renderer['contents']:

4064

if not isinstance(content, dict):

4065

continue

4066

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4067

if not isinstance(renderer, dict):

4068

continue

4069

video_id = renderer.get('videoId')

4070

if not video_id:

4071

continue

4072

yield self._extract_video(renderer)

4073

4074

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video nested at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4081

4082

def _video_entry(self, video_renderer):

4083

video_id = video_renderer.get('videoId')

4084

if video_id:

4085

return self._extract_video(video_renderer)

4086

4087

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap-command URL, or None if it has none."""
    tap_url_path = ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')
    url = urljoin('https://youtube.com', traverse_obj(hashtag_tile_renderer, tap_url_path))
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4093

4094

def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a backstage post.

    In order: the attached video (if any), the attached playlist (if any),
    then every video linked from the post text whose ID differs from the
    attached video's ID. Yields nothing when the post renderer is missing.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already handled above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4129

4130

def _post_thread_continuation_entries(self, post_thread_continuation):

4131

contents = post_thread_continuation.get('contents')

4132

if not isinstance(contents, list):

4133

return

4134

for content in contents:

4135

renderer = content.get('backstagePostThreadRenderer')

4136

if isinstance(renderer, dict):

4137

yield from self._post_thread_entries(renderer)

4138

continue

4139

renderer = content.get('videoRenderer')

4140

if isinstance(renderer, dict):

4141

yield self._video_entry(renderer)

4142

4143

r''' # unused

4144

def _rich_grid_entries(self, contents):

4145

for content in contents:

4146

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4147

if video_renderer:

4148

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    *continuation_list* is an out-parameter: it is reset to ``[None]`` and
    its single slot receives the value of ``self._extract_continuation``,
    taken from the most specific renderer that provides one (item renderer,
    then section renderer, then *parent_renderer*).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            for key, renderer in isr_content.items():
                handler = known_renderers.get(key)
                if handler is None:
                    continue
                # Handlers may produce empty (falsy) entries; drop those
                yield from (entry for entry in handler(renderer) if entry)
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4201

4202

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep following its continuations.

    The first batch is taken from the tab's 'sectionListRenderer' or
    'richGridRenderer'. Every following page is requested with the current
    continuation and dispatched on the first recognised key of either
    'continuationContents' or the first item of 'appendContinuationItemsAction'.
    Paging stops when there is no continuation left, when the response is
    empty, or when the response holds nothing recognisable.

    @param tab: tab renderer; only its 'content' dict is read
    @param item_id: identifier used to label every page request
    @param ytcfg: passed on to the header generation and to the API requests
    @param account_syncid: passed on to the header generation
    @param visitor_data: initial visitor data; replaced by the value found
                         in a response whenever one is present
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is resolved when this is called, so the
        # re-assignments further down are picked up here as well
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    section_list = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
    parent_renderer = section_list or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the keys of 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for the first appended continuation item, paired with the key
    # under which the whole item list is handed to the handler
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        known_key = next((key for key in continuation_contents if key in continuation_handlers), None)
        if known_key is not None:
            continuation_renderer = continuation_contents[known_key]
            continuation_list = [None]
            yield from continuation_handlers[known_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # An empty renderer still falls through to the item-based lookup below
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        known_key = next((key for key in first_item if key in item_handlers), None)
        if known_key is None:
            break

        handler, items_key = item_handlers[known_key]
        video_items_renderer = {items_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer of the first tab whose 'selected' value is True.

    Each tab is looked up under 'tabRenderer' or 'expandableTabRenderer'.

    @param tabs: iterable of tab dicts
    @param fatal: raise ExtractorError when no tab is selected;
                  otherwise None is returned in that case
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (candidate for candidate in tab_renderers if candidate.get('selected') is True), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None

4286

4287

def _extract_uploader(self, data):
    """
    Extract uploader fields from the secondary playlist sidebar.

    Reads the first run of the video owner's title found in
    'playlistSidebarSecondaryInfoRenderer'.

    @param data: response
    @returns dict holding any of 'uploader', 'uploader_id' and
             'uploader_url'; keys whose value is None are left out
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    # Text of the form "by <name> and <N> others" is reduced to the name;
    # anything else is kept unchanged
    uploader_name = self._search_regex(
        r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
    uploader_id = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str)
    canonical_base_url = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)

    uploader = {
        'uploader': uploader_name,
        'uploader_id': uploader_id,
        'uploader_url': urljoin('https://www.youtube.com/', canonical_base_url),
    }
    return {field: value for field, value in uploader.items() if value is not None}

4302

4303

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Collect playlist metadata from the page data and return a playlist
    result whose entries come from the selected tab.

    Metadata is read from 'channelMetadataRenderer' when present, otherwise
    from 'playlistMetadataRenderer'. Uploader fields fall back to the
    playlist sidebar when no channel id was found.

    @param item_id: used as playlist id when the metadata provides none
    @param ytcfg: passed on to the entry extraction
    @param data: response
    @param tabs: list of tabs; one of them must be selected
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    channel_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_renderer:
        channel_name = channel_renderer.get('title')
        channel_url = channel_renderer.get('channelUrl')
        channel_id = channel_renderer.get('externalId')
    renderer = channel_renderer or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The name of the selected tab (and its expanded text) is appended to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4394

4395

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline (watch page) playlist, page by page.

    Each page is requested from the 'next' endpoint, starting at the last
    video of the previous page. Videos up to and including the last one
    already yielded are skipped, so paging ends as soon as a page brings
    nothing new.

    @param playlist: playlist renderer of the first page
    @param playlist_id: id sent with every page request
    @param data: response the playlist came from; used for the request headers
    @param ytcfg: used for the request headers and the API requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item is not guaranteed to be a playlistPanelVideoRenderer;
        # fall back to an empty dict so that the defaults below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to read the next page from; stop instead of passing on None
            return

4425

4426

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist found on a watch page.

    When the playlist links to a page of its own that differs from `url`,
    a url result pointing at that page is returned. Otherwise (or for
    playlist ids known to be unviewable) the playlist is extracted inline.

    @param item_id: used as playlist id when the playlist provides none
    @param url: URL being extracted; base for resolving the playlist URL
    @param data: response; fallback source for the title
    @param playlist: playlist renderer
    @param ytcfg: passed on to the inline extraction
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            inline_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4448

4449

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its badges.

    Note: the playlist is only treated as public when a 'public' label
    is actually present; nothing is assumed otherwise.
    @param data: response
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in privacy_dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if not label:
            continue
        # The first selected entry that has a label counts as a badge
        badge_labels.add(label.lower())
        break

    is_private = None
    is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_private = False
            is_unlisted = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4480

4481

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` found among the items of
    the playlist sidebar, or None when there is none.

    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value found must have
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos'
    menu entry of the primary playlist sidebar. When that entry is missing,
    built-in defaults are sent instead. Nothing is requested (and None is
    returned) when the sidebar renderer itself is missing.

    @param item_id: playlist id; also used to build the default browse id
    @param data: response the sidebar is read from
    @param ytcfg: used for the request headers and the API request
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return

    browse_endpoint = {}
    menu_items = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4525

4526

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4529

4530

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, with retries.

    An attempt is retried on network errors other than HTTP 403/429 and
    when the initial data lacks all of 'contents', 'currentVideoEndpoint'
    and 'onResponseReceivedActions'. Any other extraction error, and any
    alert reported by the page, is passed to _error_or_warning and ends
    the attempts.

    @param url: page to download
    @param item_id: identifier used for the download and extraction
    @param fatal: passed to the retry manager, the initial data
                  extraction and _error_or_warning
    @returns tuple (webpage, data) from the most recent attempt;
             either may be None
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_rejected = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_rejected:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when ytcfg is available or the session is not
    authenticated. Otherwise raises ExtractorError when `fatal` is set and
    'authcheck' is not in the 'skip' extractor argument, and only warns
    in every other case.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not authcheck_skipped:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4569

4570

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the page data and ytcfg, from the webpage if possible and from
    the API otherwise.

    Unless the webpage is skipped, it is downloaded first; ytcfg is taken
    from it when none was passed in. When this yields no data, the data is
    fetched through the resolved tab endpoint instead.

    @param url: page to extract
    @param item_id: identifier used for all requests
    @param ytcfg: already known ytcfg, if any
    @param fatal: whether a home page redirect, the authentication check
                  and the API fallback may raise
    @param webpage_fatal: whether a failed webpage download may raise
    @param default_client: client used for the API fallback
    @returns tuple (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        # Landing on the home page without having asked for it is reported
        # as a missing channel/playlist
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4589

4590

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the data of the endpoint it
    points to.

    A resolved 'browseEndpoint' is requested from 'browse', a
    'watchEndpoint' from 'next'; the browse endpoint is tried first.

    @param url: URL to resolve
    @param item_id: identifier used for the requests and the warning
    @param ytcfg: used for the request headers and the API requests
    @param fatal: raise ExtractorError when the URL cannot be resolved;
                  otherwise a warning is reported and None is returned
    @param default_client: client used for the headers and the requests
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

4607

4608

# Default for the `params` argument of _search_results; when it is falsy,
# no 'params' key is sent with the search query
_SEARCH_PARAMS = None

4609

4610

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations until none is
    returned.

    @param query: search query
    @param params: search params sent with the query; defaults to
                   _SEARCH_PARAMS and is omitted when falsy
    @param default_client: client used for ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # A response must contain at least one of the top-level keys above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        # The continuation of the previous page is merged into the query
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        parent_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(parent_renderer, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4645

IE_DESC = 'YouTube Tabs'

4646

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4655

(?P<not_channel>

4656

feed/|hashtag/|

4657

(?:playlist|watch)\?.*?\blist=

4658

)|

4659

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4664

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4665

}

4666

IE_NAME = 'youtube:tab'

4667

4668

_TESTS = [{

4669

'note': 'playlists, multipage',

4670

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4671

'playlist_mincount': 94,

4672

'info_dict': {

4673

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4674

'title': 'Igor Kleiner - Playlists',

4675

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4676

'uploader': 'Igor Kleiner',

4677

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4678

'channel': 'Igor Kleiner',

4679

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4680

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4681

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4682

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4683

'channel_follower_count': int

4684

},

4685

}, {

4686

'note': 'playlists, multipage, different order',

4687

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4688

'playlist_mincount': 94,

4689

'info_dict': {

4690

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4691

'title': 'Igor Kleiner - Playlists',

4692

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4693

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4694

'uploader': 'Igor Kleiner',

4695

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4696

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4697

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4698

'channel': 'Igor Kleiner',

4699

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4700

'channel_follower_count': int

4701

},

4702

}, {

4703

'note': 'playlists, series',

4704

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4705

'playlist_mincount': 5,

4706

'info_dict': {

4707

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4708

'title': '3Blue1Brown - Playlists',

4709

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4710

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4711

'uploader': '3Blue1Brown',

4712

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4713

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4714

'channel': '3Blue1Brown',

4715

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4716

'tags': ['Mathematics'],

4717

'channel_follower_count': int

4718

},

4719

}, {

4720

'note': 'playlists, singlepage',

4721

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4722

'playlist_mincount': 4,

4723

'info_dict': {

4724

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4725

'title': 'ThirstForScience - Playlists',

4726

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4727

'uploader': 'ThirstForScience',

4728

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4729

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4730

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4731

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4732

'tags': 'count:13',

4733

'channel': 'ThirstForScience',

4734

'channel_follower_count': int

4735

}

4736

}, {

4737

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4738

'only_matching': True,

4739

}, {

4740

'note': 'basic, single video playlist',

4741

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4742

'info_dict': {

4743

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4744

'uploader': 'Sergey M.',

4745

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4746

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4751

'channel': 'Sergey M.',

4752

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4753

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4754

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4759

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4760

'info_dict': {

4761

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4762

'uploader': 'Sergey M.',

4763

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4764

'title': 'youtube-dl empty playlist',

4765

'tags': [],

4766

'channel': 'Sergey M.',

4767

'description': '',

4768

'modified_date': '20160902',

4769

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4770

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4771

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4777

'info_dict': {

4778

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4779

'title': 'lex will - Home',

4780

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4781

'uploader': 'lex will',

4782

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'channel': 'lex will',

4784

'tags': ['bible', 'history', 'prophesy'],

4785

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4786

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4787

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4788

'channel_follower_count': int

4789

},

4790

'playlist_mincount': 2,

4791

}, {

4792

'note': 'Videos tab',

4793

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4794

'info_dict': {

4795

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4796

'title': 'lex will - Videos',

4797

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4798

'uploader': 'lex will',

4799

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'tags': ['bible', 'history', 'prophesy'],

4801

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4802

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4803

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4804

'channel': 'lex will',

4805

'channel_follower_count': int

4806

},

4807

'playlist_mincount': 975,

4808

}, {

4809

'note': 'Videos tab, sorted by popular',

4810

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4811

'info_dict': {

4812

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4813

'title': 'lex will - Videos',

4814

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4815

'uploader': 'lex will',

4816

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4817

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4818

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4819

'channel': 'lex will',

4820

'tags': ['bible', 'history', 'prophesy'],

4821

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4822

'channel_follower_count': int

4823

},

4824

'playlist_mincount': 199,

4825

}, {

4826

'note': 'Playlists tab',

4827

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4828

'info_dict': {

4829

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4830

'title': 'lex will - Playlists',

4831

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4832

'uploader': 'lex will',

4833

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4834

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4835

'channel': 'lex will',

4836

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4837

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4838

'tags': ['bible', 'history', 'prophesy'],

4839

'channel_follower_count': int

4840

},

4841

'playlist_mincount': 17,

4842

}, {

4843

'note': 'Community tab',

4844

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4845

'info_dict': {

4846

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4847

'title': 'lex will - Community',

4848

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4849

'uploader': 'lex will',

4850

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4851

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4852

'channel': 'lex will',

4853

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4854

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4855

'tags': ['bible', 'history', 'prophesy'],

4856

'channel_follower_count': int

4857

},

4858

'playlist_mincount': 18,

4859

}, {

4860

'note': 'Channels tab',

4861

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4862

'info_dict': {

4863

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4864

'title': 'lex will - Channels',

4865

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4866

'uploader': 'lex will',

4867

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4868

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4869

'channel': 'lex will',

4870

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4871

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4872

'tags': ['bible', 'history', 'prophesy'],

4873

'channel_follower_count': int

4874

},

4875

'playlist_mincount': 12,

4876

}, {

4877

'note': 'Search tab',

4878

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4879

'playlist_mincount': 40,

4880

'info_dict': {

4881

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4882

'title': '3Blue1Brown - Search - linear algebra',

4883

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4884

'uploader': '3Blue1Brown',

4885

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4886

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4887

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4888

'tags': ['Mathematics'],

4889

'channel': '3Blue1Brown',

4890

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4891

'channel_follower_count': int

4892

},

4893

}, {

4894

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4895

'only_matching': True,

4896

}, {

4897

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4898

'only_matching': True,

4899

}, {

4900

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4901

'only_matching': True,

4902

}, {

4903

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4904

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4905

'info_dict': {

4906

'title': '29C3: Not my department',

4907

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4908

'uploader': 'Christiaan008',

4909

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4910

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4911

'tags': [],

4912

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4913

'view_count': int,

4914

'modified_date': '20150605',

4915

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4916

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4917

'channel': 'Christiaan008',

4918

},

4919

'playlist_count': 96,

4920

}, {

4921

'note': 'Large playlist',

4922

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4923

'info_dict': {

4924

'title': 'Uploads from Cauchemar',

4925

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4926

'uploader': 'Cauchemar',

4927

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4928

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4929

'tags': [],

4930

'modified_date': r're:\d{8}',

4931

'channel': 'Cauchemar',

4932

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4933

'view_count': int,

4934

'description': '',

4935

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4936

},

4937

'playlist_mincount': 1123,

4938

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4939

}, {

4940

'note': 'even larger playlist, 8832 videos',

4941

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4942

'only_matching': True,

4943

}, {

4944

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4945

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4946

'info_dict': {

4947

'title': 'Uploads from Interstellar Movie',

4948

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4949

'uploader': 'Interstellar Movie',

4950

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4951

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4952

'tags': [],

4953

'view_count': int,

4954

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4955

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4956

'channel': 'Interstellar Movie',

4957

'description': '',

4958

'modified_date': r're:\d{8}',

4959

},

4960

'playlist_mincount': 21,

4961

}, {

4962

'note': 'Playlist with "show unavailable videos" button',

4963

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4964

'info_dict': {

4965

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4966

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4967

'uploader': 'Phim Siêu Nhân Nhật Bản',

4968

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4969

'view_count': int,

4970

'channel': 'Phim Siêu Nhân Nhật Bản',

4971

'tags': [],

4972

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4973

'description': '',

4974

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4975

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4976

'modified_date': r're:\d{8}',

4977

},

4978

'playlist_mincount': 200,

4979

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4980

}, {

4981

'note': 'Playlist with unavailable videos in page 7',

4982

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4983

'info_dict': {

4984

'title': 'Uploads from BlankTV',

4985

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4986

'uploader': 'BlankTV',

4987

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4988

'channel': 'BlankTV',

4989

'channel_url': 'https://www.youtube.com/c/blanktv',

4990

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4991

'view_count': int,

4992

'tags': [],

4993

'uploader_url': 'https://www.youtube.com/c/blanktv',

4994

'modified_date': r're:\d{8}',

4995

'description': '',

4996

},

4997

'playlist_mincount': 1000,

4998

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4999

}, {

5000

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

5001

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5002

'info_dict': {

5003

'title': 'Data Analysis with Dr Mike Pound',

5004

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5005

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5006

'uploader': 'Computerphile',

5007

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5008

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5009

'tags': [],

5010

'view_count': int,

5011

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5012

'channel_url': 'https://www.youtube.com/user/Computerphile',

5013

'channel': 'Computerphile',

5014

},

5015

'playlist_mincount': 11,

5016

}, {

5017

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5018

'only_matching': True,

5019

}, {

5020

'note': 'Playlist URL that does not actually serve a playlist',

5021

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5026

'uploader': 'STREEM',

5027

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5028

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5029

'upload_date': '20150526',

5030

'license': 'Standard YouTube License',

5031

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5032

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5039

},

5040

'skip': 'This video is not available.',

5041

'add_ie': [YoutubeIE.ie_key()],

5042

}, {

5043

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5044

'only_matching': True,

5045

}, {

5046

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5047

'only_matching': True,

5048

}, {

5049

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5050

'info_dict': {

5051

'id': 'Wq15eF5vCbI', # This will keep changing

5052

'ext': 'mp4',

5053

'title': str,

5054

'uploader': 'Sky News',

5055

'uploader_id': 'skynews',

5056

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5057

'upload_date': r're:\d{8}',

5058

'description': str,

5059

'categories': ['News & Politics'],

5060

'tags': list,

5061

'like_count': int,

5062

'release_timestamp': 1642502819,

5063

'channel': 'Sky News',

5064

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5065

'age_limit': 0,

5066

'view_count': int,

5067

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5068

'playable_in_embed': True,

5069

'release_date': '20220118',

5070

'availability': 'public',

5071

'live_status': 'is_live',

5072

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5073

'channel_follower_count': int

5074

},

5075

'params': {

5076

'skip_download': True,

5077

},

5078

'expected_warnings': ['Ignoring subtitle tracks found in '],

5079

}, {

5080

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5085

'uploader': 'The Young Turks',

5086

'uploader_id': 'TheYoungTurks',

5087

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5088

'upload_date': '20150715',

5089

'license': 'Standard YouTube License',

5090

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5091

'categories': ['News & Politics'],

5092

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5097

},

5098

'only_matching': True,

5099

}, {

5100

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5101

'only_matching': True,

5102

}, {

5103

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5104

'only_matching': True,

5105

}, {

5106

'note': 'A channel that is not live. Should raise error',

5107

'url': 'https://www.youtube.com/user/numberphile/live',

5108

'only_matching': True,

5109

}, {

5110

'url': 'https://www.youtube.com/feed/trending',

5111

'only_matching': True,

5112

}, {

5113

'url': 'https://www.youtube.com/feed/library',

5114

'only_matching': True,

5115

}, {

5116

'url': 'https://www.youtube.com/feed/history',

5117

'only_matching': True,

5118

}, {

5119

'url': 'https://www.youtube.com/feed/subscriptions',

5120

'only_matching': True,

5121

}, {

5122

'url': 'https://www.youtube.com/feed/watch_later',

5123

'only_matching': True,

5124

}, {

5125

'note': 'Recommended - redirects to home page.',

5126

'url': 'https://www.youtube.com/feed/recommended',

5127

'only_matching': True,

5128

}, {

5129

'note': 'inline playlist with not always working continuations',

5130

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5131

'only_matching': True,

5132

}, {

5133

'url': 'https://www.youtube.com/course',

5134

'only_matching': True,

5135

}, {

5136

'url': 'https://www.youtube.com/zsecurity',

5137

'only_matching': True,

5138

}, {

5139

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5140

'only_matching': True,

5141

}, {

5142

'url': 'https://www.youtube.com/TheYoungTurks/live',

5143

'only_matching': True,

5144

}, {

5145

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5152

}, {

5153

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5154

'only_matching': True,

5155

}, {

5156

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5157

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5158

'only_matching': True

5159

}, {

5160

'note': '/browse/ should redirect to /channel/',

5161

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5162

'only_matching': True

5163

}, {

5164

'note': 'VLPL, should redirect to playlist?list=PL...',

5165

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5166

'info_dict': {

5167

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5168

'uploader': 'NoCopyrightSounds',

5169

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5170

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5171

'title': 'NCS : All Releases 💿',

5172

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5173

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5174

'modified_date': r're:\d{8}',

5175

'view_count': int,

5176

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5177

'tags': [],

5178

'channel': 'NoCopyrightSounds',

5179

},

5180

'playlist_mincount': 166,

5181

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5182

}, {

5183

'note': 'Topic, should redirect to playlist?list=UU...',

5184

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5185

'info_dict': {

5186

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5187

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5188

'title': 'Uploads from Royalty Free Music - Topic',

5189

'uploader': 'Royalty Free Music - Topic',

5190

'tags': [],

5191

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5192

'channel': 'Royalty Free Music - Topic',

5193

'view_count': int,

5194

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5195

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5196

'modified_date': r're:\d{8}',

5197

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5198

'description': '',

5199

},

5200

'expected_warnings': [

5201

'The URL does not have a videos tab',

5202

r'[Uu]navailable videos (are|will be) hidden',

5203

],

5204

'playlist_mincount': 101,

5205

}, {

5206

'note': 'Topic without a UU playlist',

5207

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5208

'info_dict': {

5209

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5210

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5211

'tags': [],

5212

},

5213

'expected_warnings': [

5214

'the playlist redirect gave error',

5215

],

5216

'playlist_mincount': 9,

5217

}, {

5218

'note': 'Youtube music Album',

5219

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5220

'info_dict': {

5221

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5222

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5227

'modified_date': r're:\d{8}',

5228

},

5229

'playlist_count': 50,

5230

}, {

5231

'note': 'unlisted single video playlist',

5232

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5233

'info_dict': {

5234

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5235

'uploader': 'colethedj',

5236

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5237

'title': 'yt-dlp unlisted playlist test',

5238

'availability': 'unlisted',

5239

'tags': [],

5240

'modified_date': '20220418',

5241

'channel': 'colethedj',

5242

'view_count': int,

5243

'description': '',

5244

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5245

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5246

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5251

'url': 'https://www.youtube.com/feed/recommended',

5252

'info_dict': {

5253

'id': 'recommended',

5254

'title': 'recommended',

5255

'tags': [],

5256

},

5257

'playlist_mincount': 50,

5258

'params': {

5259

'skip_download': True,

5260

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5261

},

5262

}, {

5263

'note': 'API Fallback: /videos tab, sorted by oldest first',

5264

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5265

'info_dict': {

5266

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5267

'title': 'Cody\'sLab - Videos',

5268

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5269

'uploader': 'Cody\'sLab',

5270

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5271

'channel': 'Cody\'sLab',

5272

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5273

'tags': [],

5274

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5275

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5276

'channel_follower_count': int

5277

},

5278

'playlist_mincount': 650,

5279

'params': {

5280

'skip_download': True,

5281

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5282

},

5283

}, {

5284

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5285

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5286

'info_dict': {

5287

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5288

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5289

'title': 'Uploads from Royalty Free Music - Topic',

5290

'uploader': 'Royalty Free Music - Topic',

5291

'modified_date': r're:\d{8}',

5292

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5293

'description': '',

5294

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5295

'tags': [],

5296

'channel': 'Royalty Free Music - Topic',

5297

'view_count': int,

5298

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5299

},

5300

'expected_warnings': [

5301

'does not have a videos tab',

5302

r'[Uu]navailable videos (are|will be) hidden',

5303

],

5304

'playlist_mincount': 101,

5305

'params': {

5306

'skip_download': True,

5307

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5308

},

5309

}, {

5310

'note': 'non-standard redirect to regional channel',

5311

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5312

'only_matching': True

5313

}, {

5314

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5315

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5316

'info_dict': {

5317

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5318

'modified_date': '20220407',

5319

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5320

'tags': [],

5321

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5322

'uploader': 'pukkandan',

5323

'availability': 'unlisted',

5324

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5325

'channel': 'pukkandan',

5326

'description': 'Test for collaborative playlist',

5327

'title': 'yt-dlp test - collaborative playlist',

5328

'view_count': int,

5329

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5330

},

5331

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return False for any URL that YoutubeIE accepts, else defer to the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5337

5338

# Splits a tab URL into three named parts: `pre` (everything _VALID_URL matches),
# an optional `tab` path segment such as "/videos", and `post` (the remainder).
# The `(?(not_channel)|...)` conditional only attempts to match a tab segment
# when the `not_channel` group did not take part in the match.
# NOTE(review): `not_channel` is presumably a named group inside _VALID_URL,
# which is defined outside this view - confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5339

5340

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or channel page, or hand off to another extractor.

    The URL host is rewritten to www.youtube.com and split with ``_URL_RE``.
    Depending on the URL and the data returned by ``_extract_data`` this
    either returns a ``url_result`` (single video, resolved music album,
    regional redirect) or the result of ``_extract_from_tabs`` /
    ``_extract_from_playlist``.

    Raises:
        ExtractorError: when the page cannot be recognized, a music album
            cannot be resolved, or an explicitly requested tab is missing.
        UserNotLive: when a ``/live`` tab was requested but the page still
            rendered tabs instead of redirecting to a video.
    """
    item_id = self._match_id(url)
    # Force the canonical host regardless of which mirror/subdomain was given
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def get_mobj(target_url):
        # Groups that did not take part in the match are None; expose them as ''
        groups = self._URL_RE.match(target_url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def single_video_result(vid):
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    redirect_warning = None
    mobj = get_mobj(url)
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return single_video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
         'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and follow_redirects:
        # Re-attach the requested tab and trailing part to the regional URL
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading "/"
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The user explicitly asked for this tab, so do not substitute another
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Fall back to a single video: prefer the one named in the response
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return single_video_result(video_id)

    raise ExtractorError('Unable to recognize tab page')

5468

5469

5470

# Accepts bare playlist IDs and any youtube/invidious URL carrying a `list=`
# query parameter, then delegates the actual extraction to YoutubeTabIE.
class YoutubePlaylistIE(InfoExtractor):
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # The host/path prefix is optional, so a playlist ID on its own also matches
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs YoutubeTabIE accepts or that carry a `v` parameter; else use the inherited check."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-level import kept exactly as in the original;
        # presumably needed where this method is used without the module-level
        # imports - confirm before removing.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5579

5580

5581

# Handles youtu.be short links that also carry a `list=` parameter by turning
# them into a regular watch URL and delegating to YoutubeTabIE.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5630

5631

5632

# Maps `embed/live_stream?channel=<id>` URLs onto the channel's /live page,
# which is then handled by YoutubeTabIE.
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the /live page of the channel named in the `channel` parameter."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5645

5646

5647

# Expands the `ytuser:<name>` shorthand into the user's /videos page, which is
# then handled by YoutubeTabIE.
class YoutubeYtUserIE(InfoExtractor):
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the /videos page of the user named after the `ytuser:` prefix."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5661

5662

5663

# Keyword extractor: `:ytfav` (and its spelling variants) resolves to the
# `LL` playlist URL, which is then handled by YoutubeTabIE.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the `LL` playlist; the matched keyword itself carries no data."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

5680

5681

5682

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):

5683

IE_NAME = 'youtube:notif'

5684

IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'

5685

_VALID_URL = r':ytnotif(?:ication)?s?'

5686

_LOGIN_REQUIRED = True

5687

_TESTS = [{

5688

'url': ':ytnotif',

5689

'only_matching': True,

5690

}, {

5691

'url': ':ytnotifications',

5692

'only_matching': True,

5693

}]

5694

5695

def _extract_notification_menu(self, response, continuation_list):
    """Yield an entry for each notification renderer found in *response*.

    ``continuation_list`` is used as an output slot: its first element is
    reset to None and then set to the last ``continuationItemRenderer``
    found among the items, if any.
    """
    # Layout of an initial menu response vs. a continuation response
    initial_items_path = (
        'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
        'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
    continuation_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')

    items = traverse_obj(
        response, initial_items_path, continuation_items_path, expected_type=list)
    if not items:
        items = []

    continuation_list[0] = None
    for menu_item in items:
        parsed = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
        if parsed:
            yield parsed
        next_page = menu_item.get('continuationItemRenderer')
        if next_page:
            continuation_list[0] = next_page

5709

5710

def _extract_notification_renderer(self, notification):

5711

video_id = traverse_obj(

5712

notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)

5713

url = f'https://www.youtube.com/watch?v={video_id}'

5714

channel_id = None

5715

if not video_id:

5716

browse_ep = traverse_obj(

5717

notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)

5718

channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)

5719

post_id = self._search_regex(

5720

r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),

5721

'post id', default=None)

5722

if not channel_id or not post_id:

5723

return

5724

# The direct /post url redirects to this in the browser

5725

url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

5726

5727

channel = traverse_obj(

5728

notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),

5729

expected_type=str)

5730

notification_title = self._get_text(notification, 'shortMessage')

5731

if notification_title:

5732

notification_title = notification_title.replace('\xad', '') # remove soft hyphens

5733

# TODO: handle recommended videos

5734

title = self._search_regex(

5735

rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,

5736

'video title', default=None)

5737

upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

5738

if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())

else None)

return {

'_type': 'url',

'url': url,

'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),

5744

'video_id': video_id,

5745

'title': title,

5746

'channel_id': channel_id,

5747

'channel': channel,

5748

'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),

5749

'upload_date': upload_date,

5750

}

5751

5752

def _notification_menu_entries(self, ytcfg):

5753

continuation_list = [None]

5754

response = None

5755

for page in itertools.count(1):

5756

ctoken = traverse_obj(

5757

continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)

5758

response = self._extract_response(

5759

item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,

5760

ep='notification/get_notification_menu', check_get_keys='actions',

5761

headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))

5762

yield from self._extract_notification_menu(response, continuation_list)

5763

if not continuation_list[0]:

5764

break

5765

5766

def _real_extract(self, url):

5767

display_id = 'notifications'

5768

ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}

5769

self._report_playlist_authcheck(ytcfg)

5770

return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5771

5772

5773

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` key, restricted to videos."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {'id': 'youtube-dl test video', 'title': 'youtube-dl test video'},
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearchdate`` key: videos only, sorted by date."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {'id': 'youtube-dl test video', 'title': 'youtube-dl test video'},
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for search result page URLs; forwards the ``sp`` query value as search params."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {'id': 'youtube-dl test video', 'title': 'youtube-dl test video'},
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {'id': 'python', 'title': 'python'},
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        qs = parse_qs(url)
        # Either parameter name may carry the search terms; 'search_query' wins.
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5842

5843

5844

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com search URLs.

    The result section is chosen either by the ``sp`` query value or, when
    that is absent, by a URL fragment naming one of the ``_SECTIONS`` keys.
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {'id': 'royalty free music', 'title': 'royalty free music'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {'id': 'royalty free music - songs', 'title': 'royalty free music - songs'},
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        qs = parse_qs(url)
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Reverse lookup: show the section name if the params are known,
            # otherwise fall back to the raw params value.
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # Text between the first and second '#' (empty when there is no fragment)
            fragment = url.split('#')[1] if '#' in url else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5895

5896

5897

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.

    Each subclass overrides _FEED_NAME; it is used both in IE_NAME and in the
    /feed/ URL that extraction is delegated to.
    """

    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Borrowed from YoutubeBaseInfoExtractor, since this class derives from InfoExtractor directly
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return 'youtube:' + cls._FEED_NAME

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5915

5916

5917

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor: maps ``:ytwatchlater`` to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # The keyword carries no ID; the target playlist URL is fixed.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5929

5930

5931

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com home URL; login is not required."""

    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword (feed name ``subscriptions``)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keyword (feed name ``history``)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Extractor for the ``ytstories:`` prefix; delegates to a derived playlist URL."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # The captured ID excludes the leading "UC"; the playlist ID is "RLTD" + that remainder.
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)

5985

5986

5987

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video ID, and explain why.

    The pattern only matches URLs that end right after a single known
    non-ID query parameter (or with no parameter at all), which is what is
    left over when an unquoted ``&`` is interpreted by the shell.
    Extraction always fails with an expected ExtractorError carrying a hint.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matching this extractor has no video ID to extract.

        Raises:
            ExtractorError: with expected=True, since this is a user error rather than a bug.
        """
        # The hint names this project's own executable so it can be copy-pasted as is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for /clip/ URLs.

    Resolves the clip page to its base video and returns a ``url_transparent``
    result for that video, annotated with the clip's start and end offsets.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the clip's base video.

        Raises:
            ExtractorError: if the page data contains no video ID, or no
                usable clip start/end time.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First 'loopCommand' found along this path; None when nothing matches
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # Fail with a proper extractor error instead of a bare TypeError/KeyError
        # when the clip section is missing or incomplete.
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Milliseconds -> seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1-10 characters long and report them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Never extracts anything: always reports the short ID as an expected error.
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)