jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from ..compat import functools
	21	from ..jsinterp import JSInterpreter
	22	from ..utils import (
	23	NO_DEFAULT,
	24	ExtractorError,
	25	UserNotLive,
	26	bug_reports_message,
	27	classproperty,
	28	clean_html,
	29	datetime_from_str,
	30	dict_get,
	31	float_or_none,
	32	format_field,
	33	get_first,
	34	int_or_none,
	35	is_html,
	36	join_nonempty,
	37	js_to_json,
	38	mimetype2ext,
	39	network_exceptions,
	40	orderedSet,
	41	parse_codecs,
	42	parse_count,
	43	parse_duration,
	44	parse_iso8601,
	45	parse_qs,
	46	qualities,
	47	remove_start,
	48	smuggle_url,
	49	str_or_none,
	50	str_to_int,
	51	strftime_or_none,
	52	traverse_obj,
	53	try_get,
	54	unescapeHTML,
	55	unified_strdate,
	56	unified_timestamp,
	57	unsmuggle_url,
	58	update_url_query,
	59	url_or_none,
	60	urljoin,
	61	variadic,
	62	)
	63
	# any clients starting with _ cannot be explicitly requested by the user
	#
	# Static per-client InnerTube configuration, keyed by client name.
	# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
	# receive defaults from build_innertube_clients(), which also adds 'hl',
	# 'priority' and the generated '<client>_embedscreen' entries.
	INNERTUBE_CLIENTS = {
	    'web': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB',
	                'clientVersion': '2.20220801.00.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
	    },
	    'web_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB_EMBEDDED_PLAYER',
	                'clientVersion': '1.20220731.00.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
	    },
	    'web_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	        'INNERTUBE_HOST': 'music.youtube.com',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB_REMIX',
	                'clientVersion': '1.20220727.01.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	    },
	    'web_creator': {
	        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'WEB_CREATOR',
	                'clientVersion': '1.20220726.00.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	    },
	    'android': {
	        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID',
	                'clientVersion': '17.29.34',
	                'androidSdkVersion': 30,
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID_EMBEDDED_PLAYER',
	                'clientVersion': '17.29.34',
	                'androidSdkVersion': 30,
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID_MUSIC',
	                'clientVersion': '5.16.51',
	                'androidSdkVersion': 30,
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_creator': {
	        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'ANDROID_CREATOR',
	                'clientVersion': '22.28.100',
	                'androidSdkVersion': 30,
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
	    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	    'ios': {
	        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS',
	                'clientVersion': '17.30.1',
	                'deviceModel': 'iPhone14,3',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_embedded': {
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS_MESSAGES_EXTENSION',
	                'clientVersion': '17.30.1',
	                'deviceModel': 'iPhone14,3',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS_MUSIC',
	                'clientVersion': '5.18',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_creator': {
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'IOS_CREATOR',
	                'clientVersion': '22.29.101',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    # mweb has 'ultralow' formats
	    # See: https://github.com/yt-dlp/yt-dlp/pull/557
	    'mweb': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'MWEB',
	                'clientVersion': '2.20220801.00.00',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
	    },
	    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
	    'tv_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {
	            'client': {
	                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	                'clientVersion': '2.0',
	            },
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
	    },
	}
	226
	227
	228	def _split_innertube_client(client_name):
	229	variant, *base = client_name.rsplit('.', 1)
	230	if base:
	231	return variant, base[0], variant
	232	base, *variant = client_name.split('_', 1)
	233	return client_name, base, variant[0] if variant else None
	234
	235
	def build_innertube_clients():
	    """Complete every entry of INNERTUBE_CLIENTS in place.

	    For each configured client this fills in a default API key, host,
	    ``REQUIRE_JS_PLAYER`` flag and ``hl`` language, then assigns
	    ``priority`` as ten times the value ``qualities`` gives the base client,
	    minus a penalty:

	    * clients without a variant keep the full value, and additionally get a
	      generated ``'<client>_embedscreen'`` copy (penalty 3) whose context
	      uses the EMBED client screen and a third-party embed URL;
	    * ``embedded`` variants get the third-party embed URL and penalty 2;
	    * every other variant gets penalty 3.
	    """
	    third_party = {
	        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
	    }
	    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
	    priority = qualities(base_clients[::-1])
	    defaults = (
	        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
	        ('INNERTUBE_HOST', 'www.youtube.com'),
	        ('REQUIRE_JS_PLAYER', True),
	    )

	    # Iterate over a snapshot: the loop adds '<client>_embedscreen' entries
	    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	        for key, fallback in defaults:
	            ytcfg.setdefault(key, fallback)
	        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

	        _, base_client, variant = _split_innertube_client(client)
	        ytcfg['priority'] = 10 * priority(base_client)

	        if variant == 'embedded':
	            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	            ytcfg['priority'] -= 2
	        elif variant:
	            ytcfg['priority'] -= 3
	        else:
	            embedscreen = copy.deepcopy(ytcfg)
	            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen
	            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	            embedscreen['priority'] -= 3
	262
	263
	264	build_innertube_clients()
	265
	266
	267	class YoutubeBaseInfoExtractor(InfoExtractor):
	268	"""Provide base functions for Youtube extractors"""
	269
	270	_RESERVED_NAMES = (
	271	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	272	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	273	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	274	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	275
	276	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	277
	278	# _NETRC_MACHINE = 'youtube'
	279
	280	# If True it will raise an error if no login info is provided
	281	_LOGIN_REQUIRED = False
	282
	283	_INVIDIOUS_SITES = (
	284	# invidious-redirect websites
	285	r'(?:www\.)?redirect\.invidious\.io',
	286	r'(?:(?:www\|dev)\.)?invidio\.us',
	287	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	288	r'(?:www\.)?invidious\.pussthecat\.org',
	289	r'(?:www\.)?invidious\.zee\.li',
	290	r'(?:www\.)?invidious\.ethibox\.fr',
	291	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	292	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	293	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	294	# youtube-dl invidious instances list
	295	r'(?:(?:www\|no)\.)?invidiou\.sh',
	296	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	297	r'(?:www\.)?invidious\.kabi\.tk',
	298	r'(?:www\.)?invidious\.mastodon\.host',
	299	r'(?:www\.)?invidious\.zapashcanon\.fr',
	300	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	301	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	302	r'(?:www\.)?invidious\.himiko\.cloud',
	303	r'(?:www\.)?invidious\.reallyancient\.tech',
	304	r'(?:www\.)?invidious\.tube',
	305	r'(?:www\.)?invidiou\.site',
	306	r'(?:www\.)?invidious\.site',
	307	r'(?:www\.)?invidious\.xyz',
	308	r'(?:www\.)?invidious\.nixnet\.xyz',
	309	r'(?:www\.)?invidious\.048596\.xyz',
	310	r'(?:www\.)?invidious\.drycat\.fr',
	311	r'(?:www\.)?inv\.skyn3t\.in',
	312	r'(?:www\.)?tube\.poal\.co',
	313	r'(?:www\.)?tube\.connect\.cafe',
	314	r'(?:www\.)?vid\.wxzm\.sx',
	315	r'(?:www\.)?vid\.mint\.lgbt',
	316	r'(?:www\.)?vid\.puffyan\.us',
	317	r'(?:www\.)?yewtu\.be',
	318	r'(?:www\.)?yt\.elukerio\.org',
	319	r'(?:www\.)?yt\.lelux\.fi',
	320	r'(?:www\.)?invidious\.ggc-project\.de',
	321	r'(?:www\.)?yt\.maisputain\.ovh',
	322	r'(?:www\.)?ytprivate\.com',
	323	r'(?:www\.)?invidious\.13ad\.de',
	324	r'(?:www\.)?invidious\.toot\.koeln',
	325	r'(?:www\.)?invidious\.fdn\.fr',
	326	r'(?:www\.)?watch\.nettohikari\.com',
	327	r'(?:www\.)?invidious\.namazso\.eu',
	328	r'(?:www\.)?invidious\.silkky\.cloud',
	329	r'(?:www\.)?invidious\.exonip\.de',
	330	r'(?:www\.)?invidious\.riverside\.rocks',
	331	r'(?:www\.)?invidious\.blamefran\.net',
	332	r'(?:www\.)?invidious\.moomoo\.de',
	333	r'(?:www\.)?ytb\.trom\.tf',
	334	r'(?:www\.)?yt\.cyberhost\.uk',
	335	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	336	r'(?:www\.)?qklhadlycap4cnod\.onion',
	337	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	338	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	339	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	340	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	341	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	342	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	343	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	344	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	345	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	346	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	347	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	348	r'(?:www\.)?piped\.kavin\.rocks',
	349	r'(?:www\.)?piped\.silkky\.cloud',
	350	r'(?:www\.)?piped\.tokhmi\.xyz',
	351	r'(?:www\.)?piped\.moomoo\.me',
	352	r'(?:www\.)?il\.ax',
	353	r'(?:www\.)?piped\.syncpundit\.com',
	354	r'(?:www\.)?piped\.mha\.fi',
	355	r'(?:www\.)?piped\.mint\.lgbt',
	356	r'(?:www\.)?piped\.privacy\.com\.de',
	357	)
	358
	def _initialize_consent(self):
	    """Set an accepting CONSENT cookie for .youtube.com when one is needed.

	    Nothing is done when the jar already holds a ``__Secure-3PSID`` cookie
	    or a ``CONSENT`` cookie whose value contains ``YES``.  Otherwise the
	    number following ``PENDING+`` in the existing CONSENT value is reused
	    as the consent id, falling back to a random three-digit number.
	    """
	    jar = self._get_cookies('https://www.youtube.com/')
	    if jar.get('__Secure-3PSID'):
	        return

	    consent_cookie = jar.get('CONSENT')
	    pending_id = None
	    if consent_cookie:
	        if 'YES' in consent_cookie.value:
	            return
	        pending_id = self._search_regex(
	            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

	    consent_id = pending_id or random.randint(100, 999)
	    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	373
	def _initialize_pref(self):
	    """Rewrite the PREF cookie so that ``hl=en`` and ``tz=UTC`` are set.

	    Settings parsed from an existing PREF cookie are kept; a warning is
	    reported if that cookie cannot be parsed.
	    """
	    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
	    settings = {}
	    if pref_cookie:
	        try:
	            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
	        except ValueError:
	            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	    # Forced values override whatever the user's cookie carried
	    settings = {**settings, 'hl': 'en', 'tz': 'UTC'}
	    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
	385
	def _real_initialize(self):
	    """Run the extractor start-up steps: PREF cookie, consent cookie, login check."""
	    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
	        step()
	390
	391	def _check_login_required(self):
	392	if self._LOGIN_REQUIRED and not self._cookies_passed:
	393	self.raise_login_required('Login details are needed to download this content', method='cookies')
	394
	395	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	396	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	397
	def _get_default_ytcfg(self, client='web'):
	    """Return an independent deep copy of the built-in config for *client*."""
	    builtin_config = INNERTUBE_CLIENTS[client]
	    return copy.deepcopy(builtin_config)
	400
	def _get_innertube_host(self, client='web'):
	    """Return the INNERTUBE_HOST configured for *client*."""
	    client_config = INNERTUBE_CLIENTS[client]
	    return client_config['INNERTUBE_HOST']
	403
	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	    """Look a value up with ``try_get``, falling back to the default client config.

	    The lookup is first attempted on *ytcfg*; when that yields a falsy
	    result, the same lookup is run on the built-in config of
	    *default_client* and its result is returned.
	    """
	    found = try_get(ytcfg, getter, expected_type)
	    if found:
	        return found
	    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
	408
	def _extract_client_name(self, ytcfg, default_client='web'):
	    """Return the client name string from *ytcfg*, or from the default client config.

	    ``INNERTUBE_CLIENT_NAME`` is tried before the ``clientName`` nested in
	    ``INNERTUBE_CONTEXT``.
	    """
	    getters = (
	        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
	        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
	    )
	    return self._ytcfg_get_safe(ytcfg, getters, expected_type=str, default_client=default_client)
	413
	def _extract_client_version(self, ytcfg, default_client='web'):
	    """Return the client version string from *ytcfg*, or from the default client config.

	    ``INNERTUBE_CLIENT_VERSION`` is tried before the ``clientVersion``
	    nested in ``INNERTUBE_CONTEXT``.
	    """
	    getters = (
	        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
	        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
	    )
	    return self._ytcfg_get_safe(ytcfg, getters, expected_type=str, default_client=default_client)
	418
	def _select_api_hostname(self, req_api_hostname, default_client=None):
	    """Pick the API host name to use.

	    Order of precedence: the ``innertube_host`` extractor argument, then
	    *req_api_hostname*, then the host configured for *default_client*
	    (``'web'`` when no client is given).
	    """
	    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	    if configured_host:
	        return configured_host
	    if req_api_hostname:
	        return req_api_hostname
	    return self._get_innertube_host(default_client or 'web')
	422
	def _extract_api_key(self, ytcfg=None, default_client='web'):
	    """Return ``INNERTUBE_API_KEY`` from *ytcfg*, or from the default client config."""
	    return self._ytcfg_get_safe(
	        ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'],
	        expected_type=str, default_client=default_client)
	425
	def _extract_context(self, ytcfg=None, default_client='web'):
	    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone forced.

	    The context is taken from *ytcfg* when it provides one, otherwise from
	    the built-in config of *default_client*.  Its ``client`` sub-dict is
	    updated in place with ``hl='en'``, ``timeZone='UTC'`` and
	    ``utcOffsetMinutes=0``.
	    """
	    # NOTE(review): get_first presumably hands back the dict object itself, so a
	    # context coming from a caller-supplied ytcfg is modified in place - confirm.
	    candidates = (ytcfg, self._get_default_ytcfg(default_client))
	    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
	    # Enforce language and tz for extraction
	    enforced = {'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
	    traverse_obj(context, 'client', expected_type=dict, default={}).update(enforced)
	    return context
	433
	434	_SAPISID = None
	435
	436	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	437	time_now = round(time.time())
	438	if self._SAPISID is None:
	439	yt_cookies = self._get_cookies('https://www.youtube.com')
	440	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	441	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	442	sapisid_cookie = dict_get(
	443	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	444	if sapisid_cookie and sapisid_cookie.value:
	445	self._SAPISID = sapisid_cookie.value
	446	self.write_debug('Extracted SAPISID cookie')
	447	# SAPISID cookie is required if not already present
	448	if not yt_cookies.get('SAPISID'):
	449	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	450	self._set_cookie(
	451	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	452	else:
	453	self._SAPISID = False
	454	if not self._SAPISID:
	455	return None
	456	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	457	sapisidhash = hashlib.sha1(
	458	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	459	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	460
	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	              note='Downloading API JSON', errnote='Unable to download API page',
	              context=None, api_key=None, api_hostname=None, default_client='web'):
	    """POST a JSON request to ``https://<host>/youtubei/v1/<ep>`` and return the parsed reply.

	    @param ep              endpoint path appended after ``/youtubei/v1/``
	    @param query           dict merged into the request body next to ``context``
	    @param video_id        passed through to ``_download_json``
	    @param fatal, note, errnote  passed through to ``_download_json``
	    @param headers         extra headers; they override the generated API headers
	    @param context         request context; extracted for *default_client* when falsy
	    @param api_key         key used unless the ``innertube_key`` extractor
	                           argument is set; defaults to the key of *default_client*
	    @param api_hostname    requested host, resolved via ``_select_api_hostname``
	    @param default_client  client whose built-in config supplies the fallbacks
	    """
	    payload = {'context': context or self._extract_context(default_client=default_client)}
	    payload.update(query)

	    request_headers = self.generate_api_headers(default_client=default_client)
	    request_headers['content-type'] = 'application/json'
	    if headers:
	        request_headers.update(headers)

	    configured_key = self._configuration_arg(
	        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	    effective_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

	    endpoint_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
	    return self._download_json(
	        endpoint_url,
	        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	        data=json.dumps(payload).encode('utf8'), headers=request_headers,
	        query={'key': effective_key, 'prettyPrint': 'false'})
	478
	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	    """Return the JSON that follows the ytInitialData assignment in *webpage*.

	    The search is delegated to ``_search_json`` with ``_YT_INITIAL_DATA_RE``
	    as the start pattern; *fatal* is forwarded unchanged.
	    """
	    start_pattern = self._YT_INITIAL_DATA_RE
	    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
	481
	@staticmethod
	def _extract_session_index(*data):
	    """
	    Index of current account in account list.
	    See: https://github.com/yt-dlp/yt-dlp/pull/519

	    Each given ytcfg is checked in order; the first ``SESSION_INDEX`` that
	    ``int_or_none`` turns into a non-None value is returned, else ``None``.
	    """
	    candidates = (
	        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	        for ytcfg in data)
	    return next((index for index in candidates if index is not None), None)
	492
	493	# Deprecated?
	494	def _extract_identity_token(self, ytcfg=None, webpage=None):
	495	if ytcfg:
	496	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	497	if token:
	498	return token
	499	if webpage:
	500	return self._search_regex(

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from ..compat import functools

21

from ..jsinterp import JSInterpreter

22

from ..utils import (

NO_DEFAULT,

ExtractorError,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

65

INNERTUBE_CLIENTS = {

66

'web': {

67

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

68

'INNERTUBE_CONTEXT': {

69

'client': {

70

'clientName': 'WEB',

71

'clientVersion': '2.20220801.00.00',

72

}

73

},

74

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

75

},

76

'web_embedded': {

77

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

78

'INNERTUBE_CONTEXT': {

79

'client': {

80

'clientName': 'WEB_EMBEDDED_PLAYER',

81

'clientVersion': '1.20220731.00.00',

82

},

83

},

84

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

85

},

86

'web_music': {

87

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

88

'INNERTUBE_HOST': 'music.youtube.com',

89

'INNERTUBE_CONTEXT': {

90

'client': {

91

'clientName': 'WEB_REMIX',

92

'clientVersion': '1.20220727.01.00',

93

}

94

},

95

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

96

},

97

'web_creator': {

98

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

99

'INNERTUBE_CONTEXT': {

100

'client': {

101

'clientName': 'WEB_CREATOR',

102

'clientVersion': '1.20220726.00.00',

103

}

104

},

105

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

106

},

107

'android': {

108

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

109

'INNERTUBE_CONTEXT': {

110

'client': {

111

'clientName': 'ANDROID',

112

'clientVersion': '17.29.34',

113

'androidSdkVersion': 30

114

}

115

},

116

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

117

'REQUIRE_JS_PLAYER': False

118

},

119

'android_embedded': {

120

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID_EMBEDDED_PLAYER',

124

'clientVersion': '17.29.34',

125

'androidSdkVersion': 30

126

},

127

},

128

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

129

'REQUIRE_JS_PLAYER': False

130

},

131

'android_music': {

132

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

133

'INNERTUBE_CONTEXT': {

134

'client': {

135

'clientName': 'ANDROID_MUSIC',

136

'clientVersion': '5.16.51',

137

'androidSdkVersion': 30

138

}

139

},

140

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

141

'REQUIRE_JS_PLAYER': False

142

},

143

'android_creator': {

144

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

145

'INNERTUBE_CONTEXT': {

146

'client': {

147

'clientName': 'ANDROID_CREATOR',

148

'clientVersion': '22.28.100',

149

'androidSdkVersion': 30

150

},

151

},

152

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

153

'REQUIRE_JS_PLAYER': False

154

},

155

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

156

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

157

'ios': {

158

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

159

'INNERTUBE_CONTEXT': {

160

'client': {

161

'clientName': 'IOS',

162

'clientVersion': '17.30.1',

163

'deviceModel': 'iPhone14,3',

164

}

165

},

166

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

167

'REQUIRE_JS_PLAYER': False

168

},

169

'ios_embedded': {

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS_MESSAGES_EXTENSION',

173

'clientVersion': '17.30.1',

174

'deviceModel': 'iPhone14,3',

175

},

176

},

177

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

178

'REQUIRE_JS_PLAYER': False

179

},

180

'ios_music': {

181

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

182

'INNERTUBE_CONTEXT': {

183

'client': {

184

'clientName': 'IOS_MUSIC',

185

'clientVersion': '5.18',

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_creator': {

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_CREATOR',

195

'clientVersion': '22.29.101',

196

},

197

},

198

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

199

'REQUIRE_JS_PLAYER': False

200

},

201

# mweb has 'ultralow' formats

202

# See: https://github.com/yt-dlp/yt-dlp/pull/557

203

'mweb': {

204

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'MWEB',

208

'clientVersion': '2.20220801.00.00',

209

}

210

},

211

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

212

},

213

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

214

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

215

'tv_embedded': {

216

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

217

'INNERTUBE_CONTEXT': {

218

'client': {

219

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

220

'clientVersion': '2.0',

221

},

222

},

223

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

229

variant, *base = client_name.rsplit('.', 1)

230

if base:

231

return variant, base[0], variant

232

base, *variant = client_name.split('_', 1)

233

return client_name, base, variant[0] if variant else None

234

235

236

def build_innertube_clients():

237

THIRD_PARTY = {

238

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

239

}

240

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

241

priority = qualities(BASE_CLIENTS[::-1])

242

243

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

244

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

245

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

246

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

247

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

248

249

_, base_client, variant = _split_innertube_client(client)

250

ytcfg['priority'] = 10 * priority(base_client)

251

252

if not variant:

253

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

254

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

255

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

256

embedscreen['priority'] -= 3

257

elif variant == 'embedded':

258

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

259

ytcfg['priority'] -= 2

260

else:

261

ytcfg['priority'] -= 3

262

263

264

build_innertube_clients()

265

266

267

class YoutubeBaseInfoExtractor(InfoExtractor):

268

"""Provide base functions for Youtube extractors"""

269

270

_RESERVED_NAMES = (

271

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

277

278

# _NETRC_MACHINE = 'youtube'

279

280

# If True it will raise an error if no login info is provided

281

_LOGIN_REQUIRED = False

282

283

_INVIDIOUS_SITES = (

284

# invidious-redirect websites

285

r'(?:www\.)?redirect\.invidious\.io',

286

r'(?:(?:www|dev)\.)?invidio\.us',

287

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

288

r'(?:www\.)?invidious\.pussthecat\.org',

289

r'(?:www\.)?invidious\.zee\.li',

290

r'(?:www\.)?invidious\.ethibox\.fr',

291

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

292

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

293

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

294

# youtube-dl invidious instances list

295

r'(?:(?:www|no)\.)?invidiou\.sh',

296

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

297

r'(?:www\.)?invidious\.kabi\.tk',

298

r'(?:www\.)?invidious\.mastodon\.host',

299

r'(?:www\.)?invidious\.zapashcanon\.fr',

300

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

301

r'(?:www\.)?invidious\.tinfoil-hat\.net',

302

r'(?:www\.)?invidious\.himiko\.cloud',

303

r'(?:www\.)?invidious\.reallyancient\.tech',

304

r'(?:www\.)?invidious\.tube',

305

r'(?:www\.)?invidiou\.site',

306

r'(?:www\.)?invidious\.site',

307

r'(?:www\.)?invidious\.xyz',

308

r'(?:www\.)?invidious\.nixnet\.xyz',

309

r'(?:www\.)?invidious\.048596\.xyz',

310

r'(?:www\.)?invidious\.drycat\.fr',

311

r'(?:www\.)?inv\.skyn3t\.in',

312

r'(?:www\.)?tube\.poal\.co',

313

r'(?:www\.)?tube\.connect\.cafe',

314

r'(?:www\.)?vid\.wxzm\.sx',

315

r'(?:www\.)?vid\.mint\.lgbt',

316

r'(?:www\.)?vid\.puffyan\.us',

317

r'(?:www\.)?yewtu\.be',

318

r'(?:www\.)?yt\.elukerio\.org',

319

r'(?:www\.)?yt\.lelux\.fi',

320

r'(?:www\.)?invidious\.ggc-project\.de',

321

r'(?:www\.)?yt\.maisputain\.ovh',

322

r'(?:www\.)?ytprivate\.com',

323

r'(?:www\.)?invidious\.13ad\.de',

324

r'(?:www\.)?invidious\.toot\.koeln',

325

r'(?:www\.)?invidious\.fdn\.fr',

326

r'(?:www\.)?watch\.nettohikari\.com',

327

r'(?:www\.)?invidious\.namazso\.eu',

328

r'(?:www\.)?invidious\.silkky\.cloud',

329

r'(?:www\.)?invidious\.exonip\.de',

330

r'(?:www\.)?invidious\.riverside\.rocks',

331

r'(?:www\.)?invidious\.blamefran\.net',

332

r'(?:www\.)?invidious\.moomoo\.de',

333

r'(?:www\.)?ytb\.trom\.tf',

334

r'(?:www\.)?yt\.cyberhost\.uk',

335

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

336

r'(?:www\.)?qklhadlycap4cnod\.onion',

337

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

338

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

339

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

340

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

341

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

342

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

343

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

344

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

345

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

346

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

347

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

348

r'(?:www\.)?piped\.kavin\.rocks',

349

r'(?:www\.)?piped\.silkky\.cloud',

350

r'(?:www\.)?piped\.tokhmi\.xyz',

351

r'(?:www\.)?piped\.moomoo\.me',

352

r'(?:www\.)?il\.ax',

353

r'(?:www\.)?piped\.syncpundit\.com',

354

r'(?:www\.)?piped\.mha\.fi',

355

r'(?:www\.)?piped\.mint\.lgbt',

356

r'(?:www\.)?piped\.privacy\.com\.de',

357

)

358

359

def _initialize_consent(self):

360

cookies = self._get_cookies('https://www.youtube.com/')

361

if cookies.get('__Secure-3PSID'):

362

return

363

consent_id = None

364

consent = cookies.get('CONSENT')

365

if consent:

366

if 'YES' in consent.value:

367

return

368

consent_id = self._search_regex(

369

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

370

if not consent_id:

371

consent_id = random.randint(100, 999)

372

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

373

374

def _initialize_pref(self):

375

cookies = self._get_cookies('https://www.youtube.com/')

376

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

381

except ValueError:

382

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

383

pref.update({'hl': 'en', 'tz': 'UTC'})

384

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

385

386

def _real_initialize(self):

387

self._initialize_pref()

388

self._initialize_consent()

389

self._check_login_required()

390

391

def _check_login_required(self):

392

if self._LOGIN_REQUIRED and not self._cookies_passed:

393

self.raise_login_required('Login details are needed to download this content', method='cookies')

394

395

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

396

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

397

398

def _get_default_ytcfg(self, client='web'):

399

return copy.deepcopy(INNERTUBE_CLIENTS[client])

400

401

def _get_innertube_host(self, client='web'):

402

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

403

404

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

405

# try_get but with fallback to default ytcfg client values when present

406

_func = lambda y: try_get(y, getter, expected_type)

407

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

408

409

def _extract_client_name(self, ytcfg, default_client='web'):

410

return self._ytcfg_get_safe(

411

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

412

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

413

414

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from *ytcfg*, falling back to the default client config.

    ``INNERTUBE_CLIENT_VERSION`` is tried first, then the ``clientVersion``
    nested in ``INNERTUBE_CONTEXT``.
    """
    def from_top_level(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    def from_context(cfg):
        return cfg['INNERTUBE_CONTEXT']['client']['clientVersion']

    return self._ytcfg_get_safe(ytcfg, (from_top_level, from_context), str, default_client)

418

419

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the ``innertube_host`` extractor argument, then
    *req_api_hostname*, then the built-in host of *default_client*
    (``'web'`` when no client is given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

422

423

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from *ytcfg*, falling back to the default client config."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)

425

426

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone forced.

    The context is taken from *ytcfg* when available, otherwise from the
    default config of *default_client*. Its ``client`` section (when it is a
    dict) is updated in place, so a context coming from *ytcfg* is mutated.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context

# Cached SAPISID value: None = not looked up yet, False = no usable cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization value for *origin*.

    The SAPISID value is read from the YouTube cookies on first use and
    cached in ``self._SAPISID``. Returns None when no usable cookie exists.
    """
    timestamp = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {self._SAPISID} {origin}'.encode()
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'

460

461

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST *query* as JSON to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    The request body is *query* merged over a ``context`` entry (the given
    *context*, or the one extracted for *default_client*). Extra *headers*
    override the generated API headers. The API key comes from the
    ``innertube_key`` extractor argument, then *api_key*, then the default
    client config.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search *webpage* for the JSON that follows the ``ytInitialData`` assignment."""
    initial_data = self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
    return initial_data

481

482

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from *ytcfg* (``ID_TOKEN``) or, failing that, from *webpage*.

    Returns None when neither source provides a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

503

504

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Returns None when none of the arguments carries a channel syncId.
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), str)
        parts = (datasync_id or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

532

533

@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH header can be generated (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

536

537

def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ``ytcfg.set({...});`` in *webpage*.

    Returns an empty dict when *webpage* is empty, when no ``ytcfg.set``
    call is found, or when the captured JSON cannot be parsed.
    """
    if not webpage:
        return {}
    # The parentheses of the ``ytcfg.set(...)`` call must be matched as
    # escaped literals; ``$`` in their place can never match the call.
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}

544

545

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicitly passed values take precedence over those extracted from
    *ytcfg*. ``Authorization`` and ``X-Origin`` are only added when a
    SAPISIDHASH header can be generated. Headers whose value is None are
    left out of the result.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    id_token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)

    candidate_headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': id_token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        candidate_headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    sapisid_auth = self._generate_sapisidhash_header(origin)
    if sapisid_auth is not None:
        candidate_headers['Authorization'] = sapisid_auth
        candidate_headers['X-Origin'] = origin

    return {name: value for name, value in candidate_headers.items() if value is not None}

569

570

def _download_ytcfg(self, client, video_id):
    """Download the web page of a web-based *client* and return the ytcfg found in it.

    Returns an empty dict for clients without a known page, or when no
    ytcfg can be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}
    progress_note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(page_url, video_id, fatal=False, note=progress_note)
    return self.extract_ytcfg(video_id, webpage) or {}

581

582

@staticmethod

583

def _build_api_continuation_query(continuation, ctp=None):

584

query = {

585

'continuation': continuation

586

}

587

# TODO: Inconsistency with clickTrackingParams.

588

# Currently we have a fixed ctp contained within context (from ytcfg)

589

# and a ctp in root query for continuation.

590

if ctp:

591

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from ``nextContinuationData`` / ``reloadContinuationData``.

    Returns None when *renderer* carries neither, or when the continuation
    token is missing.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

606

607

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or has no
    ``continuationCommand`` token.
    """
    if not isinstance(continuation_ep, dict):
        return
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

616

617

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if there is none.

    The renderer's own next/reload continuation data is preferred; otherwise
    the entries under ``contents`` and ``items`` are scanned for a
    ``continuationItemRenderer``.
    """
    direct_query = cls._extract_next_continuation_data(renderer)
    if direct_query:
        return direct_query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []
    ]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        entry_query = cls._extract_continuation_ep_data(endpoint)
        if entry_query:
            return entry_query

@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Alerts without a type or without extractable text are skipped.
    """
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if isinstance(group, dict):
            for alert in group.values():
                kind = alert.get('type')
                if kind:
                    text = cls._get_text(alert, 'text')
                    if text:
                        yield kind, text

650

651

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report alerts as warnings and raise on the last fatal error.

    Alerts of type ``error`` (case-insensitive) are fatal only when *fatal*
    is set; everything else is reported as a warning. All fatal errors but
    the last are also reported as warnings, and the last one is raised as
    an ExtractorError.
    """
    fatal_alerts = []
    other_alerts = []
    for kind, text in alerts:
        bucket = fatal_alerts if (kind.lower() == 'error' and fatal) else other_alerts
        bucket.append([kind, text])

    for kind, text in other_alerts + fatal_alerts[:-1]:
        self.report_warning(f'YouTube said: {kind} - {text}', only_once=only_once)
    if not fatal_alerts:
        return
    raise ExtractorError('YouTube said: %s' % fatal_alerts[-1][1], expected=expected)

664

665

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and hand them to ``_report_alerts`` with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

667

668

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of *path_list* inside *data*.

    Without paths, *data* itself is inspected. For each candidate the
    ``simpleText`` value is preferred; otherwise the ``text`` of its
    ``runs`` (or of the candidate itself when it is a list) is joined,
    limited to the first *max_runs* runs when given. Returns None when
    nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # a non-branching path gives a single object rather than a list of results
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(
                traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Parse a count from the text found at *path_list* in *data*.

    ``parse_count`` is tried first; if it yields None, the leading run of
    digits and commas (after removing all whitespace) is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Entries without a valid URL are skipped.
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected

@staticmethod

def extract_relative_time(relative_time_text):

732

"""

733

Extracts a relative time from string and converts to dt object

734

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

739

if start:

740

return datetime_from_str(start)

741

try:

742

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time; otherwise it is parsed as
    a date, either directly or from a date-like fragment of the lower-cased
    text. A warning is issued once when non-empty text cannot be parsed.
    """
    time_text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(time_text)

    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(time_text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                time_text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if time_text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{time_text}'" + bug_reports_message(), only_once=True)
    return timestamp, time_text

764

765

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint *ep* with retries and return the response.

    Retries are driven by ``self.RetryManager()``: an attempt is retried by
    setting ``retry.error`` and continuing. Failures that are not retried
    are passed to ``self._error_or_warning`` together with *fatal*.

    @param check_get_keys  keys/paths passed to ``traverse_obj``; a response
                           for which the lookup is falsy counts as incomplete
                           and is retried
    """
    for retry in self.RetryManager():
        try:
            # The call itself is always fatal so that failures surface here as ExtractorError
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                # Network problem without an HTTP response: retry
                retry.error = e
                continue

            # HTTP error: report the message from a non-HTML error body, if it has one
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

819

return re.match(r'https?://music\.youtube\.com/', url) is not None

820

821

def _extract_video(self, renderer):
    """Build a ``url``-type result dict for the video described by *renderer*.

    The URL points to ``/shorts/<id>`` when the thumbnail overlay style is
    ``SHORTS`` or the navigation URL contains ``/shorts/``, and to the
    regular watch page otherwise. ``upload_date`` is only filled in when
    the ``approximate_date`` extractor argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # fall back to the duration in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

878

IE_DESC = 'YouTube'

879

_VALID_URL = r"""(?x)^

880

(

881

(?:https?://|//) # http(s):// or protocol-independent URL

882

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

883

(?:www\.)?deturl\.com/www\.youtube\.com|

884

(?:www\.)?pwnyoutube\.com|

885

(?:www\.)?hooktube\.com|

886

(?:www\.)?yourepeat\.com|

887

tube\.majestyc\.net|

888

%(invidious)s|

889

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

890

(?:.*?\#/)? # handle anchor (#/) redirect urls

891

(?: # the various things that can precede the ID:

892

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

893

|(?: # or the v= param in all its forms

894

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

895

(?:\?|\#!?) # the params delimiter ? or # or #!

896

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

902

vid\.plus| # or vid.plus/xxxx

903

zwearz\.com/watch| # or zwearz.com/watch/xxxx

904

%(invidious)s

905

)/

906

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

907

)

908

)? # all until now is optional -> you can pass the naked ID

909

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

910

(?(1).+)? # if we found the ID, everything can follow

911

(?:\#|$)""" % {

912

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

913

}

914

_EMBED_REGEX = [r'''(?x)

(?:

<iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

925

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

926

\1''']

927

_PLAYER_INFO_RE = (

928

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

929

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

930

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

931

)

932

_formats = {

933

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

934

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

935

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

936

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

937

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

938

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

939

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

940

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

941

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

942

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

943

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

944

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

945

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

946

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

947

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

948

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

949

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

950

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

955

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

956

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

957

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

958

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

959

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

960

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

961

962

# Apple HTTP Live Streaming

963

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

964

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

965

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

966

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

967

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

968

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

969

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

970

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

971

972

# DASH mp4 video

973

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

974

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

975

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

976

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

977

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

979

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

983

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

984

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

986

# Dash mp4 audio

987

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

988

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

989

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

990

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

991

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

992

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

993

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

994

995

# Dash webm

996

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

997

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

998

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

999

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1000

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1003

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1004

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1005

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1006

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1007

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1012

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1014

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1015

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1016

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1018

1019

# Dash webm audio

1020

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1021

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1022

1023

# Dash webm audio with opus inside

1024

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1025

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1026

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1027

1028

# RTMP (unnamed)

1029

'_rtmp': {'protocol': 'rtmp'},

1030

1031

# av01 video only formats sometimes served with "unknown" codecs

1032

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1033

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1034

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1035

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1036

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1037

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1038

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1039

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1040

}

1041

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1053

'uploader': 'Philipp Hagemeister',

1054

'uploader_id': 'phihag',

1055

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1056

'channel': 'Philipp Hagemeister',

1057

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1058

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1059

'upload_date': '20121002',

1060

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1061

'categories': ['Science & Technology'],

1062

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1067

'playable_in_embed': True,

1068

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1069

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1074

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1079

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1084

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1085

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1086

'uploader': 'SET India',

1087

'uploader_id': 'setindia',

1088

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1089

'age_limit': 18,

1090

},

1091

'skip': 'Private video',

1092

},

1093

{

1094

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1095

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1100

'uploader': 'Philipp Hagemeister',

1101

'uploader_id': 'phihag',

1102

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1103

'channel': 'Philipp Hagemeister',

1104

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1105

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1106

'upload_date': '20121002',

1107

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1108

'categories': ['Science & Technology'],

1109

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1114

'playable_in_embed': True,

1115

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1116

'live_status': 'not_live',

1117

'age_limit': 0,

1118

'comment_count': int,

1119

'channel_follower_count': int

1120

},

1121

'params': {

1122

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1127

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1132

'uploader_id': '8KVIDEO',

1133

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1134

'description': '',

1135

'uploader': '8KVIDEO',

1136

'title': 'UHDTV TEST 8K VIDEO.mp4'

1137

},

1138

'params': {

1139

'youtube_include_dash_manifest': True,

1140

'format': '141',

1141

},

1142

'skip': 'format 141 not served anymore',

1143

},

1144

# DASH manifest with encrypted signature

1145

{

1146

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1151

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1152

'duration': 244,

1153

'uploader': 'AfrojackVEVO',

1154

'uploader_id': 'AfrojackVEVO',

1155

'upload_date': '20131011',

1156

'abr': 129.495,

1157

'like_count': int,

1158

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1159

'playable_in_embed': True,

1160

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1161

'view_count': int,

1162

'track': 'The Spark',

1163

'live_status': 'not_live',

1164

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1165

'channel': 'Afrojack',

1166

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1167

'tags': 'count:19',

1168

'availability': 'public',

1169

'categories': ['Music'],

1170

'age_limit': 0,

1171

'alt_title': 'The Spark',

1172

'channel_follower_count': int

1173

},

1174

'params': {

1175

'youtube_include_dash_manifest': True,

1176

'format': '141/bestaudio[ext=m4a]',

1177

},

1178

},

1179

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1180

{

1181

'note': 'Embed allowed age-gate video',

1182

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1187

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1188

'duration': 142,

1189

'uploader': 'The Witcher',

1190

'uploader_id': 'WitcherGame',

1191

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1192

'upload_date': '20140605',

1193

'age_limit': 18,

1194

'categories': ['Gaming'],

1195

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1196

'availability': 'needs_auth',

1197

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1198

'like_count': int,

1199

'channel': 'The Witcher',

1200

'live_status': 'not_live',

1201

'tags': 'count:17',

1202

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1203

'playable_in_embed': True,

1204

'view_count': int,

1205

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1210

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1215

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1216

'upload_date': '20200408',

1217

'uploader_id': 'FlyingKitty900',

1218

'uploader': 'FlyingKitty',

1219

'age_limit': 18,

1220

'availability': 'needs_auth',

1221

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1222

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1223

'channel': 'FlyingKitty',

1224

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1225

'view_count': int,

1226

'categories': ['Entertainment'],

1227

'live_status': 'not_live',

1228

'tags': ['Flyingkitty', 'godzilla 2'],

1229

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1230

'like_count': int,

1231

'duration': 177,

1232

'playable_in_embed': True,

1233

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1238

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1239

'info_dict': {

1240

'id': 'Tq92D6wQ1mg',

1241

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1242

'ext': 'mp4',

1243

'upload_date': '20191228',

1244

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1245

'uploader': 'Projekt Melody',

1246

'description': 'md5:17eccca93a786d51bc67646756894066',

1247

'age_limit': 18,

1248

'like_count': int,

1249

'availability': 'needs_auth',

1250

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1251

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1252

'view_count': int,

1253

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1254

'channel': 'Projekt Melody',

1255

'live_status': 'not_live',

1256

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1257

'playable_in_embed': True,

1258

'categories': ['Entertainment'],

1259

'duration': 106,

1260

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'comment_count': int,

1262

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1267

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1272

'uploader': 'Herr Lurik',

1273

'uploader_id': 'st3in234',

1274

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1275

'upload_date': '20130730',

1276

'track': 'Such mich find mich',

1277

'age_limit': 0,

1278

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1279

'like_count': int,

1280

'playable_in_embed': False,

1281

'creator': 'OOMPH!',

1282

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1283

'view_count': int,

1284

'alt_title': 'Such mich find mich',

1285

'duration': 210,

1286

'channel': 'Herr Lurik',

1287

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1288

'categories': ['Music'],

1289

'availability': 'public',

1290

'uploader_url': 'http://www.youtube.com/user/st3in234',

1291

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1292

'live_status': 'not_live',

1293

'artist': 'OOMPH!',

1294

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1299

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1300

'only_matching': True,

1301

},

1302

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1303

# YouTube Red ad is not captured for creator

1304

{

1305

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1311

'uploader_id': 'deadmau5',

1312

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1313

'creator': 'deadmau5',

1314

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1315

'uploader': 'deadmau5',

1316

'title': 'Deadmau5 - Some Chords (HD)',

1317

'alt_title': 'Some Chords',

1318

'availability': 'public',

1319

'tags': 'count:14',

1320

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1321

'view_count': int,

1322

'live_status': 'not_live',

1323

'channel': 'deadmau5',

1324

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1325

'like_count': int,

1326

'track': 'Some Chords',

1327

'artist': 'deadmau5',

1328

'playable_in_embed': True,

1329

'age_limit': 0,

1330

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1331

'categories': ['Music'],

1332

'album': 'Some Chords',

1333

'channel_follower_count': int

1334

},

1335

'expected_warnings': [

1336

'DASH manifest missing',

1337

]

1338

},

1339

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1340

{

1341

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1347

'uploader_id': 'olympic',

1348

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1349

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1350

'uploader': 'Olympics',

1351

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1352

'like_count': int,

1353

'release_timestamp': 1343767800,

1354

'playable_in_embed': True,

1355

'categories': ['Sports'],

1356

'release_date': '20120731',

1357

'channel': 'Olympics',

1358

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1359

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1360

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1361

'age_limit': 0,

1362

'availability': 'public',

1363

'live_status': 'was_live',

1364

'view_count': int,

1365

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1366

'channel_follower_count': int

1367

},

1368

'params': {

1369

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1379

'duration': 85,

1380

'upload_date': '20110310',

1381

'uploader_id': 'AllenMeow',

1382

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1383

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1384

'uploader': '孫ᄋᄅ',

1385

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1386

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1391

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1392

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1393

'view_count': int,

1394

'categories': ['People & Blogs'],

1395

'like_count': int,

1396

'live_status': 'not_live',

1397

'availability': 'unlisted',

1398

'comment_count': int,

1399

'channel_follower_count': int

1400

},

1401

},

1402

# url_encoded_fmt_stream_map is empty string

1403

{

1404

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1409

'description': '',

1410

'upload_date': '20150404',

1411

'uploader_id': 'spbelect',

1412

'uploader': 'Наблюдатели Петербурга',

1413

},

1414

'params': {

1415

'skip_download': 'requires avconv',

1416

},

1417

'skip': 'This live event has ended.',

1418

},

1419

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1420

{

1421

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1426

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1427

'duration': 220,

1428

'upload_date': '20150625',

1429

'uploader_id': 'dorappi2000',

1430

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1431

'uploader': 'dorappi2000',

1432

'formats': 'mincount:31',

1433

},

1434

'skip': 'not actual anymore',

1435

},

1436

# DASH manifest with segment_list

1437

{

1438

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1439

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1444

'uploader': 'Airtek',

1445

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1446

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1447

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1448

},

1449

'params': {

1450

'youtube_include_dash_manifest': True,

1451

'format': '135', # bestvideo

1452

},

1453

'skip': 'This live event has ended.',

1454

},

1455

{

1456

# Multifeed videos (multiple cameras), URL is for Main Camera

1457

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1458

'info_dict': {

1459

'id': 'jvGDaLqkpTg',

1460

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1461

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

1469

'duration': 10643,

1470

'upload_date': '20161111',

1471

'uploader': 'Team PGP',

1472

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1473

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1480

'description': 'md5:e03b909557865076822aa169218d6a5d',

1481

'duration': 10991,

1482

'upload_date': '20161111',

1483

'uploader': 'Team PGP',

1484

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1485

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1492

'description': 'md5:e03b909557865076822aa169218d6a5d',

1493

'duration': 10995,

1494

'upload_date': '20161111',

1495

'uploader': 'Team PGP',

1496

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1504

'description': 'md5:e03b909557865076822aa169218d6a5d',

1505

'duration': 10990,

1506

'upload_date': '20161111',

1507

'uploader': 'Team PGP',

1508

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1514

},

1515

'skip': 'Not multifeed anymore',

1516

},

1517

{

1518

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1519

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1520

'info_dict': {

1521

'id': 'gVfLd0zydlo',

1522

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1523

},

1524

'playlist_count': 2,

1525

'skip': 'Not multifeed anymore',

1526

},

1527

{

1528

'url': 'https://vid.plus/FlRa-iH7PGw',

1529

'only_matching': True,

1530

},

1531

{

1532

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1533

'only_matching': True,

1534

},

1535

{

1536

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1537

# Also tests cut-off URL expansion in video description (see

1538

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1539

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1540

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1545

'alt_title': 'Dark Walk',

1546

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1547

'duration': 133,

1548

'upload_date': '20151119',

1549

'uploader_id': 'IronSoulElf',

1550

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1551

'uploader': 'IronSoulElf',

1552

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1553

'track': 'Dark Walk',

1554

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1555

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1556

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1557

'categories': ['Film & Animation'],

1558

'view_count': int,

1559

'live_status': 'not_live',

1560

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1561

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1562

'tags': 'count:13',

1563

'availability': 'public',

1564

'channel': 'IronSoulElf',

1565

'playable_in_embed': True,

1566

'like_count': int,

1567

'age_limit': 0,

1568

'channel_follower_count': int

1569

},

1570

'params': {

1571

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1576

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1577

'only_matching': True,

1578

},

1579

{

1580

# Video with yt:stretch=17:0

1581

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1586

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1587

'upload_date': '20151107',

1588

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1589

'uploader': 'CH GAMER DROID',

1590

},

1591

'params': {

1592

'skip_download': True,

1593

},

1594

'skip': 'This video does not exist.',

1595

},

1596

{

1597

# Video with incomplete 'yt:stretch=16:'

1598

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1599

'only_matching': True,

1600

},

1601

{

1602

# Video licensed under Creative Commons

1603

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1608

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1609

'duration': 721,

1610

'upload_date': '20150128',

1611

'uploader_id': 'BerkmanCenter',

1612

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1613

'uploader': 'The Berkman Klein Center for Internet & Society',

1614

'license': 'Creative Commons Attribution license (reuse allowed)',

1615

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1616

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1617

'like_count': int,

1618

'age_limit': 0,

1619

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1620

'channel': 'The Berkman Klein Center for Internet & Society',

1621

'availability': 'public',

1622

'view_count': int,

1623

'categories': ['Education'],

1624

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1625

'live_status': 'not_live',

1626

'playable_in_embed': True,

1627

'comment_count': int,

1628

'channel_follower_count': int

1629

},

1630

'params': {

1631

'skip_download': True,

},

},

{

# Channel-like uploader_url

1636

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1641

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1642

'duration': 4060,

1643

'upload_date': '20151120',

1644

'uploader': 'Bernie Sanders',

1645

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1646

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1647

'license': 'Creative Commons Attribution license (reuse allowed)',

1648

'playable_in_embed': True,

1649

'tags': 'count:12',

1650

'like_count': int,

1651

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'age_limit': 0,

1653

'availability': 'public',

1654

'categories': ['News & Politics'],

1655

'channel': 'Bernie Sanders',

1656

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1657

'view_count': int,

1658

'live_status': 'not_live',

1659

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1660

'comment_count': int,

1661

'channel_follower_count': int

1662

},

1663

'params': {

1664

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1669

'only_matching': True,

1670

},

1671

{

1672

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1673

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1674

'only_matching': True,

1675

},

1676

{

1677

# Rental video preview

1678

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1683

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1684

'upload_date': '20150811',

1685

'uploader': 'FlixMatrix',

1686

'uploader_id': 'FlixMatrixKaravan',

1687

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1688

'license': 'Standard YouTube License',

1689

},

1690

'params': {

1691

'skip_download': True,

1692

},

1693

'skip': 'This video is not available.',

1694

},

1695

{

1696

# YouTube Red video with episode data

1697

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1702

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1703

'duration': 2085,

1704

'upload_date': '20170118',

1705

'uploader': 'Vsauce',

1706

'uploader_id': 'Vsauce',

1707

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1708

'series': 'Mind Field',

1709

'season_number': 1,

1710

'episode_number': 1,

1711

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1712

'tags': 'count:12',

1713

'view_count': int,

1714

'availability': 'public',

1715

'age_limit': 0,

1716

'channel': 'Vsauce',

1717

'episode': 'Episode 1',

1718

'categories': ['Entertainment'],

1719

'season': 'Season 1',

1720

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1721

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1722

'like_count': int,

1723

'playable_in_embed': True,

1724

'live_status': 'not_live',

1725

'channel_follower_count': int

1726

},

1727

'params': {

1728

'skip_download': True,

1729

},

1730

'expected_warnings': [

1731

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1736

# as inappropriate or offensive to some audiences.

1737

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1742

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1743

'duration': 965,

1744

'upload_date': '20140124',

1745

'uploader': 'New Century Foundation',

1746

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1747

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1748

},

1749

'params': {

1750

'skip_download': True,

1751

},

1752

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1757

'only_matching': True,

1758

},

1759

{

1760

# geo restricted to JP

1761

'url': 'sJL6WA-aGkQ',

1762

'only_matching': True,

1763

},

1764

{

1765

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1766

'only_matching': True,

1767

},

1768

{

1769

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1770

'only_matching': True,

1771

},

1772

{

1773

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1774

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1775

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1780

'only_matching': True,

1781

},

1782

{

1783

# Video with unsupported adaptive stream type formats

1784

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1789

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1790

'duration': 433,

1791

'upload_date': '20130923',

1792

'uploader': 'Amelia Putri Harwita',

1793

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1794

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1795

'formats': 'maxcount:10',

1796

},

1797

'params': {

1798

'skip_download': True,

1799

'youtube_include_dash_manifest': False,

1800

},

1801

'skip': 'not actual anymore',

1802

},

1803

{

1804

# Youtube Music Auto-generated description

1805

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1810

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1811

'upload_date': '20190312',

1812

'uploader': 'Stephen - Topic',

1813

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1814

'artist': 'Stephen',

1815

'track': 'Voyeur Girl',

1816

'album': 'it\'s too much love to know my dear',

1817

'release_date': '20190313',

1818

'release_year': 2019,

1819

'alt_title': 'Voyeur Girl',

1820

'view_count': int,

1821

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1822

'playable_in_embed': True,

1823

'like_count': int,

1824

'categories': ['Music'],

1825

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1826

'channel': 'Stephen',

1827

'availability': 'public',

1828

'creator': 'Stephen',

1829

'duration': 169,

1830

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1831

'age_limit': 0,

1832

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1833

'tags': 'count:11',

1834

'live_status': 'not_live',

1835

'channel_follower_count': int

1836

},

1837

'params': {

1838

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1843

'only_matching': True,

1844

},

1845

{

1846

# invalid -> valid video id redirection

1847

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1852

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1853

'upload_date': '20090125',

1854

'uploader': 'Prochorowka',

1855

'uploader_id': 'Prochorowka',

1856

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1857

'artist': 'Panjabi MC',

1858

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1859

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1860

},

1861

'params': {

1862

'skip_download': True,

1863

},

1864

'skip': 'Video unavailable',

1865

},

1866

{

1867

# empty description results in an empty string

1868

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1875

'uploader_id': 'ElevageOrVert',

1876

'uploader': 'ElevageOrVert',

1877

'view_count': int,

1878

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1879

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1880

'like_count': int,

1881

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1882

'tags': [],

1883

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1884

'availability': 'public',

1885

'age_limit': 0,

1886

'categories': ['Pets & Animals'],

1887

'duration': 7,

1888

'playable_in_embed': True,

1889

'live_status': 'not_live',

1890

'channel': 'ElevageOrVert',

1891

'channel_follower_count': int

1892

},

1893

'params': {

1894

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1899

# see [2] for an example with '};' inside ytInitialPlayerResponse

1900

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1901

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1902

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1907

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1908

'upload_date': '20130831',

1909

'uploader_id': 'kudvenkat',

1910

'uploader': 'kudvenkat',

1911

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1912

'like_count': int,

1913

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1914

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1915

'live_status': 'not_live',

1916

'categories': ['Education'],

1917

'availability': 'public',

1918

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1919

'tags': 'count:12',

1920

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1925

'comment_count': int,

1926

'channel_follower_count': int

1927

},

1928

'params': {

1929

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1934

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1935

'only_matching': True,

1936

},

1937

{

1938

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1939

'only_matching': True,

1940

},

1941

{

1942

# https://github.com/ytdl-org/youtube-dl/pull/28094

1943

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1949

'upload_date': '20141120',

1950

'uploader': 'The Cinematic Orchestra - Topic',

1951

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1952

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1953

'artist': 'The Cinematic Orchestra',

1954

'track': 'Burn Out',

1955

'album': 'Every Day',

1956

'like_count': int,

1957

'live_status': 'not_live',

1958

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1963

'creator': 'The Cinematic Orchestra',

1964

'channel': 'The Cinematic Orchestra',

1965

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1966

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'availability': 'public',

1968

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1969

'categories': ['Music'],

1970

'playable_in_embed': True,

1971

'channel_follower_count': int

1972

},

1973

'params': {

1974

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1979

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1980

'only_matching': True,

1981

},

1982

{

1983

# controversial video, requires bpctr/contentCheckOk

1984

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1989

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1990

'uploader': 'CBS Mornings',

1991

'uploader_id': 'CBSThisMorning',

1992

'upload_date': '20140716',

1993

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1994

'duration': 170,

1995

'categories': ['News & Politics'],

1996

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1997

'view_count': int,

1998

'channel': 'CBS Mornings',

1999

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2000

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2001

'age_limit': 18,

2002

'availability': 'needs_auth',

2003

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2004

'like_count': int,

2005

'live_status': 'not_live',

2006

'playable_in_embed': True,

2007

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2012

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2017

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2018

'upload_date': '20201120',

2019

'uploader': 'Walk around Japan',

2020

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2021

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2022

'duration': 1456,

2023

'categories': ['Travel & Events'],

2024

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'view_count': int,

2026

'channel': 'Walk around Japan',

2027

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2028

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2029

'age_limit': 0,

2030

'availability': 'public',

2031

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2032

'live_status': 'not_live',

2033

'playable_in_embed': True,

2034

'channel_follower_count': int

2035

},

2036

'params': {

2037

'skip_download': True,

2038

},

2039

}, {

2040

# Has multiple audio streams

2041

'url': 'WaOKSUlf4TM',

2042

'only_matching': True

2043

}, {

2044

# Requires Premium: has format 141 when requested using YTM url

2045

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2046

'only_matching': True

2047

}, {

2048

# multiple subtitles with same lang_code

2049

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2050

'only_matching': True,

2051

}, {

2052

# Force use android client fallback

2053

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2054

'info_dict': {

2055

'id': 'YOelRv7fMxY',

2056

'title': 'DIGGING A SECRET TUNNEL Part 1',

2057

'ext': '3gp',

2058

'upload_date': '20210624',

2059

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2060

'uploader': 'colinfurze',

2061

'uploader_id': 'colinfurze',

2062

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2063

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2064

'duration': 596,

2065

'categories': ['Entertainment'],

2066

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2067

'view_count': int,

2068

'channel': 'colinfurze',

2069

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2070

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2071

'age_limit': 0,

2072

'availability': 'public',

2073

'like_count': int,

2074

'live_status': 'not_live',

2075

'playable_in_embed': True,

2076

'channel_follower_count': int

2077

},

2078

'params': {

2079

'format': '17', # 3gp format available on android

2080

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2085

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2086

'only_matching': True,

2087

'params': {

2088

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2093

'only_matching': True,

2094

}, {

2095

'note': 'Storyboards',

2096

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2102

'uploader_id': 'scishow',

2103

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2104

'upload_date': '20140324',

2105

'uploader': 'SciShow',

2106

'like_count': int,

2107

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2108

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2109

'view_count': int,

2110

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2111

'playable_in_embed': True,

2112

'tags': 'count:12',

2113

'uploader_url': 'http://www.youtube.com/user/scishow',

2114

'availability': 'public',

2115

'channel': 'SciShow',

2116

'live_status': 'not_live',

2117

'duration': 248,

2118

'categories': ['Education'],

2119

'age_limit': 0,

2120

'channel_follower_count': int

2121

}, 'params': {'format': 'mhtml', 'skip_download': True}

2122

}, {

2123

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2124

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2129

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2130

'uploader': 'Leon Nguyen',

2131

'uploader_id': 'VNSXIII',

2132

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2133

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2134

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2139

'tags': 'count:23',

2140

'playable_in_embed': True,

2141

'live_status': 'not_live',

2142

'upload_date': '20220103',

2143

'like_count': int,

2144

'availability': 'public',

2145

'channel': 'Leon Nguyen',

2146

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2147

'comment_count': int,

2148

'channel_follower_count': int

2149

}

2150

}, {

2151

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2152

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2157

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2158

'uploader': 'Quackity',

2159

'uploader_id': 'QuackityHQ',

2160

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2161

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2162

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2167

'tags': 'count:26',

2168

'playable_in_embed': True,

2169

'live_status': 'not_live',

2170

'release_timestamp': 1641172509,

2171

'release_date': '20220103',

2172

'upload_date': '20220103',

2173

'like_count': int,

2174

'availability': 'public',

2175

'channel': 'Quackity',

2176

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2177

'channel_follower_count': int

2178

}

2179

},

2180

{ # continuous livestream. Microformat upload date should be preferred.

2181

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2182

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2183

'info_dict': {

2184

'id': 'kgx4WGK0oNU',

2185

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2186

'ext': 'mp4',

2187

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2188

'availability': 'public',

2189

'age_limit': 0,

2190

'release_timestamp': 1637975704,

2191

'upload_date': '20210619',

2192

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2193

'live_status': 'is_live',

2194

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2195

'uploader': '阿鲍Abao',

2196

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2197

'channel': 'Abao in Tokyo',

2198

'channel_follower_count': int,

2199

'release_date': '20211127',

2200

'tags': 'count:39',

2201

'categories': ['People & Blogs'],

2202

'like_count': int,

2203

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2204

'view_count': int,

2205

'playable_in_embed': True,

2206

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2207

},

2208

'params': {'skip_download': True}

2209

}, {

2210

# Story. Requires specific player params to work.

2211

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2216

'view_count': int,

2217

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2218

'upload_date': '20220526',

2219

'categories': ['Education'],

2220

'title': 'Story',

2221

'channel': 'IT\'S HISTORY',

2222

'description': '',

2223

'uploader_id': 'BlastfromthePast',

2224

'duration': 12,

2225

'uploader': 'IT\'S HISTORY',

2226

'playable_in_embed': True,

2227

'age_limit': 0,

2228

'live_status': 'not_live',

2229

'tags': [],

2230

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2231

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2232

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2233

},

2234

'skip': 'stories get removed after some period of time',

2235

}, {

2236

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2241

'upload_date': '20220323',

2242

'like_count': int,

2243

'availability': 'unlisted',

2244

'channel': 'nao20010128nao',

2245

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2246

'age_limit': 0,

2247

'uploader': 'nao20010128nao',

2248

'uploader_id': 'nao20010128nao',

2249

'categories': ['Music'],

2250

'view_count': int,

2251

'description': '',

2252

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2253

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2254

'live_status': 'not_live',

2255

'playable_in_embed': True,

2256

'channel_follower_count': int,

2257

'duration': 6,

2258

'tags': [],

2259

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2260

}

2261

}, {

2262

'note': '6 channel audio',

2263

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2264

'only_matching': True,

}

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2270

{

2271

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2272

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2277

'upload_date': '20080526',

2278

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2279

'uploader': 'Christopher Sykes',

2280

'uploader_id': 'ChristopherJSykes',

2281

'age_limit': 0,

2282

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2283

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2284

'playable_in_embed': True,

2285

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2286

'like_count': int,

2287

'comment_count': int,

2288

'channel': 'Christopher Sykes',

2289

'live_status': 'not_live',

2290

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2291

'availability': 'public',

2292

'duration': 195,

2293

'view_count': int,

2294

'categories': ['Science & Technology'],

2295

'channel_follower_count': int,

2296

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2297

},

2298

'params': {

2299

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL carrying a non-empty ``list`` query parameter is rejected here;
    every other URL is decided by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level. The local import
    # looks deliberate (presumably so this method stays self-contained if its
    # source is copied elsewhere) - confirm before removing it.
    from ..utils import parse_qs

    qs = parse_qs(url)
    if qs.get('list', [None])[0]:
        return False
    return super().suitable(url)

def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance in-memory caches."""
    super().__init__(*args, **kwargs)
    # player id -> downloaded player JS source (filled by _load_player)
    self._code_cache = {}
    # signature / nsig functions and already-decrypted nsig values
    self._player_cache = {}

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn every ``is_from_start`` format into a lazily generated live DASH format.

    Each matching format dict is mutated in place: it is flagged live, its
    protocol becomes ``http_dash_segments_generator`` and ``fragments`` becomes
    a partial of ``_live_dash_fragments`` bound to a shared manifest feed.
    """
    lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once `delay` seconds have passed since the last refresh."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is serialised; the lookup below runs outside the lock
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts (``url``, ``fragment_count``) for a live DASH stream.

    The newest sequence number is taken from the MPD, or from the
    ``X-Head-Seqnum`` header of the last yielded segment once one exists.
    Generation stops when the stream is no longer live or when
    ``no_fragment_score`` exceeds 30.

    @param format_id        format whose manifest is requested from `mpd_feed`
    @param live_start_time  stream start time, or a falsy value if unknown
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; ``start`` and ``last_error`` are read from it
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the closure below returns it on failure paths that can be
    # reached before the main loop has assigned it for the first time
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest state; returns (should_continue, last sequence number)."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update the sequence here, or part of the stream will be skipped
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # the exception is only used to leave the for loop and restart the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    ``PLAYER_JS_URL`` is looked up first, then ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. `webpage` is accepted but not used here.
    """
    player_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if not player_url:
        return
    return urljoin('https://www.youtube.com', player_url)

def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the player version advertised by the iframe API.

    Returns None when the iframe API page or the player version is unavailable.
    """
    res = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if res:
        player_version = self._search_regex(
            r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
        if player_version:
            return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

def _signature_cache_id(self, example_sig):

2483

""" Return a string representation of a signature """

2484

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2485

2486

@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first `_PLAYER_INFO_RE` pattern matching `player_url`.

    Raises ExtractorError when none of the patterns match.
    """
    for player_re in cls._PLAYER_INFO_RE:
        id_m = re.search(player_re, player_url)
        if id_m:
            break
    else:
        # the loop finished without a match
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for `player_url`, downloading it at most once per player id.

    Returns None if the download produced nothing and no cached copy exists.
    """
    player_id = self._extract_player_info(player_url)
    if player_id not in self._code_cache:
        code = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        # a failed download is not cached, so a later call can try again
        if code:
            self._code_cache[player_id] = code
    return self._code_cache.get(player_id)

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures shaped like `example_sig`.

    A spec stored in the 'youtube-sigfuncs' cache (a list of source indices) is
    used when present. Otherwise the function is parsed from the player JS and
    its index spec is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if code:
        res = self._parse_sig_js(code)

        # Run the function on chr(0), chr(1), ... so each output character
        # reveals the input position it was taken from
        test_string = ''.join(map(chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res

def _print_sig_code(self, func, example_sig):

2531

if not self.get_param('youtube_print_sig_code'):

2532

return

2533

2534

def gen_sig_code(idxs):

2535

def _genslice(start, end, step):

2536

starts = '' if start == 0 else str(start)

2537

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2538

steps = '' if step == 1 else (':%d' % step)

2539

return f's[{starts}{ends}{steps}]'

2540

2541

step = None

2542

# Quelch pyflakes warnings - start will be set when step is set

2543

start = '(Never used)'

2544

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2549

step = None

2550

continue

2551

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2561

2562

test_string = ''.join(map(chr, range(len(example_sig))))

2563

cache_res = func(test_string)

2564

cache_spec = [ord(c) for c in cache_res]

2565

expr_code = ' + '.join(gen_sig_code(cache_spec))

2566

signature_id_tuple = '(%s)' % (

2567

', '.join(str(len(p)) for p in example_sig.split('.')))

2568

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2569

' return %s\n') % (signature_id_tuple, expr_code)

2570

self.to_screen('Extracted signature function:\n' + code)

2571

2572

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a Python callable.

    The patterns are tried in order; the function named by the first match
    is extracted with JSInterpreter and wrapped to take a single string.
    """
    # Literal parentheses are written as \( and \); a bare `$` outside a
    # character class would anchor at end of input and stop the pattern matching
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # the extracted function takes its arguments as a list
    return lambda s: initial_function([s])

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    try:
        # One function is cached per (player URL, signature shape) pair
        player_id = (player_url, self._signature_cache_id(s))
        if player_id not in self._player_cache:
            func = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[player_id] = func
        func = self._player_cache[player_id]
        self._print_sig_code(func, s)
        return func(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the full traceback text
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Results are memoised per input value, regardless of the player used
    sig_id = ('nsig_value', s)
    if sig_id in self._player_cache:
        return self._player_cache[sig_id]

    try:
        # The extracted n function itself is cached per player URL
        player_id = ('nsig', player_url)
        if player_id not in self._player_cache:
            self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
        func = self._player_cache[player_id]
        self._player_cache[sig_id] = func(s)
        self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
        return self._player_cache[sig_id]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

def _extract_n_function_name(self, jscode):
    """Return the name of the "n" parameter transform function found in the player JS.

    When the call site indexes into an array (``name[idx](...)``), the
    array literal assigned to ``name`` is parsed and the entry at ``idx``
    is returned instead.
    """
    # Literal parentheses are written as \( and \); a bare `$` outside a
    # character class would anchor at end of input and stop the pattern matching
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

def _extract_n_function(self, video_id, player_url):
    """Return a callable that transforms an "n" value using the player's n function.

    The function code is loaded from the 'youtube-nsig' cache when present;
    otherwise it is extracted from the player JS and stored there.
    The returned callable raises ExtractorError if the JS function's result
    starts with 'enhanced_except_'.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)

    if func_code:
        jsi = JSInterpreter(func_code)
    else:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self.cache.store('youtube-nsig', player_id, func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    func = jsi.extract_function_from_code(*func_code)

    def inner(s):
        ret = func([s])
        if ret.startswith('enhanced_except_'):
            raise ExtractorError('Signature function returned an exception')
        return ret

    return inner

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ``ytcfg['STS']`` when available, otherwise it is
    searched for in the player JS. Returns None if it cannot be determined
    and `fatal` is false.
    """
    sts = None
    if isinstance(ytcfg, dict):
        sts = int_or_none(ytcfg.get('STS'))

    if not sts:
        # Attempt to extract from player
        if player_url is None:
            error_msg = 'Cannot extract signature timestamp without player_url.'
            if fatal:
                raise ExtractorError(error_msg)
            self.report_warning(error_msg)
            return
        code = self._load_player(video_id, player_url, fatal=fatal)
        if code:
            sts = int_or_none(self._search_regex(
                r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watch-time tracking URLs so the video counts as watched.

    Stops with a warning as soon as one of the two tracking URLs is missing
    from `player_responses`. Download failures are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # 16 characters drawn uniformly from the 64-character alphabet
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': video_length,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by an arbitrary webpage.

    An Invidious-style ``<link rel="alternate">`` pointing at a watch URL is
    yielded alone and ends extraction. Otherwise the parent class results
    are yielded, followed by lazyYT embeds and "YouTube Video Importer"
    plugin players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    mobj = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if mobj:
        yield cls.url_result(mobj.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    # (verbose-mode regex: the whitespace inside the pattern is not significant)
    for m in re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # m is a tuple of all groups; the video id is the last one
        yield cls.url_result(m[-1], cls, m[-1])

@classmethod
def extract_id(cls, url):
    """Return the video id for `url`; raises ExtractorError if `get_temp_id` finds none."""
    video_id = cls.get_temp_id(url)
    if not video_id:
        raise ExtractorError(f'Invalid URL: {url}')
    return video_id

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar in `data`.

    Start times come from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapter_list = traverse_obj(
        data, (
            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
            'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
        ), expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=lambda chapter: float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
        chapter_title=lambda chapter: traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
        duration=duration)

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels in `data`.

    Returns the first non-empty chapter list, or an empty list if no panel
    produces any chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # The generator is lazy, so panels after the first usable one are not processed
    return next(filter(None, (
        self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
                               chapter_time, chapter_title, duration)
        for contents in content_list)), [])

def _extract_chapters_from_description(self, description, duration):

2789

return self._extract_chapters(

2790

re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),

2791

chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],

2792

duration=duration, strict=False)

2793

2794

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2799

'title': chapter_title(chapter),

2800

} for chapter in chapter_list or []]

2801

if not strict:

2802

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2803

2804

chapters = [{'start_time': 0}]

2805

for idx, chapter in enumerate(chapter_list):

2806

if chapter['start_time'] is None:

2807

self.report_warning(f'Incomplete chapter {idx}')

2808

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2809

chapters.append(chapter)

2810

else:

2811

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2812

return chapters[1:]

2813

2814

def _extract_comment(self, comment_renderer, parent=None):

2815

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2820

2821

# note: timestamp is an estimate calculated from the current time and time_text

2822

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2823

author = self._get_text(comment_renderer, 'authorText')

2824

author_id = try_get(comment_renderer,

2825

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

2826

2827

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2828

lambda x: x['likeCount']), str)) or 0

2829

author_thumbnail = try_get(comment_renderer,

2830

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

2831

2832

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2833

is_favorited = 'creatorHeart' in (try_get(

2834

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2839

'time_text': time_text,

2840

'like_count': votes,

2841

'is_favorited': is_favorited,

2842

'author': author,

2843

'author_id': author_id,

2844

'author_thumbnail': author_thumbnail,

2845

'author_is_uploader': author_is_uploader,

2846

'parent': parent or 'root'

2847

}

2848

2849

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations page by page.

    Called without ``parent`` for the top-level comment section and
    recursively with ``parent`` set to a comment ID for its reply thread.
    ``tracker`` is a dict of running counters shared across the recursive
    calls; it is created here when not supplied.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Read the comments header; return the continuation of the requested sort order."""
        _continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_endpoint = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_endpoint)
                or self._extract_continuation(sort_menu_item))
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, descending into reply threads.

        A bare ``yield`` (None) tells the caller that the parent-comment
        limit has been reached.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(
                reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric limits fall back to sys.maxsize; the overall
    # limit (first value) is not used in this method.
    _, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            continuation_items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header; the comments
                # themselves come from the continuation it points to.
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2994

2995

@staticmethod

2996

def _generate_comment_continuation(video_id):

2997

"""

2998

Generates initial comment section continuation token from given video id

2999

"""

3000

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

3001

return base64.b64encode(token.encode()).decode()

3002

3003

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry point for comment extraction.

    Returns a lazy iterator over the comments of the section whose
    ``sectionIdentifier`` is ``'comment-item-section'``, capped at the first
    value of the ``max_comments`` extractor argument.
    """
    def _real_comment_extract(contents):
        section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                section = item
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    # None (no usable limit) makes islice unbounded
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

3013

3014

@staticmethod

3015

def _get_checkok_params():

3016

return {'contentCheckOk': True, 'racyCheckOk': True}

3017

3018

@classmethod

3019

def _generate_player_context(cls, sts=None):

3020

context = {

3021

'html5Preference': 'HTML5_PREF_WANTS',

3022

}

3023

if sts is not None:

3024

context['signatureTimestamp'] = sts

3025

return {

3026

'playbackContext': {

3027

'contentPlaybackContext': context

3028

},

3029

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status marks the video as age-gated.

    True when ``desktopLegacyAgeGateReason`` is set, or when the status or
    reason text contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

3043

3044

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is ``UNPLAYABLE``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3047

3048

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the ``player`` endpoint response for ``video_id`` as ``client``.

    The signature timestamp is only looked up when ``player_url`` is given.
    Returns the response, or None when it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    )
    return response or None

3067

3068

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` extractor argument into client names.

    Clients whose name starts with ``_`` cannot be requested. ``default``
    expands to ``android`` and ``web``, ``all`` to every allowed client
    sorted by descending priority; unknown names are skipped with a
    warning. For music URLs the ``<client>_music`` variant of every
    requested client is added when it exists. Returns the names with
    duplicates removed, in order of first appearance.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions before extending: feeding extend() a generator
        # over the same list would iterate the list while it grows.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3091

3092

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for ``video_id`` from the given clients.

    Clients are processed in the order given; extra clients may be queued
    while iterating when a response is unplayable or age-gated.
    Returns ``(player_responses, player_url)``. When every request failed
    the last error is raised; otherwise it is only reported as a warning.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() takes the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """Queue the first given client that exists and has not been used yet."""
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS:
                continue
            if actual_client not in all_clients:
                clients.append(client_name)
                all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the earlier one becomes a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3173

3174

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Generate format dicts from ``streaming_data``, then the subtitles dict.

    Direct ("https") formats are yielded first, followed by HLS and DASH
    manifest formats unless skipped. The very last item yielded is the
    merged subtitles dict, not a format.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: -1}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypted_n = self._decrypt_nsig(query['n'][0], video_id, player_url)
                fmt_url = update_url_query(fmt_url, {'n': decrypted_n})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id and quality to a manifest format.

        Returns False when the same itag was already seen for ``proto``.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        base_itag = try_get(f, lambda f: f['format_id'].split('-')[0])
        f['quality'] = itag_qualities.get(base_itag, -1)
        if f['quality'] == -1 and f.get('height'):
            # Borrow the quality of the known resolution closest in height
            nearest_height = min(res_qualities, key=lambda x: abs(x - f['height']))
            f['quality'] = q(res_qualities[nearest_height])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                hls_itag = self._search_regex(r'/itag/(\d+)', f['url'], 'itag', default=None)
                if process_manifest_format(f, 'hls', hls_itag):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if not process_manifest_format(f, 'dash', f['format_id']):
                    continue
                f['filesize'] = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                if live_from_start:
                    f['is_from_start'] = True
                yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate one ``mhtml`` storyboard format per entry of the storyboard spec.

    The spec is a ``|``-separated string: its first field is the base URL
    and each further field a ``#``-separated storyboard description.
    Descriptions that do not have 8 fields, or whose first five are not
    non-zero integers, are skipped with a warning.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The spec was reversed above, so pop() removes its first field
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return

    last_index = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = [int_or_none(value) for value in args[:5]]
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name_part, sigh = args[6:]

        url = base_url.replace('$L', str(last_index - i)).replace('$N', name_part) + f'&sigh={sigh}'
        # Each fragment is one sheet of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = [{
            'url': url.replace('$M', str(j)),
            'duration': min(fragment_duration, duration - (j * fragment_duration)),
        } for j in range(math.ceil(fragment_count))]
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }

3395

3396

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    Returns ``(webpage, master_ytcfg, player_responses, player_url)``;
    ``webpage`` is None when ``webpage`` is listed in the ``player_skip``
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3409

3410

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine liveness and collect the formats and subtitles of a video.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats,
    subtitles)``. ``is_live`` comes from the video details and falls back
    to ``isLiveNow`` of the live broadcast details only when it is None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    # The generator yields the formats first and the subtitles dict last
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles

3420

3421

def _real_extract(self, url):

3422

url, smuggled_data = unsmuggle_url(url, {})

3423

video_id = self._match_id(url)

3424

3425

base_url = self.http_scheme() + '//www.youtube.com/'

3426

webpage_url = base_url + 'watch?v=' + video_id

3427

3428

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3429

3430

playability_statuses = traverse_obj(

3431

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3432

3433

trailer_video_id = get_first(

3434

playability_statuses,

3435

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3436

expected_type=str)

3437

if trailer_video_id:

3438

return self.url_result(

3439

trailer_video_id, self.ie_key(), trailer_video_id)

3440

3441

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3442

if webpage else (lambda x: None))

3443

3444

video_details = traverse_obj(

3445

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3446

microformats = traverse_obj(

3447

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3448

expected_type=dict, default=[])

3449

video_title = (

3450

get_first(video_details, 'title')

3451

or self._get_text(microformats, (..., 'title'))

3452

or search_meta(['og:title', 'twitter:title', 'title']))

3453

video_description = get_first(video_details, 'shortDescription')

3454

3455

multifeed_metadata_list = get_first(

3456

player_responses,

3457

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3458

expected_type=str)

3459

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3460

if self.get_param('noplaylist'):

3461

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3466

# Unquote should take place before split on comma (,) since textual

3467

# fields may contain comma as well (see

3468

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3469

feed_data = urllib.parse.parse_qs(

3470

urllib.parse.unquote_plus(feed))

3471

3472

def feed_entry(name):

3473

return try_get(

3474

feed_data, lambda x: x[name][0], str)

3475

3476

feed_id = feed_entry('id')

3477

if not feed_id:

3478

continue

3479

feed_title = feed_entry('title')

3480

title = video_title

3481

if feed_title:

3482

title += ' (%s)' % feed_title

3483

entries.append({

3484

'_type': 'url_transparent',

3485

'ie_key': 'Youtube',

3486

'url': smuggle_url(

3487

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3488

{'force_singlefeed': True}),

3489

'title': title,

3490

})

3491

feed_ids.append(feed_id)

3492

self.to_screen(

3493

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3494

% (', '.join(feed_ids), video_id))

3495

return self.playlist_result(

3496

entries, video_id, video_title, video_description)

3497

3498

duration = int_or_none(

3499

get_first(video_details, 'lengthSeconds')

3500

or get_first(microformats, 'lengthSeconds')

3501

or parse_duration(search_meta('duration'))) or None

3502

3503

live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \

3504

self._list_formats(video_id, microformats, video_details, player_responses, player_url)

3505

3506

if not formats:

3507

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3508

self.report_drm(video_id)

3509

pemr = get_first(

3510

playability_statuses,

3511

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3512

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3513

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3514

if subreason:

3515

if subreason == 'The uploader has not made this video available in your country.':

3516

countries = get_first(microformats, 'availableCountries')

3517

if not countries:

3518

regions_allowed = search_meta('regionsAllowed')

3519

countries = regions_allowed.split(',') if regions_allowed else None

3520

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3521

reason += f'. {subreason}'

3522

if reason:

3523

self.raise_no_formats(reason, expected=True)

3524

3525

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3526

if not keywords and webpage:

3527

keywords = [

3528

unescapeHTML(m.group('content'))

3529

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3530

for keyword in keywords:

3531

if keyword.startswith('yt:stretch='):

3532

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3533

if mobj:

3534

# NB: float is intentional for forcing float division

3535

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3540

f['stretched_ratio'] = ratio

3541

break

3542

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3543

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3544

if thumbnail_url:

3545

thumbnails.append({

3546

'url': thumbnail_url,

3547

})

3548

original_thumbnails = thumbnails.copy()

3549

3550

# The best resolution thumbnails sometimes does not appear in the webpage

3551

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3552

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3553

thumbnail_names = [

3554

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3555

# in resolution, these are not the custom thumbnail. So de-prioritize them

3556

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3557

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3558

]

3559

n_thumbnail_names = len(thumbnail_names)

3560

thumbnails.extend({

3561

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3562

video_id=video_id, name=name, ext=ext,

3563

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3564

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3565

for thumb in thumbnails:

3566

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3567

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3568

self._remove_duplicate_formats(thumbnails)

3569

self._downloader._sort_thumbnails(original_thumbnails)

3570

3571

category = get_first(microformats, 'category') or search_meta('genre')

3572

channel_id = str_or_none(

3573

get_first(video_details, 'channelId')

3574

or get_first(microformats, 'externalChannelId')

3575

or search_meta('channelId'))

3576

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3577

3578

live_content = get_first(video_details, 'isLiveContent')

3579

is_upcoming = get_first(video_details, 'isUpcoming')

3580

if is_live is None:

3581

if is_upcoming or live_content is False:

3582

is_live = False

3583

if is_upcoming is None and (live_content or is_live):

3584

is_upcoming = False

3585

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3586

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3587

if not duration and live_end_time and live_start_time:

3588

duration = live_end_time - live_start_time

3589

3590

if is_live and self.get_param('live_from_start'):

3591

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3592

3593

formats.extend(self._extract_storyboard(player_responses, duration))

3594

3595

# source_preference is lower for throttled/potentially damaged formats

3596

self._sort_formats(formats, (

3597

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3602

'formats': formats,

3603

'thumbnails': thumbnails,

3604

# The best thumbnail that we are sure exists. Prevents unnecessary

3605

# URL checking if user don't care about getting the best possible thumbnail

3606

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3607

'description': video_description,

3608

'uploader': get_first(video_details, 'author'),

3609

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3610

'uploader_url': owner_profile_url,

3611

'channel_id': channel_id,

3612

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3613

'duration': duration,

3614

'view_count': int_or_none(

3615

get_first((video_details, microformats), (..., 'viewCount'))

3616

or search_meta('interactionCount')),

3617

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3618

'age_limit': 18 if (

3619

get_first(microformats, 'isFamilySafe') is False

3620

or search_meta('isFamilyFriendly') == 'false'

3621

or search_meta('og:restrictions:age') == '18+') else 0,

3622

'webpage_url': webpage_url,

3623

'categories': [category] if category else None,

3624

'tags': keywords,

3625

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3626

'is_live': is_live,

3627

'was_live': (False if is_live or is_upcoming or live_content is False

3628

else None if is_live is None or is_upcoming is None

3629

else live_content),

3630

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3631

'release_timestamp': live_start_time,

3632

}

3633

3634

if get_first(video_details, 'isPostLiveDvr'):

3635

self.write_debug('Video is in Post-Live Manifestless mode')

3636

info['live_status'] = 'post_live'

3637

if (duration or 0) > 4 * 3600:

3638

self.report_warning(

3639

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3640

'This is a known issue and patches are welcome')

3641

3642

subtitles = {}

3643

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3644

if pctr:

3645

def get_lang_code(track):

3646

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3647

or track.get('languageCode'))

3648

3649

# Converted into dicts to remove duplicates

3650

captions = {

3651

get_lang_code(sub): sub

3652

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3653

translation_languages = {

3654

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3655

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3656

3657

def process_language(container, base_url, lang_code, sub_name, query):

3658

lang_subs = container.setdefault(lang_code, [])

3659

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3670

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3671

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3672

for lang_code, caption_track in captions.items():

3673

base_url = caption_track.get('baseUrl')

3674

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3675

if not base_url:

3676

continue

3677

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3678

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3683

if not caption_track.get('isTranslatable'):

3684

continue

3685

for trans_code, trans_name in translation_languages.items():

3686

if not trans_code:

3687

continue

3688

orig_trans_code = trans_code

3689

if caption_track.get('kind') != 'asr':

3690

if not get_translated_subs:

3691

continue

3692

trans_code += f'-{lang_code}'

3693

trans_name += format_field(lang_name, None, ' from %s')

3694

# Add an "-orig" label to the original language so that it can be distinguished.

3695

# The subs are returned without "-orig" as well for compatibility

3696

if lang_code == f'a-{orig_trans_code}':

3697

process_language(

3698

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3699

# Setting tlang=lang returns damaged subtitles.

3700

process_language(automatic_captions, base_url, trans_code, trans_name,

3701

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3702

3703

info['automatic_captions'] = automatic_captions

3704

info['subtitles'] = subtitles

3705

3706

parsed_url = urllib.parse.urlparse(url)

3707

for component in [parsed_url.fragment, parsed_url.query]:

3708

query = urllib.parse.parse_qs(component)

3709

for k, v in query.items():

3710

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3711

d_k += '_time'

3712

if d_k not in info and k in s_ks:

3713

info[d_k] = parse_duration(query[k][0])

3714

3715

# Youtube Music Auto-generated description

3716

if video_description:

3717

mobj = re.search(

3718

r'''(?xs)

3719

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3720

(?P<album>[^\n]+)

3721

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3722

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3723

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3724

.+\nAuto-generated\ by\ YouTube\.\s*$

3725

''', video_description)

3726

if mobj:

3727

release_year = mobj.group('release_year')

3728

release_date = mobj.group('release_date')

3729

if release_date:

3730

release_date = release_date.replace('-', '')

3731

if not release_year:

3732

release_year = release_date[:4]

3733

info.update({

3734

'album': mobj.group('album'.strip()),

3735

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3736

'track': mobj.group('track').strip(),

3737

'release_date': release_date,

3738

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3744

if not initial_data:

3745

query = {'videoId': video_id}

3746

query.update(self._get_checkok_params())

3747

initial_data = self._extract_response(

3748

item_id=video_id, ep='next', fatal=False,

3749

ytcfg=master_ytcfg, query=query,

3750

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3751

note='Downloading initial data API JSON')

3752

3753

info['comment_count'] = traverse_obj(initial_data, (

3754

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3755

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3756

), (

3757

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3758

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3759

), expected_type=int_or_none, get_all=False)

3760

3761

try: # This will error if there is no livechat

3762

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3763

except (KeyError, IndexError, TypeError):

3764

pass

3765

else:

3766

info.setdefault('subtitles', {})['live_chat'] = [{

3767

# url is needed to set cookies

3768

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3769

'video_id': video_id,

3770

'ext': 'json',

3771

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3777

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3778

or self._extract_chapters_from_description(video_description, duration)

3779

or None)

3780

3781

contents = traverse_obj(

3782

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3783

expected_type=list, default=[])

3784

3785

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3786

if vpir:

3787

stl = vpir.get('superTitleLink')

3788

if stl:

3789

stl = self._get_text(stl)

3790

if try_get(

3791

vpir,

3792

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3793

info['location'] = stl

3794

else:

3795

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3796

if mobj:

3797

info.update({

3798

'series': mobj.group(1),

3799

'season_number': int(mobj.group(2)),

3800

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3805

list) or []):

3806

tbr = tlb.get('toggleButtonRenderer') or {}

3807

for getter, regex in [(

3808

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3809

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3810

lambda x: x['accessibility'],

3811

lambda x: x['accessibilityData']['accessibilityData'],

3812

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3813

label = (try_get(tbr, getter, dict) or {}).get('label')

3814

if label:

3815

mobj = re.match(regex, label)

3816

if mobj:

3817

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3818

break

3819

sbr_tooltip = try_get(

3820

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3821

if sbr_tooltip:

3822

like_count, dislike_count = sbr_tooltip.split(' / ')

3823

info.update({

3824

'like_count': str_to_int(like_count),

3825

'dislike_count': str_to_int(dislike_count),

3826

})

3827

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3828

if vsir:

3829

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3830

info.update({

3831

'channel': self._get_text(vor, 'title'),

3832

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3837

list) or []

3838

multiple_songs = False

3839

for row in rows:

3840

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3841

multiple_songs = True

3842

break

3843

for row in rows:

3844

mrr = row.get('metadataRowRenderer') or {}

3845

mrr_title = mrr.get('title')

3846

if not mrr_title:

3847

continue

3848

mrr_title = self._get_text(mrr, 'title')

3849

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3850

if mrr_title == 'License':

3851

info['license'] = mrr_contents_text

3852

elif not multiple_songs:

3853

if mrr_title == 'Album':

3854

info['album'] = mrr_contents_text

3855

elif mrr_title == 'Artist':

3856

info['artist'] = mrr_contents_text

3857

elif mrr_title == 'Song':

3858

info['track'] = mrr_contents_text

3859

3860

fallbacks = {

3861

'channel': 'uploader',

3862

'channel_id': 'uploader_id',

3863

'channel_url': 'uploader_url',

3864

}

3865

3866

# The upload date for scheduled, live and past live streams / premieres in microformats

3867

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3868

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3869

upload_date = (

3870

unified_strdate(get_first(microformats, 'uploadDate'))

3871

or unified_strdate(search_meta('uploadDate')))

3872

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3873

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3874

info['upload_date'] = upload_date

3875

3876

for to, frm in fallbacks.items():

3877

if not info.get(to):

3878

info[to] = info.get(frm)

3879

3880

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3886

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3887

is_membersonly = None

3888

is_premium = None

3889

if initial_data and is_private is not None:

3890

is_membersonly = False

3891

is_premium = False

3892

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3893

badge_labels = set()

3894

for content in contents:

3895

if not isinstance(content, dict):

3896

continue

3897

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3898

for badge_label in badge_labels:

3899

if badge_label.lower() == 'members only':

3900

is_membersonly = True

3901

elif badge_label.lower() == 'premium':

3902

is_premium = True

3903

elif badge_label.lower() == 'unlisted':

3904

is_unlisted = True

3905

3906

info['availability'] = self._availability(

3907

is_private=is_private,

3908

needs_premium=is_premium,

3909

needs_subscription=is_membersonly,

3910

needs_auth=info['age_limit'] >= 18,

3911

is_unlisted=None if is_private is None else is_unlisted)

3912

3913

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3914

3915

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3921

3922

@staticmethod

3923

def passthrough_smuggled_data(func):
    """Decorator that unsmuggles the URL and re-smuggles the data into the entries.

    The decorated callable is invoked as ``func(self, url, smuggled_data)`` with
    the unsmuggled URL. ``smuggled_data['is_music_url']`` is set to ``True`` when
    ``self.is_music_url(url)`` is truthy. If there is any smuggled data and the
    returned info dict has a truthy ``'entries'`` value, the entries are replaced
    by a lazy generator that smuggles the data into every entry's ``'url'``.
    """

    def _resmuggle(entry_iter, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entry_iter:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Nothing to pass through, or nothing to pass it into
        if not smuggled_data or not result.get('entries'):
            return result
        result['entries'] = _resmuggle(result['entries'], smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3943

channel_id = self._html_search_meta(

3944

'channelId', webpage, 'channel id', default=None)

3945

if channel_id:

3946

return channel_id

3947

channel_url = self._html_search_meta(

3948

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3949

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3950

'twitter:app:url:googleplay'), webpage, 'channel url')

3951

return self._search_regex(

3952

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3953

channel_url, 'channel id')

3954

3955

@staticmethod

3956

def _extract_basic_item_renderer(item):

3957

# Modified from _extract_grid_item_renderer

3958

known_basic_renderers = (

3959

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3960

)

3961

for key, renderer in item.items():

3962

if not isinstance(renderer, dict):

3963

continue

3964

elif key in known_basic_renderers:

3965

return renderer

3966

elif key.startswith('grid') and key.endswith('Renderer'):

3967

return renderer

3968

3969

def _grid_entries(self, grid_renderer):

3970

for item in grid_renderer['items']:

3971

if not isinstance(item, dict):

3972

continue

3973

renderer = self._extract_basic_item_renderer(item)

3974

if not isinstance(renderer, dict):

3975

continue

3976

title = self._get_text(renderer, 'title')

3977

3978

# playlist

3979

playlist_id = renderer.get('playlistId')

3980

if playlist_id:

3981

yield self.url_result(

3982

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3983

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3988

if video_id:

3989

yield self._extract_video(renderer)

3990

continue

3991

# channel

3992

channel_id = renderer.get('channelId')

3993

if channel_id:

3994

yield self.url_result(

3995

'https://www.youtube.com/channel/%s' % channel_id,

3996

ie=YoutubeTabIE.ie_key(), video_title=title)

3997

continue

3998

# generic endpoint URL support

3999

ep_url = urljoin('https://www.youtube.com/', try_get(

4000

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

4001

str))

4002

if ep_url:

4003

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

4004

if ie.suitable(ep_url):

4005

yield self.url_result(

4006

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

4007

break

4008

4009

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a responsive list item renderer.

    Lookup order: the item's own ``playlistItemData.videoId``, then the watch
    endpoint's playlist (with a watch URL when the endpoint also names a video),
    then the browse endpoint's ``browseId``. Returns ``None`` implicitly when
    none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        # Either way the result is identified by the playlist, not the video
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4026

4027

def _shelf_entries_from_content(self, shelf_renderer):

4028

content = shelf_renderer.get('content')

4029

if not isinstance(content, dict):

4030

return

4031

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

4032

if renderer:

4033

# TODO: add support for nested playlists so each shelf is processed

4034

# as separate playlist

4035

# TODO: this includes only first N items

4036

yield from self._grid_entries(renderer)

4037

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint, then the entries in its content.

    With *skip_channels* set, a shelf whose endpoint URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry

4057

4058

def _playlist_entries(self, video_list_renderer):

4059

for content in video_list_renderer['contents']:

4060

if not isinstance(content, dict):

4061

continue

4062

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4063

if not isinstance(renderer, dict):

4064

continue

4065

video_id = renderer.get('videoId')

4066

if not video_id:

4067

continue

4068

yield self._extract_video(renderer)

4069

4070

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it wraps a video renderer.

    Nothing is yielded unless ``['content']['videoRenderer']`` is a dict with a
    truthy ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4077

4078

def _video_entry(self, video_renderer):

4079

video_id = video_renderer.get('videoId')

4080

if video_id:

4081

return self._extract_video(video_renderer)

4082

4083

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or ``None`` if it has no tap URL.

    The result's title is the text of the renderer's ``hashtag`` field.
    """
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4089

4090

def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage (community) post.

    In order: the attached video (if any), the attached playlist (if any), then
    every video linked inline in the post text, except a link to the attached
    video itself. Nothing is yielded without a ``backstagePostRenderer``.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        attached_video = self._extract_video(video_renderer)
        if attached_video:
            yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already handled above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4125

4126

def _post_thread_continuation_entries(self, post_thread_continuation):

4127

contents = post_thread_continuation.get('contents')

4128

if not isinstance(contents, list):

4129

return

4130

for content in contents:

4131

renderer = content.get('backstagePostThreadRenderer')

4132

if isinstance(renderer, dict):

4133

yield from self._post_thread_entries(renderer)

4134

continue

4135

renderer = content.get('videoRenderer')

4136

if isinstance(renderer, dict):

4137

yield self._video_entry(renderer)

4138

4139

r''' # unused

4140

def _rich_grid_entries(self, contents):

4141

for content in contents:

4142

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4143

if video_renderer:

4144

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    *continuation_list* is an out-parameter: it is reset to ``[None]`` up front
    and its single slot is overwritten in place with whatever
    ``self._extract_continuation`` returns for the renderers processed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries for its payload
    renderer_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue

            # Only the first recognised renderer of each item is processed
            handled_key = next((key for key in section_item if key in renderer_handlers), None)
            if handled_key is not None:
                payload = section_item[handled_key]
                for entry in renderer_handlers[handled_key](payload):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(payload)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

4197

4198

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep requesting continuation pages.

    The entries found in the tab's own `sectionListRenderer` (or, failing that,
    `richGridRenderer`) are yielded first. While a continuation is available,
    the next page is fetched with `_extract_response` and dispatched to the
    handler registered for the renderer it contains.
    Nothing is yielded when `tab` has no dict under `content`.
    """
    # One-element holder that `_extract_entries` fills with the next continuation.
    # The name is rebound to a fresh list before each dispatch below, so the
    # helper must resolve it at call time rather than capture one list object.
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    parent_renderer = {}
    for renderer_name in ('sectionListRenderer', 'richGridRenderer'):
        candidate = try_get(tab_content, lambda x: x[renderer_name], dict)
        if candidate:
            parent_renderer = candidate
            break

    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for pages delivered under `continuationContents`
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for pages delivered as appended continuation items, keyed by the
    # renderer of the first item: (handler, key the item list is wrapped under)
    appended_item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from continuation_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # An empty renderer falls through to the appended-items lookup below
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next(
            (key for key in first_item if key in appended_item_handlers), None)
        if matched_key is None:
            # Nothing recognisable on this page; stop paging
            break

        handler, items_key = appended_item_handlers[matched_key]
        video_items_renderer = {items_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer of the first tab whose `selected` field is exactly True.

    Each tab is looked up under `tabRenderer` or `expandableTabRenderer`.
    When no tab is selected, ExtractorError is raised if `fatal` is set;
    otherwise None is returned.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None

4282

4283

def _extract_uploader(self, data):
    """
    Build the uploader fields from the playlist sidebar's video owner.

    Returns a dict that may contain `uploader`, `uploader_id` and `uploader_url`;
    fields whose value is None are left out. An empty dict is returned when the
    sidebar has no owner entry.
    """
    sidebar_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Strip the "by ... and N others" wrapper; otherwise keep the text as-is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {key: value for key, value in fields.items() if value is not None}

4298

4299

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a channel/playlist page.

    Metadata is read from the channel metadata renderer, or from the playlist
    metadata renderer when the former is absent; thumbnails come from the
    sidebar, avatar and header banners. The entries are produced lazily by
    `_entries` for the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(field) for field in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # None for playlist pages; replaced by item_id further down
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Adds an uncropped variant derived from the first thumbnail, if any
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Suffix the tab name (and expanded text, when present) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: fall back to the owner shown in the sidebar
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4390

4391

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline (watch-page) playlist, page by page.

    Every page is requested from the `next` endpoint, anchored at the last
    video of the previous page. Extraction stops when a page has no videos,
    when it adds nothing after the last yielded video, or when the response
    carries no playlist renderer.

    @param playlist: playlist renderer of the first page
    @param playlist_id: id sent as `playlistId` and used in progress notes
    @param data: initial response, used for account sync id / visitor data
    @param ytcfg: ytcfg passed through to the API helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last yielded video; start from 0 if it is not in this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last panel item may not expose a watch endpoint; default to an empty
        # dict so that the per-field fallbacks in the query below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist renderer in the response: nothing more to page through
            return

4421

4422

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist embedded in a watch page.

    When the playlist links to a page of its own that differs from `url` and
    its id is not a known unviewable one, a url result pointing at that page
    is returned. Otherwise the playlist is extracted inline.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4444

4445

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    Labels come from the sidebar badges plus, when present, the selected entry
    of the privacy dropdown. Private/unlisted stay unknown (None) unless one of
    the labels 'unlisted', 'private' or 'public' is found.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            badge_labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in badge_labels:
        if label == 'public':
            is_private = is_unlisted = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4476

4477

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    Values not matching `expected_type` are ignored. Returns None when the
    sidebar is missing or no item carries such a renderer.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((renderer for renderer in candidates if renderer), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist including its unavailable videos.

    The browse id and params are taken from the sidebar menu item labelled
    'show unavailable videos'; when that item is not found, fixed defaults are
    used instead. Returns None without any request if the page has no primary
    sidebar renderer; otherwise returns the (non-fatal) API response.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    browse_endpoint = {}
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4521

4522

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4525

4526

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on transient failures.

    Returns a `(webpage, data)` tuple; either element may be None when the
    download failed and `fatal` is not set.
    NOTE(review): assigning `retry.error` presumably asks RetryManager for
    another attempt - confirm against its implementation.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403/429 responses
            is_retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if is_retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage.

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not in
    the `skip` extractor argument, and emits a one-time warning in all other cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4565

4566

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Return `(data, ytcfg)` for a tab URL.

    Unless the webpage is skipped, the data and ytcfg are taken from the
    webpage. If that yields no data, the tab is resolved through the API
    with `_extract_tab_endpoint` instead.
    A redirect to the home page is reported as an error (or a warning when
    `fatal` is not set).
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4585

4586

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    A `browseEndpoint` is fetched from `browse` and a `watchEndpoint` from `next`,
    checked in that order. When neither is present, ExtractorError is raised if
    `fatal` is set; otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)

4603

4604

# Default `params` value used by `_search_results` when the caller passes none;
# a falsy value means no `params` key is added to the search query
_SEARCH_PARAMS = None

4605

4606

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, following continuations.

    @param params: value for the `params` key of the search request; defaults
                   to `_SEARCH_PARAMS`, and is omitted when falsy
    @param default_client: client used for ytcfg, headers and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Candidate locations of the result list, tried in this order
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Refilled in-place by `_extract_entries` with the next continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        if continuation_list[0]:
            data.update(continuation_list[0])
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        parent_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(parent_renderer, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4641

IE_DESC = 'YouTube Tabs'

4642

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4651

(?P<not_channel>

4652

feed/|hashtag/|

4653

(?:playlist|watch)\?.*?\blist=

4654

)|

4655

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4660

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4661

}

4662

IE_NAME = 'youtube:tab'

4663

4664

_TESTS = [{

4665

'note': 'playlists, multipage',

4666

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4667

'playlist_mincount': 94,

4668

'info_dict': {

4669

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4670

'title': 'Igor Kleiner - Playlists',

4671

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4672

'uploader': 'Igor Kleiner',

4673

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4674

'channel': 'Igor Kleiner',

4675

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4676

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4677

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4678

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4679

'channel_follower_count': int

4680

},

4681

}, {

4682

'note': 'playlists, multipage, different order',

4683

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4684

'playlist_mincount': 94,

4685

'info_dict': {

4686

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4687

'title': 'Igor Kleiner - Playlists',

4688

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4689

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4690

'uploader': 'Igor Kleiner',

4691

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4692

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4693

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4694

'channel': 'Igor Kleiner',

4695

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4696

'channel_follower_count': int

4697

},

4698

}, {

4699

'note': 'playlists, series',

4700

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4701

'playlist_mincount': 5,

4702

'info_dict': {

4703

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4704

'title': '3Blue1Brown - Playlists',

4705

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4706

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4707

'uploader': '3Blue1Brown',

4708

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4709

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4710

'channel': '3Blue1Brown',

4711

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4712

'tags': ['Mathematics'],

4713

'channel_follower_count': int

4714

},

4715

}, {

4716

'note': 'playlists, singlepage',

4717

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4718

'playlist_mincount': 4,

4719

'info_dict': {

4720

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4721

'title': 'ThirstForScience - Playlists',

4722

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4723

'uploader': 'ThirstForScience',

4724

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4725

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4726

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4727

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4728

'tags': 'count:13',

4729

'channel': 'ThirstForScience',

4730

'channel_follower_count': int

4731

}

4732

}, {

4733

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4734

'only_matching': True,

4735

}, {

4736

'note': 'basic, single video playlist',

4737

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4738

'info_dict': {

4739

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4740

'uploader': 'Sergey M.',

4741

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4742

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4747

'channel': 'Sergey M.',

4748

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4749

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4750

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4755

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4756

'info_dict': {

4757

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4758

'uploader': 'Sergey M.',

4759

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4760

'title': 'youtube-dl empty playlist',

4761

'tags': [],

4762

'channel': 'Sergey M.',

4763

'description': '',

4764

'modified_date': '20160902',

4765

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4766

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4767

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4773

'info_dict': {

4774

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4775

'title': 'lex will - Home',

4776

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4777

'uploader': 'lex will',

4778

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4779

'channel': 'lex will',

4780

'tags': ['bible', 'history', 'prophesy'],

4781

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4782

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4783

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4784

'channel_follower_count': int

4785

},

4786

'playlist_mincount': 2,

4787

}, {

4788

'note': 'Videos tab',

4789

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4790

'info_dict': {

4791

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4792

'title': 'lex will - Videos',

4793

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4794

'uploader': 'lex will',

4795

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4796

'tags': ['bible', 'history', 'prophesy'],

4797

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4798

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4799

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4800

'channel': 'lex will',

4801

'channel_follower_count': int

4802

},

4803

'playlist_mincount': 975,

4804

}, {

4805

'note': 'Videos tab, sorted by popular',

4806

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4807

'info_dict': {

4808

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'title': 'lex will - Videos',

4810

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4811

'uploader': 'lex will',

4812

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4813

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4814

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4815

'channel': 'lex will',

4816

'tags': ['bible', 'history', 'prophesy'],

4817

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4818

'channel_follower_count': int

4819

},

4820

'playlist_mincount': 199,

4821

}, {

4822

'note': 'Playlists tab',

4823

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4824

'info_dict': {

4825

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4826

'title': 'lex will - Playlists',

4827

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4828

'uploader': 'lex will',

4829

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4830

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4831

'channel': 'lex will',

4832

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4833

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4834

'tags': ['bible', 'history', 'prophesy'],

4835

'channel_follower_count': int

4836

},

4837

'playlist_mincount': 17,

4838

}, {

4839

'note': 'Community tab',

4840

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4841

'info_dict': {

4842

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4843

'title': 'lex will - Community',

4844

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4845

'uploader': 'lex will',

4846

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4847

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4848

'channel': 'lex will',

4849

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4850

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4851

'tags': ['bible', 'history', 'prophesy'],

4852

'channel_follower_count': int

4853

},

4854

'playlist_mincount': 18,

4855

}, {

4856

'note': 'Channels tab',

4857

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4858

'info_dict': {

4859

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4860

'title': 'lex will - Channels',

4861

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4862

'uploader': 'lex will',

4863

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4864

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4865

'channel': 'lex will',

4866

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4867

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4868

'tags': ['bible', 'history', 'prophesy'],

4869

'channel_follower_count': int

4870

},

4871

'playlist_mincount': 12,

4872

}, {

4873

'note': 'Search tab',

4874

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4875

'playlist_mincount': 40,

4876

'info_dict': {

4877

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4878

'title': '3Blue1Brown - Search - linear algebra',

4879

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4880

'uploader': '3Blue1Brown',

4881

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4882

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4883

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4884

'tags': ['Mathematics'],

4885

'channel': '3Blue1Brown',

4886

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4887

'channel_follower_count': int

4888

},

4889

}, {

4890

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4891

'only_matching': True,

4892

}, {

4893

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4894

'only_matching': True,

4895

}, {

4896

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4897

'only_matching': True,

4898

}, {

4899

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4900

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4901

'info_dict': {

4902

'title': '29C3: Not my department',

4903

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4904

'uploader': 'Christiaan008',

4905

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4906

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4907

'tags': [],

4908

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4909

'view_count': int,

4910

'modified_date': '20150605',

4911

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4912

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4913

'channel': 'Christiaan008',

4914

},

4915

'playlist_count': 96,

4916

}, {

4917

'note': 'Large playlist',

4918

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4919

'info_dict': {

4920

'title': 'Uploads from Cauchemar',

4921

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4922

'uploader': 'Cauchemar',

4923

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4924

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4925

'tags': [],

4926

'modified_date': r're:\d{8}',

4927

'channel': 'Cauchemar',

4928

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4929

'view_count': int,

4930

'description': '',

4931

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4932

},

4933

'playlist_mincount': 1123,

4934

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4935

}, {

4936

'note': 'even larger playlist, 8832 videos',

4937

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4938

'only_matching': True,

4939

}, {

4940

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4941

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4942

'info_dict': {

4943

'title': 'Uploads from Interstellar Movie',

4944

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4945

'uploader': 'Interstellar Movie',

4946

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4947

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4948

'tags': [],

4949

'view_count': int,

4950

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4951

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4952

'channel': 'Interstellar Movie',

4953

'description': '',

4954

'modified_date': r're:\d{8}',

4955

},

4956

'playlist_mincount': 21,

4957

}, {

4958

'note': 'Playlist with "show unavailable videos" button',

4959

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4960

'info_dict': {

4961

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4962

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4963

'uploader': 'Phim Siêu Nhân Nhật Bản',

4964

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4965

'view_count': int,

4966

'channel': 'Phim Siêu Nhân Nhật Bản',

4967

'tags': [],

4968

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4969

'description': '',

4970

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4971

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4972

'modified_date': r're:\d{8}',

4973

},

4974

'playlist_mincount': 200,

4975

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4976

}, {

4977

'note': 'Playlist with unavailable videos in page 7',

4978

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4979

'info_dict': {

4980

'title': 'Uploads from BlankTV',

4981

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4982

'uploader': 'BlankTV',

4983

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4984

'channel': 'BlankTV',

4985

'channel_url': 'https://www.youtube.com/c/blanktv',

4986

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4987

'view_count': int,

4988

'tags': [],

4989

'uploader_url': 'https://www.youtube.com/c/blanktv',

4990

'modified_date': r're:\d{8}',

4991

'description': '',

4992

},

4993

'playlist_mincount': 1000,

4994

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4995

}, {

4996

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4997

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4998

'info_dict': {

4999

'title': 'Data Analysis with Dr Mike Pound',

5000

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

5001

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5002

'uploader': 'Computerphile',

5003

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

5004

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5005

'tags': [],

5006

'view_count': int,

5007

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5008

'channel_url': 'https://www.youtube.com/user/Computerphile',

5009

'channel': 'Computerphile',

5010

},

5011

'playlist_mincount': 11,

5012

}, {

5013

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5014

'only_matching': True,

5015

}, {

5016

'note': 'Playlist URL that does not actually serve a playlist',

5017

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5022

'uploader': 'STREEM',

5023

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5024

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5025

'upload_date': '20150526',

5026

'license': 'Standard YouTube License',

5027

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5028

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5035

},

5036

'skip': 'This video is not available.',

5037

'add_ie': [YoutubeIE.ie_key()],

5038

}, {

5039

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5040

'only_matching': True,

5041

}, {

5042

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5043

'only_matching': True,

5044

}, {

5045

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5046

'info_dict': {

5047

'id': 'Wq15eF5vCbI', # This will keep changing

5048

'ext': 'mp4',

5049

'title': str,

5050

'uploader': 'Sky News',

5051

'uploader_id': 'skynews',

5052

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5053

'upload_date': r're:\d{8}',

5054

'description': str,

5055

'categories': ['News & Politics'],

5056

'tags': list,

5057

'like_count': int,

5058

'release_timestamp': 1642502819,

5059

'channel': 'Sky News',

5060

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5061

'age_limit': 0,

5062

'view_count': int,

5063

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5064

'playable_in_embed': True,

5065

'release_date': '20220118',

5066

'availability': 'public',

5067

'live_status': 'is_live',

5068

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5069

'channel_follower_count': int

5070

},

5071

'params': {

5072

'skip_download': True,

5073

},

5074

'expected_warnings': ['Ignoring subtitle tracks found in '],

5075

}, {

5076

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5081

'uploader': 'The Young Turks',

5082

'uploader_id': 'TheYoungTurks',

5083

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5084

'upload_date': '20150715',

5085

'license': 'Standard YouTube License',

5086

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5087

'categories': ['News & Politics'],

5088

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5093

},

5094

'only_matching': True,

5095

}, {

5096

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5097

'only_matching': True,

5098

}, {

5099

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5100

'only_matching': True,

5101

}, {

5102

'note': 'A channel that is not live. Should raise error',

5103

'url': 'https://www.youtube.com/user/numberphile/live',

5104

'only_matching': True,

5105

}, {

5106

'url': 'https://www.youtube.com/feed/trending',

5107

'only_matching': True,

5108

}, {

5109

'url': 'https://www.youtube.com/feed/library',

5110

'only_matching': True,

5111

}, {

5112

'url': 'https://www.youtube.com/feed/history',

5113

'only_matching': True,

5114

}, {

5115

'url': 'https://www.youtube.com/feed/subscriptions',

5116

'only_matching': True,

5117

}, {

5118

'url': 'https://www.youtube.com/feed/watch_later',

5119

'only_matching': True,

5120

}, {

5121

'note': 'Recommended - redirects to home page.',

5122

'url': 'https://www.youtube.com/feed/recommended',

5123

'only_matching': True,

5124

}, {

5125

'note': 'inline playlist with not always working continuations',

5126

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5127

'only_matching': True,

5128

}, {

5129

'url': 'https://www.youtube.com/course',

5130

'only_matching': True,

5131

}, {

5132

'url': 'https://www.youtube.com/zsecurity',

5133

'only_matching': True,

5134

}, {

5135

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5136

'only_matching': True,

5137

}, {

5138

'url': 'https://www.youtube.com/TheYoungTurks/live',

5139

'only_matching': True,

5140

}, {

5141

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5148

}, {

5149

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5150

'only_matching': True,

5151

}, {

5152

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5153

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5154

'only_matching': True

5155

}, {

5156

'note': '/browse/ should redirect to /channel/',

5157

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5158

'only_matching': True

5159

}, {

5160

'note': 'VLPL, should redirect to playlist?list=PL...',

5161

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5162

'info_dict': {

5163

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5164

'uploader': 'NoCopyrightSounds',

5165

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5166

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5167

'title': 'NCS : All Releases 💿',

5168

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5169

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5170

'modified_date': r're:\d{8}',

5171

'view_count': int,

5172

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5173

'tags': [],

5174

'channel': 'NoCopyrightSounds',

5175

},

5176

'playlist_mincount': 166,

5177

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5178

}, {

5179

'note': 'Topic, should redirect to playlist?list=UU...',

5180

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5181

'info_dict': {

5182

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5183

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5184

'title': 'Uploads from Royalty Free Music - Topic',

5185

'uploader': 'Royalty Free Music - Topic',

5186

'tags': [],

5187

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5188

'channel': 'Royalty Free Music - Topic',

5189

'view_count': int,

5190

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5191

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5192

'modified_date': r're:\d{8}',

5193

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5194

'description': '',

5195

},

5196

'expected_warnings': [

5197

'The URL does not have a videos tab',

5198

r'[Uu]navailable videos (are|will be) hidden',

5199

],

5200

'playlist_mincount': 101,

5201

}, {

5202

'note': 'Topic without a UU playlist',

5203

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5204

'info_dict': {

5205

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5206

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5207

'tags': [],

5208

},

5209

'expected_warnings': [

5210

'the playlist redirect gave error',

5211

],

5212

'playlist_mincount': 9,

5213

}, {

5214

'note': 'Youtube music Album',

5215

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5216

'info_dict': {

5217

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5218

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5223

'modified_date': r're:\d{8}',

5224

},

5225

'playlist_count': 50,

5226

}, {

5227

'note': 'unlisted single video playlist',

5228

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5229

'info_dict': {

5230

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5231

'uploader': 'colethedj',

5232

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5233

'title': 'yt-dlp unlisted playlist test',

5234

'availability': 'unlisted',

5235

'tags': [],

5236

'modified_date': '20220418',

5237

'channel': 'colethedj',

5238

'view_count': int,

5239

'description': '',

5240

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5241

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5242

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5247

'url': 'https://www.youtube.com/feed/recommended',

5248

'info_dict': {

5249

'id': 'recommended',

5250

'title': 'recommended',

5251

'tags': [],

5252

},

5253

'playlist_mincount': 50,

5254

'params': {

5255

'skip_download': True,

5256

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5257

},

5258

}, {

5259

'note': 'API Fallback: /videos tab, sorted by oldest first',

5260

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5261

'info_dict': {

5262

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5263

'title': 'Cody\'sLab - Videos',

5264

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5265

'uploader': 'Cody\'sLab',

5266

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5267

'channel': 'Cody\'sLab',

5268

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5269

'tags': [],

5270

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5271

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5272

'channel_follower_count': int

5273

},

5274

'playlist_mincount': 650,

5275

'params': {

5276

'skip_download': True,

5277

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5278

},

5279

}, {

5280

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5281

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5282

'info_dict': {

5283

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5284

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5285

'title': 'Uploads from Royalty Free Music - Topic',

5286

'uploader': 'Royalty Free Music - Topic',

5287

'modified_date': r're:\d{8}',

5288

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5289

'description': '',

5290

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5291

'tags': [],

5292

'channel': 'Royalty Free Music - Topic',

5293

'view_count': int,

5294

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5295

},

5296

'expected_warnings': [

5297

'does not have a videos tab',

5298

r'[Uu]navailable videos (are|will be) hidden',

5299

],

5300

'playlist_mincount': 101,

5301

'params': {

5302

'skip_download': True,

5303

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5304

},

5305

}, {

5306

'note': 'non-standard redirect to regional channel',

5307

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5308

'only_matching': True

5309

}, {

5310

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5311

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5312

'info_dict': {

5313

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5314

'modified_date': '20220407',

5315

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5316

'tags': [],

5317

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5318

'uploader': 'pukkandan',

5319

'availability': 'unlisted',

5320

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5321

'channel': 'pukkandan',

5322

'description': 'Test for collaborative playlist',

5323

'title': 'yt-dlp test - collaborative playlist',

5324

'view_count': int,

5325

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5326

},

5327

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle `url`.

    Any URL accepted by YoutubeIE is rejected here; every other URL is
    judged by the inherited `suitable` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5333

5334

# Splits a tab URL into named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional "/word" path segment; the conditional group only tries to
#          match it when the `not_channel` group did not take part in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5335

5336

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist style URL to tab entries, a playlist or a single video.

    The URL host is forced to www.youtube.com. When `smuggled_data` flags the URL
    as a music URL, `VL...`/`MP...` IDs and `/browse/` paths are mapped to regular
    playlist/channel URLs. Channel URLs without a tab are pointed at `/videos`
    unless the `no-youtube-channel-redirect` compat option is set.

    Returns the value of `_extract_from_tabs` or `_extract_from_playlist`, or a
    `url_result` that delegates to YoutubeIE / YoutubeTabIE.

    Raises ExtractorError when the page cannot be recognized, when an album cannot
    be resolved to a playlist, or when an explicitly requested tab does not exist;
    raises UserNotLive when a `/live` tab was requested but a tab page came back.
    """
    item_id = self._match_id(url)
    # Normalize the host so every later request goes to www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and replace unmatched (None) groups with ''
        # so the parts can be used as strings without further checks
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for; `tab` may be overwritten with '/videos' below
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither a video nor a playlist ID, youtube redirects to the home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part of the URL when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" is addressed as "/featured" in URLs (see the redirect warning above)
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # strip the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was chosen by the redirect above, not by the user: fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that is not available
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist were found: fall back to a single video,
    # preferring the ID reported in the response over the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5464

5465

5466

class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist IDs and delegate them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Either a bare playlist ID, or a youtube(kids).com / invidious URL carrying a `list=` parameter
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Report whether this extractor should handle `url`.

        URLs accepted by YoutubeTabIE, and URLs whose query string carries a
        non-empty `v` parameter, are rejected; anything else is judged by the
        inherited `suitable` implementation.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is also imported at module level; this local import
        # presumably keeps the method self-contained -- confirm before removing it
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` as a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over; when it is empty, the matched
        playlist ID is used as the `list` parameter. Music URLs are tagged by
        smuggling `is_music_url` into the resulting URL.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5575

5576

5577

class YoutubeYtBeIE(InfoExtractor):
    """Match youtu.be short links that carry a `list=` parameter and delegate to YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5626

5627

5628

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Match `/embed/live_stream?channel=...` URLs and delegate to the channel's `/live` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the `/live` URL of the channel named in the embed URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5641

5642

5643

class YoutubeYtUserIE(InfoExtractor):
    """Handle the `ytuser:<name>` shorthand by delegating to the user's `/videos` page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the `/user/<name>/videos` URL, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        # f-string instead of %-formatting, in line with the sibling extractors
        return self.url_result(
            f'https://www.youtube.com/user/{user_id}/videos',
            ie=YoutubeTabIE.ie_key(), video_id=user_id)

5657

5658

5659

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Handle the `:ytfav` keyword by delegating to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the `list=LL` playlist URL, handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

5676

5677

5678

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):

5679

IE_NAME = 'youtube:notif'

5680

IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'

5681

_VALID_URL = r':ytnotif(?:ication)?s?'

5682

_LOGIN_REQUIRED = True

5683

_TESTS = [{

5684

'url': ':ytnotif',

5685

'only_matching': True,

5686

}, {

5687

'url': ':ytnotifications',

5688

'only_matching': True,

5689

}]

5690

5691

def _extract_notification_menu(self, response, continuation_list):
    """Yield an entry for each notification item found in `response`.

    The item list is looked up with `traverse_obj` under two paths: the
    notification section of the popup menu, and the items of an
    `appendContinuationItemsAction`.

    `continuation_list` is used as an output slot: its first element is reset
    to None and then set to the `continuationItemRenderer` of any item that
    has one. As this is a generator, that happens while it is being consumed.
    """
    popup_items_path = (
        'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
        'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
    appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
    items = traverse_obj(
        response, popup_items_path, appended_items_path, expected_type=list) or []

    continuation_list[0] = None
    for item in items:
        entry = self._extract_notification_renderer(item.get('notificationRenderer'))
        if entry:
            yield entry
        next_page = item.get('continuationItemRenderer')
        if next_page:
            continuation_list[0] = next_page

5705

5706

def _extract_notification_renderer(self, notification):

5707

video_id = traverse_obj(

5708

notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)

5709

url = f'https://www.youtube.com/watch?v={video_id}'

5710

channel_id = None

5711

if not video_id:

5712

browse_ep = traverse_obj(

5713

notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)

5714

channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)

5715

post_id = self._search_regex(

5716

r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),

5717

'post id', default=None)

5718

if not channel_id or not post_id:

5719

return

5720

# The direct /post url redirects to this in the browser

5721

url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

5722

5723

channel = traverse_obj(

5724

notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),

5725

expected_type=str)

5726

notification_title = self._get_text(notification, 'shortMessage')

5727

if notification_title:

5728

notification_title = notification_title.replace('\xad', '') # remove soft hyphens

5729

# TODO: handle recommended videos

5730

title = self._search_regex(

5731

rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,

5732

'video title', default=None)

5733

upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

5734

if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())

else None)

return {

'_type': 'url',

'url': url,

'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),

5740

'video_id': video_id,

5741

'title': title,

5742

'channel_id': channel_id,

5743

'channel': channel,

5744

'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),

5745

'upload_date': upload_date,

5746

}

5747

5748

def _notification_menu_entries(self, ytcfg):

5749

continuation_list = [None]

5750

response = None

5751

for page in itertools.count(1):

5752

ctoken = traverse_obj(

5753

continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)

5754

response = self._extract_response(

5755

item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,

5756

ep='notification/get_notification_menu', check_get_keys='actions',

5757

headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))

5758

yield from self._extract_notification_menu(response, continuation_list)

5759

if not continuation_list[0]:

5760

break

5761

5762

def _real_extract(self, url):

5763

display_id = 'notifications'

5764

ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}

5765

self._report_playlist_authcheck(ytcfg)

5766

return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5767

5768

5769

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Declarative only: the behaviour comes from the two base classes.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same shape as YoutubeSearchIE, with its own search key and params.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by ``search_query``/``q``, forwarding ``sp`` if present.

        The query text is used as both the playlist id and the playlist title.
        """
        url_params = parse_qs(url)
        # search_query takes precedence over q when both are given
        terms = url_params.get('search_query') or url_params.get('q')
        query = terms[0]
        sp = url_params.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, sp), query, query)

5838

5839

5840

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> ``sp`` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search with the ``web_music`` client, optionally limited to a section.

        An explicit ``sp`` query parameter wins; otherwise the URL fragment is
        looked up in ``_SECTIONS``. The playlist id/title is the query, plus
        `` - <section>`` when a section is known.
        """
        url_params = parse_qs(url)
        query = (url_params.get('search_query') or url_params.get('q'))[0]
        sp = url_params.get('sp', (None,))[0]

        if sp:
            # Show the section name if sp is a known value, else the raw sp
            section = sp
            for name, encoded in self._SECTIONS.items():
                if encoded == sp:
                    section = name
                    break
        else:
            # Appending [''] makes index 1 safe when the URL has no fragment
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            sp = self._SECTIONS.get(section)
            if not sp:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, sp, default_client='web_music')
        return self.playlist_result(results, title, title)

5891

5892

5893

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Shared base for the feed extractors.
    Subclasses are expected to override _FEED_NAME; it is used to build both
    IE_NAME and the /feed/ URL handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrowed from YoutubeBaseInfoExtractor, which this class does not inherit from
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_extract(self, url):
        """Forward to the ``/feed/<_FEED_NAME>`` page via YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5911

5912

5913

class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword-only extractor that forwards to the "WL" playlist.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Return a url result for the ``list=WL`` playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5925

5926

5927

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the :ytrec keyword.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the True set on the feeds base class
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Declarative only; extraction is inherited from the feeds base class.
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Declarative only; extraction is inherited from the feeds base class.
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Forward to the playlist named ``RLTD`` + the matched id.

        The captured id is the part of the channel id after the ``UC`` prefix,
        so the resulting playlist id is ``RLTD<id>``.
        """
        playlist_id = f'RLTD{self._match_id(url)}'
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            # Pass the extractor key string rather than the class itself, the
            # same way every other redirecting extractor here calls url_result.
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5981

5982

5983

class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that carry no video id (see
    # _VALID_URL and _TESTS) and always fails with a hint about quoting.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The hint names the yt-dlp executable, since that is the program
        # this extractor ships with.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip page to its base video plus the clip's time range.

        Returns a ``url_transparent`` result pointing at the watch URL of the
        underlying video, with ``section_start``/``section_end`` in seconds
        (converted from the millisecond values of the page's ``loopCommand``).

        Raises:
            ExtractorError: if the page data contains no video id, or no
                usable start/end time for the clip.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the expected panel structure is absent; look
        # the times up defensively so that case yields an ExtractorError
        # instead of a TypeError/KeyError on subscripting.
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose v= value is only 1-10 characters long and
    # always fails with an explanatory error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)