jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from ..compat import functools
	21	from ..jsinterp import JSInterpreter
	22	from ..utils import (
	23	NO_DEFAULT,
	24	ExtractorError,
	25	UserNotLive,
	26	bug_reports_message,
	27	classproperty,
	28	clean_html,
	29	datetime_from_str,
	30	dict_get,
	31	float_or_none,
	32	format_field,
	33	get_first,
	34	int_or_none,
	35	is_html,
	36	join_nonempty,
	37	js_to_json,
	38	mimetype2ext,
	39	network_exceptions,
	40	orderedSet,
	41	parse_codecs,
	42	parse_count,
	43	parse_duration,
	44	parse_iso8601,
	45	parse_qs,
	46	qualities,
	47	remove_start,
	48	smuggle_url,
	49	str_or_none,
	50	str_to_int,
	51	strftime_or_none,
	52	traverse_obj,
	53	try_get,
	54	unescapeHTML,
	55	unified_strdate,
	56	unified_timestamp,
	57	unsmuggle_url,
	58	update_url_query,
	59	url_or_none,
	60	urljoin,
	61	variadic,
	62	)
	63
# Default Innertube configuration ("ytcfg") for every known client, keyed by client name.
# Each entry holds the API key, the request context (client name/version and
# device details) and the numeric client id sent as INNERTUBE_CONTEXT_CLIENT_NAME.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER get
# them filled in by build_innertube_clients(), which also adds a 'priority' key.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    # The only entry that overrides INNERTUBE_HOST
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # All android and ios entries below set REQUIRE_JS_PLAYER to False
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.29.34',
                'androidSdkVersion': 30
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.29.34',
                'androidSdkVersion': 30
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.28.100',
                'androidSdkVersion': 30
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is applied by build_innertube_clients()
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.18',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is applied by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.29.101',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
	226
	227
	228	def _split_innertube_client(client_name):
	229	variant, *base = client_name.rsplit('.', 1)
	230	if base:
	231	return variant, base[0], variant
	232	base, *variant = client_name.split('_', 1)
	233	return client_name, base, variant[0] if variant else None
	234
	235
	236	def build_innertube_clients():
	237	THIRD_PARTY = {
	238	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	239	}
	240	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	241	priority = qualities(BASE_CLIENTS[::-1])
	242
	243	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	244	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	245	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	246	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	247	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	248
	249	_, base_client, variant = _split_innertube_client(client)
	250	ytcfg['priority'] = 10 * priority(base_client)
	251
	252	if not variant:
	253	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	254	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	255	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	256	embedscreen['priority'] -= 3
	257	elif variant == 'embedded':
	258	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	259	ytcfg['priority'] -= 2
	260	else:
	261	ytcfg['priority'] -= 3
	262
	263
	264	build_innertube_clients()
	265
	266
	267	class YoutubeBaseInfoExtractor(InfoExtractor):
	268	"""Provide base functions for Youtube extractors"""
	269
	270	_RESERVED_NAMES = (
	271	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	272	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	273	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	274	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	275
	276	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	277
	278	# _NETRC_MACHINE = 'youtube'
	279
	280	# If True it will raise an error if no login info is provided
	281	_LOGIN_REQUIRED = False
	282
	283	_INVIDIOUS_SITES = (
	284	# invidious-redirect websites
	285	r'(?:www\.)?redirect\.invidious\.io',
	286	r'(?:(?:www\|dev)\.)?invidio\.us',
	287	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	288	r'(?:www\.)?invidious\.pussthecat\.org',
	289	r'(?:www\.)?invidious\.zee\.li',
	290	r'(?:www\.)?invidious\.ethibox\.fr',
	291	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	292	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	293	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	294	# youtube-dl invidious instances list
	295	r'(?:(?:www\|no)\.)?invidiou\.sh',
	296	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	297	r'(?:www\.)?invidious\.kabi\.tk',
	298	r'(?:www\.)?invidious\.mastodon\.host',
	299	r'(?:www\.)?invidious\.zapashcanon\.fr',
	300	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	301	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	302	r'(?:www\.)?invidious\.himiko\.cloud',
	303	r'(?:www\.)?invidious\.reallyancient\.tech',
	304	r'(?:www\.)?invidious\.tube',
	305	r'(?:www\.)?invidiou\.site',
	306	r'(?:www\.)?invidious\.site',
	307	r'(?:www\.)?invidious\.xyz',
	308	r'(?:www\.)?invidious\.nixnet\.xyz',
	309	r'(?:www\.)?invidious\.048596\.xyz',
	310	r'(?:www\.)?invidious\.drycat\.fr',
	311	r'(?:www\.)?inv\.skyn3t\.in',
	312	r'(?:www\.)?tube\.poal\.co',
	313	r'(?:www\.)?tube\.connect\.cafe',
	314	r'(?:www\.)?vid\.wxzm\.sx',
	315	r'(?:www\.)?vid\.mint\.lgbt',
	316	r'(?:www\.)?vid\.puffyan\.us',
	317	r'(?:www\.)?yewtu\.be',
	318	r'(?:www\.)?yt\.elukerio\.org',
	319	r'(?:www\.)?yt\.lelux\.fi',
	320	r'(?:www\.)?invidious\.ggc-project\.de',
	321	r'(?:www\.)?yt\.maisputain\.ovh',
	322	r'(?:www\.)?ytprivate\.com',
	323	r'(?:www\.)?invidious\.13ad\.de',
	324	r'(?:www\.)?invidious\.toot\.koeln',
	325	r'(?:www\.)?invidious\.fdn\.fr',
	326	r'(?:www\.)?watch\.nettohikari\.com',
	327	r'(?:www\.)?invidious\.namazso\.eu',
	328	r'(?:www\.)?invidious\.silkky\.cloud',
	329	r'(?:www\.)?invidious\.exonip\.de',
	330	r'(?:www\.)?invidious\.riverside\.rocks',
	331	r'(?:www\.)?invidious\.blamefran\.net',
	332	r'(?:www\.)?invidious\.moomoo\.de',
	333	r'(?:www\.)?ytb\.trom\.tf',
	334	r'(?:www\.)?yt\.cyberhost\.uk',
	335	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	336	r'(?:www\.)?qklhadlycap4cnod\.onion',
	337	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	338	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	339	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	340	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	341	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	342	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	343	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	344	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	345	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	346	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	347	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	348	r'(?:www\.)?piped\.kavin\.rocks',
	349	r'(?:www\.)?piped\.silkky\.cloud',
	350	r'(?:www\.)?piped\.tokhmi\.xyz',
	351	r'(?:www\.)?piped\.moomoo\.me',
	352	r'(?:www\.)?il\.ax',
	353	r'(?:www\.)?piped\.syncpundit\.com',
	354	r'(?:www\.)?piped\.mha\.fi',
	355	r'(?:www\.)?piped\.mint\.lgbt',
	356	r'(?:www\.)?piped\.privacy\.com\.de',
	357	)
	358
	359	def _initialize_consent(self):
	360	cookies = self._get_cookies('https://www.youtube.com/')
	361	if cookies.get('__Secure-3PSID'):
	362	return
	363	consent_id = None
	364	consent = cookies.get('CONSENT')
	365	if consent:
	366	if 'YES' in consent.value:
	367	return
	368	consent_id = self._search_regex(
	369	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	370	if not consent_id:
	371	consent_id = random.randint(100, 999)
	372	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	373
	374	def _initialize_pref(self):
	375	cookies = self._get_cookies('https://www.youtube.com/')
	376	pref_cookie = cookies.get('PREF')
	377	pref = {}
	378	if pref_cookie:
	379	try:
	380	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	381	except ValueError:
	382	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	383	pref.update({'hl': 'en', 'tz': 'UTC'})
	384	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	385
	386	def _real_initialize(self):
	387	self._initialize_pref()
	388	self._initialize_consent()
	389	self._check_login_required()
	390
	391	def _check_login_required(self):
	392	if self._LOGIN_REQUIRED and not self._cookies_passed:
	393	self.raise_login_required('Login details are needed to download this content', method='cookies')
	394
	395	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	396	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	397
	398	def _get_default_ytcfg(self, client='web'):
	399	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	400
	401	def _get_innertube_host(self, client='web'):
	402	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	403
	404	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	405	# try_get but with fallback to default ytcfg client values when present
	406	_func = lambda y: try_get(y, getter, expected_type)
	407	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	408
	409	def _extract_client_name(self, ytcfg, default_client='web'):
	410	return self._ytcfg_get_safe(
	411	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	412	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	413
	414	def _extract_client_version(self, ytcfg, default_client='web'):
	415	return self._ytcfg_get_safe(
	416	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	417	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	418
	419	def _select_api_hostname(self, req_api_hostname, default_client=None):
	420	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	421	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	422
	423	def _extract_api_key(self, ytcfg=None, default_client='web'):
	424	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	425
	426	def _extract_context(self, ytcfg=None, default_client='web'):
	427	context = get_first(
	428	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	429	# Enforce language and tz for extraction
	430	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	431	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	432	return context
	433
	434	_SAPISID = None
	435
	436	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	437	time_now = round(time.time())
	438	if self._SAPISID is None:
	439	yt_cookies = self._get_cookies('https://www.youtube.com')
	440	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	441	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	442	sapisid_cookie = dict_get(
	443	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	444	if sapisid_cookie and sapisid_cookie.value:
	445	self._SAPISID = sapisid_cookie.value
	446	self.write_debug('Extracted SAPISID cookie')
	447	# SAPISID cookie is required if not already present
	448	if not yt_cookies.get('SAPISID'):
	449	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	450	self._set_cookie(
	451	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	452	else:
	453	self._SAPISID = False
	454	if not self._SAPISID:
	455	return None
	456	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	457	sapisidhash = hashlib.sha1(
	458	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	459	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	460
	461	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	462	note='Downloading API JSON', errnote='Unable to download API page',
	463	context=None, api_key=None, api_hostname=None, default_client='web'):
	464
	465	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	466	data.update(query)
	467	real_headers = self.generate_api_headers(default_client=default_client)
	468	real_headers.update({'content-type': 'application/json'})
	469	if headers:
	470	real_headers.update(headers)
	471	api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	472	or api_key or self._extract_api_key(default_client=default_client))
	473	return self._download_json(
	474	f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
	475	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	476	data=json.dumps(data).encode('utf8'), headers=real_headers,
	477	query={'key': api_key, 'prettyPrint': 'false'})
	478
	479	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	480	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
	481
	482	@staticmethod
	483	def _extract_session_index(*data):
	484	"""
	485	Index of current account in account list.
	486	See: https://github.com/yt-dlp/yt-dlp/pull/519
	487	"""
	488	for ytcfg in data:
	489	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	490	if session_index is not None:
	491	return session_index
	492
	493	# Deprecated?
	494	def _extract_identity_token(self, ytcfg=None, webpage=None):
	495	if ytcfg:
	496	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	497	if token:
	498	return token
	499	if webpage:
	500	return self._search_regex(

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from ..compat import functools

21

from ..jsinterp import JSInterpreter

22

from ..utils import (

NO_DEFAULT,

ExtractorError,

UserNotLive,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

65

INNERTUBE_CLIENTS = {

66

'web': {

67

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

68

'INNERTUBE_CONTEXT': {

69

'client': {

70

'clientName': 'WEB',

71

'clientVersion': '2.20220801.00.00',

72

}

73

},

74

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

75

},

76

'web_embedded': {

77

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

78

'INNERTUBE_CONTEXT': {

79

'client': {

80

'clientName': 'WEB_EMBEDDED_PLAYER',

81

'clientVersion': '1.20220731.00.00',

82

},

83

},

84

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

85

},

86

'web_music': {

87

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

88

'INNERTUBE_HOST': 'music.youtube.com',

89

'INNERTUBE_CONTEXT': {

90

'client': {

91

'clientName': 'WEB_REMIX',

92

'clientVersion': '1.20220727.01.00',

93

}

94

},

95

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

96

},

97

'web_creator': {

98

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

99

'INNERTUBE_CONTEXT': {

100

'client': {

101

'clientName': 'WEB_CREATOR',

102

'clientVersion': '1.20220726.00.00',

103

}

104

},

105

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

106

},

107

'android': {

108

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

109

'INNERTUBE_CONTEXT': {

110

'client': {

111

'clientName': 'ANDROID',

112

'clientVersion': '17.29.34',

113

'androidSdkVersion': 30

114

}

115

},

116

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

117

'REQUIRE_JS_PLAYER': False

118

},

119

'android_embedded': {

120

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID_EMBEDDED_PLAYER',

124

'clientVersion': '17.29.34',

125

'androidSdkVersion': 30

126

},

127

},

128

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

129

'REQUIRE_JS_PLAYER': False

130

},

131

'android_music': {

132

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

133

'INNERTUBE_CONTEXT': {

134

'client': {

135

'clientName': 'ANDROID_MUSIC',

136

'clientVersion': '5.16.51',

137

'androidSdkVersion': 30

138

}

139

},

140

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

141

'REQUIRE_JS_PLAYER': False

142

},

143

'android_creator': {

144

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

145

'INNERTUBE_CONTEXT': {

146

'client': {

147

'clientName': 'ANDROID_CREATOR',

148

'clientVersion': '22.28.100',

149

'androidSdkVersion': 30

150

},

151

},

152

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

153

'REQUIRE_JS_PLAYER': False

154

},

155

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

156

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

157

'ios': {

158

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

159

'INNERTUBE_CONTEXT': {

160

'client': {

161

'clientName': 'IOS',

162

'clientVersion': '17.30.1',

163

'deviceModel': 'iPhone14,3',

164

}

165

},

166

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

167

'REQUIRE_JS_PLAYER': False

168

},

169

'ios_embedded': {

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS_MESSAGES_EXTENSION',

173

'clientVersion': '17.30.1',

174

'deviceModel': 'iPhone14,3',

175

},

176

},

177

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

178

'REQUIRE_JS_PLAYER': False

179

},

180

'ios_music': {

181

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

182

'INNERTUBE_CONTEXT': {

183

'client': {

184

'clientName': 'IOS_MUSIC',

185

'clientVersion': '5.18',

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_creator': {

192

'INNERTUBE_CONTEXT': {

193

'client': {

194

'clientName': 'IOS_CREATOR',

195

'clientVersion': '22.29.101',

196

},

197

},

198

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

199

'REQUIRE_JS_PLAYER': False

200

},

201

# mweb has 'ultralow' formats

202

# See: https://github.com/yt-dlp/yt-dlp/pull/557

203

'mweb': {

204

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

205

'INNERTUBE_CONTEXT': {

206

'client': {

207

'clientName': 'MWEB',

208

'clientVersion': '2.20220801.00.00',

209

}

210

},

211

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

212

},

213

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

214

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

215

'tv_embedded': {

216

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

217

'INNERTUBE_CONTEXT': {

218

'client': {

219

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

220

'clientVersion': '2.0',

221

},

222

},

223

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

229

variant, *base = client_name.rsplit('.', 1)

230

if base:

231

return variant, base[0], variant

232

base, *variant = client_name.split('_', 1)

233

return client_name, base, variant[0] if variant else None

234

235

236

def build_innertube_clients():

237

THIRD_PARTY = {

238

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

239

}

240

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

241

priority = qualities(BASE_CLIENTS[::-1])

242

243

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

244

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

245

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

246

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

247

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

248

249

_, base_client, variant = _split_innertube_client(client)

250

ytcfg['priority'] = 10 * priority(base_client)

251

252

if not variant:

253

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

254

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

255

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

256

embedscreen['priority'] -= 3

257

elif variant == 'embedded':

258

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

259

ytcfg['priority'] -= 2

260

else:

261

ytcfg['priority'] -= 3

262

263

264

build_innertube_clients()

265

266

267

class YoutubeBaseInfoExtractor(InfoExtractor):

268

"""Provide base functions for Youtube extractors"""

269

270

_RESERVED_NAMES = (

271

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

277

278

# _NETRC_MACHINE = 'youtube'

279

280

# If True it will raise an error if no login info is provided

281

_LOGIN_REQUIRED = False

282

283

_INVIDIOUS_SITES = (

284

# invidious-redirect websites

285

r'(?:www\.)?redirect\.invidious\.io',

286

r'(?:(?:www|dev)\.)?invidio\.us',

287

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

288

r'(?:www\.)?invidious\.pussthecat\.org',

289

r'(?:www\.)?invidious\.zee\.li',

290

r'(?:www\.)?invidious\.ethibox\.fr',

291

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

292

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

293

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

294

# youtube-dl invidious instances list

295

r'(?:(?:www|no)\.)?invidiou\.sh',

296

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

297

r'(?:www\.)?invidious\.kabi\.tk',

298

r'(?:www\.)?invidious\.mastodon\.host',

299

r'(?:www\.)?invidious\.zapashcanon\.fr',

300

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

301

r'(?:www\.)?invidious\.tinfoil-hat\.net',

302

r'(?:www\.)?invidious\.himiko\.cloud',

303

r'(?:www\.)?invidious\.reallyancient\.tech',

304

r'(?:www\.)?invidious\.tube',

305

r'(?:www\.)?invidiou\.site',

306

r'(?:www\.)?invidious\.site',

307

r'(?:www\.)?invidious\.xyz',

308

r'(?:www\.)?invidious\.nixnet\.xyz',

309

r'(?:www\.)?invidious\.048596\.xyz',

310

r'(?:www\.)?invidious\.drycat\.fr',

311

r'(?:www\.)?inv\.skyn3t\.in',

312

r'(?:www\.)?tube\.poal\.co',

313

r'(?:www\.)?tube\.connect\.cafe',

314

r'(?:www\.)?vid\.wxzm\.sx',

315

r'(?:www\.)?vid\.mint\.lgbt',

316

r'(?:www\.)?vid\.puffyan\.us',

317

r'(?:www\.)?yewtu\.be',

318

r'(?:www\.)?yt\.elukerio\.org',

319

r'(?:www\.)?yt\.lelux\.fi',

320

r'(?:www\.)?invidious\.ggc-project\.de',

321

r'(?:www\.)?yt\.maisputain\.ovh',

322

r'(?:www\.)?ytprivate\.com',

323

r'(?:www\.)?invidious\.13ad\.de',

324

r'(?:www\.)?invidious\.toot\.koeln',

325

r'(?:www\.)?invidious\.fdn\.fr',

326

r'(?:www\.)?watch\.nettohikari\.com',

327

r'(?:www\.)?invidious\.namazso\.eu',

328

r'(?:www\.)?invidious\.silkky\.cloud',

329

r'(?:www\.)?invidious\.exonip\.de',

330

r'(?:www\.)?invidious\.riverside\.rocks',

331

r'(?:www\.)?invidious\.blamefran\.net',

332

r'(?:www\.)?invidious\.moomoo\.de',

333

r'(?:www\.)?ytb\.trom\.tf',

334

r'(?:www\.)?yt\.cyberhost\.uk',

335

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

336

r'(?:www\.)?qklhadlycap4cnod\.onion',

337

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

338

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

339

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

340

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

341

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

342

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

343

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

344

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

345

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

346

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

347

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

348

r'(?:www\.)?piped\.kavin\.rocks',

349

r'(?:www\.)?piped\.silkky\.cloud',

350

r'(?:www\.)?piped\.tokhmi\.xyz',

351

r'(?:www\.)?piped\.moomoo\.me',

352

r'(?:www\.)?il\.ax',

353

r'(?:www\.)?piped\.syncpundit\.com',

354

r'(?:www\.)?piped\.mha\.fi',

355

r'(?:www\.)?piped\.mint\.lgbt',

356

r'(?:www\.)?piped\.privacy\.com\.de',

357

)

358

359

def _initialize_consent(self):

360

cookies = self._get_cookies('https://www.youtube.com/')

361

if cookies.get('__Secure-3PSID'):

362

return

363

consent_id = None

364

consent = cookies.get('CONSENT')

365

if consent:

366

if 'YES' in consent.value:

367

return

368

consent_id = self._search_regex(

369

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

370

if not consent_id:

371

consent_id = random.randint(100, 999)

372

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

373

374

def _initialize_pref(self):

375

cookies = self._get_cookies('https://www.youtube.com/')

376

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

381

except ValueError:

382

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

383

pref.update({'hl': 'en', 'tz': 'UTC'})

384

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

385

386

def _real_initialize(self):

387

self._initialize_pref()

388

self._initialize_consent()

389

self._check_login_required()

390

391

def _check_login_required(self):

392

if self._LOGIN_REQUIRED and not self._cookies_passed:

393

self.raise_login_required('Login details are needed to download this content', method='cookies')

394

395

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

396

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

397

398

def _get_default_ytcfg(self, client='web'):

399

return copy.deepcopy(INNERTUBE_CLIENTS[client])

400

401

def _get_innertube_host(self, client='web'):

402

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

403

404

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

405

# try_get but with fallback to default ytcfg client values when present

406

_func = lambda y: try_get(y, getter, expected_type)

407

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

408

409

def _extract_client_name(self, ytcfg, default_client='web'):

410

return self._ytcfg_get_safe(

411

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

412

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

413

414

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)

418

419

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname.

    Precedence: the `innertube_host` extractor argument, then
    `req_api_hostname`, then the host of `default_client` (or of 'web').
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

422

423

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return `INNERTUBE_API_KEY` from `ytcfg`, or from the default client config."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)

425

426

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the `INNERTUBE_CONTEXT` dict from `ytcfg` or the default client config.

    The context's `client` dict (when traverse_obj finds one) is updated with
    English language and UTC timezone settings before the context is returned.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context

# Cache used by _generate_sapisidhash_header: None means the cookies have not
# been inspected yet; False means they were inspected and no usable value was found.
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

437

time_now = round(time.time())

438

if self._SAPISID is None:

439

yt_cookies = self._get_cookies('https://www.youtube.com')

440

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

441

# See: https://github.com/yt-dlp/yt-dlp/issues/393

442

sapisid_cookie = dict_get(

443

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

444

if sapisid_cookie and sapisid_cookie.value:

445

self._SAPISID = sapisid_cookie.value

446

self.write_debug('Extracted SAPISID cookie')

447

# SAPISID cookie is required if not already present

448

if not yt_cookies.get('SAPISID'):

449

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

450

self._set_cookie(

451

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

452

else:

453

self._SAPISID = False

454

if not self._SAPISID:

455

return None

456

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

457

sapisidhash = hashlib.sha1(

458

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

459

return f'SAPISIDHASH {time_now}_{sapisidhash}'

460

461

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the `/youtubei/v1/<ep>` endpoint and return the decoded JSON.

    The request body is `{'context': ...}` merged with `query`. When `context`
    is falsy, the context of `default_client` is used. `headers` are applied on
    top of the generated API headers. The API key is taken from the
    `innertube_key` extractor argument, then `api_key`, then the client default.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})

478

479

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Locate the `ytInitialData` assignment in `webpage` and return its parsed JSON."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)

481

482

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first non-None `SESSION_INDEX` (converted with int_or_none)
    among the given ytcfg objects, or None when there is none.
    """
    indices = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg['ID_TOKEN']`, else from `webpage`.

    Returns None when neither source provides a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    # Fall back to scraping the token out of the page source
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

503

504

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        parts = (try_get(source, datasync_getters, str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    candidate_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, candidate_paths, expected_type=str)

532

533

@functools.cached_property
def is_authenticated(self):
    """True when `_generate_sapisidhash_header` yields a truthy value; cached per instance."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

536

537

def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to `ytcfg.set({...});` in `webpage`.

    Returns an empty dict when `webpage` is falsy, when no `ytcfg.set(...)`
    call is found, or when the captured object is not valid JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped to match literally; an
    # unescaped `$` there is an end-of-string anchor and the pattern could
    # never match an actual `ytcfg.set({...});` statement.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

544

545

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an API request.

    Explicit keyword arguments take precedence over values extracted from
    `ytcfg`. Headers whose value is None are dropped from the result.
    `Authorization` and `X-Origin` are added only when a SAPISIDHASH value
    can be generated.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account sync id without a known session index falls back to index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})

    return {name: value for name, value in headers.items() if value is not None}

569

570

def _download_ytcfg(self, client, video_id):

571

url = {

572

'web': 'https://www.youtube.com',

573

'web_music': 'https://music.youtube.com',

574

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

579

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

580

return self.extract_ytcfg(video_id, webpage) or {}

581

582

@staticmethod

583

def _build_api_continuation_query(continuation, ctp=None):

584

query = {

585

'continuation': continuation

586

}

587

# TODO: Inconsistency with clickTrackingParams.

588

# Currently we have a fixed ctp contained within context (from ytcfg)

589

# and a ctp in root query for continuation.

590

if ctp:

591

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from `renderer`'s next/reload continuation data.

    Returns None when the data or its `continuation` token is missing.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))

606

607

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    `continuationCommand.token` string.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

616

617

@classmethod
def _extract_continuation(cls, renderer):
    """Find a continuation query for `renderer`.

    The renderer's own next/reload continuation data is tried first; after
    that, every dict entry of its `contents` and `items` lists is checked for
    a `continuationItemRenderer`. Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x: x[key], list) or [])]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """Yield `(alert_type, message)` pairs for the entries of `data['alerts']`.

    Entries that are not dicts, alerts without a `type`, and alerts without
    text are skipped.
    """
    alert_list = try_get(data, lambda x: x['alerts'], list) or []
    for container in alert_list:
        if isinstance(container, dict):
            for alert in container.values():
                kind = alert.get('type')
                if kind:
                    text = cls._get_text(alert, 'text')
                    if text:
                        yield kind, text

650

651

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

652

errors = []

653

warnings = []

654

for alert_type, alert_message in alerts:

655

if alert_type.lower() == 'error' and fatal:

656

errors.append([alert_type, alert_message])

657

else:

658

warnings.append([alert_type, alert_message])

659

660

for alert_type, alert_message in (warnings + errors[:-1]):

661

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

662

if errors:

663

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

664

665

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to `_report_alerts` with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

667

668

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased `metadataBadgeRenderer` labels in `renderer['badges']`."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in `data` under any of `path_list`.

    Each candidate object is read as `simpleText` first, then as the joined
    `text` values of its `runs` (a bare list is treated as the runs
    themselves). `max_runs`, when truthy, limits how many runs are joined.
    With no paths, `data` itself is the candidate. Returns None when no text
    is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # The result is wrapped in a list unless the path contains `...`
            # or a list/tuple key
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for candidate in candidates:
            simple = try_get(candidate, lambda x: x['simpleText'], str)
            if simple:
                return simple
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Parse a count from the text found in `data` under `path_list`.

    parse_count is tried first. When it returns None, the leading run of
    digits and commas of the whitespace-stripped text is converted with
    str_to_int.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    compact = re.sub(r'\s', '', text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width' keys
    """
    results = []
    for path in path_list or [()]:
        found = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in found:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                # Keep only the part before the query string
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

732

"""

733

Extracts a relative time from string and converts to dt object

734

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

739

if start:

740

return datetime_from_str(start)

741

try:

742

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time ('6 days ago'), then as an
    absolute date. A warning is reported once when non-empty text cannot be
    parsed at all.
    """
    time_text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(time_text)
    timestamp = (
        calendar.timegm(relative.timetuple())
        if isinstance(relative, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(time_text)
        if not timestamp:
            # Retry on the date-looking portion of the lower-cased text
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                time_text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if time_text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{time_text}'" + bug_reports_message(), only_once=True)
    return timestamp, time_text

764

765

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API through the retry manager and return the response.

    An attempt is retried on network errors other than HTTP 403/429, on
    alerts containing 'unknown error', and when `check_get_keys` cannot be
    found in the response. Other failures go through `_error_or_warning`,
    whose result is returned.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            first_bytes = cause.read(512)
            if not is_html(first_bytes):
                # Non-HTML error body: try to surface the API's own error message
                error_body = self._webpage_read_content(cause, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code not in (403, 429):
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in err.msg.lower():
                retry.error = err
                continue
            return self._error_or_warning(err, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response

@staticmethod

def is_music_url(url):

819

return re.match(r'https?://music\.youtube\.com/', url) is not None

820

821

def _extract_video(self, renderer):
    """Build a `url`-type result dict for YoutubeIE from a video renderer.

    The URL points at `/shorts/<id>` when the overlay style is 'SHORTS' or the
    navigation URL contains '/shorts/', and at `/watch?v=<id>` otherwise.
    `upload_date` is filled only when the `approximate_date` extractor
    argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration spelled out in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

878

IE_DESC = 'YouTube'

879

_VALID_URL = r"""(?x)^

880

(

881

(?:https?://|//) # http(s):// or protocol-independent URL

882

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

883

(?:www\.)?deturl\.com/www\.youtube\.com|

884

(?:www\.)?pwnyoutube\.com|

885

(?:www\.)?hooktube\.com|

886

(?:www\.)?yourepeat\.com|

887

tube\.majestyc\.net|

888

%(invidious)s|

889

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

890

(?:.*?\#/)? # handle anchor (#/) redirect urls

891

(?: # the various things that can precede the ID:

892

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

893

|(?: # or the v= param in all its forms

894

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

895

(?:\?|\#!?) # the params delimiter ? or # or #!

896

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

902

vid\.plus| # or vid.plus/xxxx

903

zwearz\.com/watch| # or zwearz.com/watch/xxxx

904

%(invidious)s

905

)/

906

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

907

)

908

)? # all until now is optional -> you can pass the naked ID

909

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

910

(?(1).+)? # if we found the ID, everything can follow

911

(?:\#|$)""" % {

912

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

913

}

914

_EMBED_REGEX = [r'''(?x)

(?:

<iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

925

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

926

\1''']

927

_PLAYER_INFO_RE = (

928

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

929

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

930

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

931

)

932

_formats = {

933

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

934

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

935

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

936

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

937

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

938

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

939

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

940

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

941

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

942

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

943

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

944

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

945

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

946

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

947

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

948

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

949

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

950

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

955

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

956

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

957

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

958

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

959

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

960

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

961

962

# Apple HTTP Live Streaming

963

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

964

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

965

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

966

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

967

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

968

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

969

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

970

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

971

972

# DASH mp4 video

973

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

974

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

975

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

976

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

977

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

979

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

983

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

984

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

986

# Dash mp4 audio

987

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

988

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

989

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

990

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

991

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

992

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

993

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

994

995

# Dash webm

996

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

997

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

998

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

999

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1000

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1003

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1004

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1005

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1006

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1007

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1012

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1014

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1015

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1016

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1018

1019

# Dash webm audio

1020

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1021

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1022

1023

# Dash webm audio with opus inside

1024

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1025

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1026

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1027

1028

# RTMP (unnamed)

1029

'_rtmp': {'protocol': 'rtmp'},

1030

1031

# av01 video only formats sometimes served with "unknown" codecs

1032

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1033

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1034

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1035

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1036

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1037

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1038

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1039

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1040

}

1041

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1053

'uploader': 'Philipp Hagemeister',

1054

'uploader_id': 'phihag',

1055

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1056

'channel': 'Philipp Hagemeister',

1057

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1058

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1059

'upload_date': '20121002',

1060

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1061

'categories': ['Science & Technology'],

1062

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1067

'playable_in_embed': True,

1068

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1069

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'comment_count': int,

1074

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1079

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1084

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1085

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1086

'uploader': 'SET India',

1087

'uploader_id': 'setindia',

1088

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1089

'age_limit': 18,

1090

},

1091

'skip': 'Private video',

1092

},

1093

{

1094

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1095

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1100

'uploader': 'Philipp Hagemeister',

1101

'uploader_id': 'phihag',

1102

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1103

'channel': 'Philipp Hagemeister',

1104

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1105

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1106

'upload_date': '20121002',

1107

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1108

'categories': ['Science & Technology'],

1109

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1114

'playable_in_embed': True,

1115

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1116

'live_status': 'not_live',

1117

'age_limit': 0,

1118

'comment_count': int,

1119

'channel_follower_count': int

1120

},

1121

'params': {

1122

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1127

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1132

'uploader_id': '8KVIDEO',

1133

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1134

'description': '',

1135

'uploader': '8KVIDEO',

1136

'title': 'UHDTV TEST 8K VIDEO.mp4'

1137

},

1138

'params': {

1139

'youtube_include_dash_manifest': True,

1140

'format': '141',

1141

},

1142

'skip': 'format 141 not served anymore',

1143

},

1144

# DASH manifest with encrypted signature

1145

{

1146

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1151

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1152

'duration': 244,

1153

'uploader': 'AfrojackVEVO',

1154

'uploader_id': 'AfrojackVEVO',

1155

'upload_date': '20131011',

1156

'abr': 129.495,

1157

'like_count': int,

1158

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1159

'playable_in_embed': True,

1160

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1161

'view_count': int,

1162

'track': 'The Spark',

1163

'live_status': 'not_live',

1164

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1165

'channel': 'Afrojack',

1166

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1167

'tags': 'count:19',

1168

'availability': 'public',

1169

'categories': ['Music'],

1170

'age_limit': 0,

1171

'alt_title': 'The Spark',

1172

'channel_follower_count': int

1173

},

1174

'params': {

1175

'youtube_include_dash_manifest': True,

1176

'format': '141/bestaudio[ext=m4a]',

1177

},

1178

},

1179

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1180

{

1181

'note': 'Embed allowed age-gate video',

1182

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1187

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1188

'duration': 142,

1189

'uploader': 'The Witcher',

1190

'uploader_id': 'WitcherGame',

1191

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1192

'upload_date': '20140605',

1193

'age_limit': 18,

1194

'categories': ['Gaming'],

1195

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1196

'availability': 'needs_auth',

1197

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1198

'like_count': int,

1199

'channel': 'The Witcher',

1200

'live_status': 'not_live',

1201

'tags': 'count:17',

1202

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1203

'playable_in_embed': True,

1204

'view_count': int,

1205

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1210

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1215

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1216

'upload_date': '20200408',

1217

'uploader_id': 'FlyingKitty900',

1218

'uploader': 'FlyingKitty',

1219

'age_limit': 18,

1220

'availability': 'needs_auth',

1221

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1222

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1223

'channel': 'FlyingKitty',

1224

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1225

'view_count': int,

1226

'categories': ['Entertainment'],

1227

'live_status': 'not_live',

1228

'tags': ['Flyingkitty', 'godzilla 2'],

1229

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1230

'like_count': int,

1231

'duration': 177,

1232

'playable_in_embed': True,

1233

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1238

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1239

'info_dict': {

1240

'id': 'Tq92D6wQ1mg',

1241

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1242

'ext': 'mp4',

1243

'upload_date': '20191228',

1244

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1245

'uploader': 'Projekt Melody',

1246

'description': 'md5:17eccca93a786d51bc67646756894066',

1247

'age_limit': 18,

1248

'like_count': int,

1249

'availability': 'needs_auth',

1250

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1251

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1252

'view_count': int,

1253

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1254

'channel': 'Projekt Melody',

1255

'live_status': 'not_live',

1256

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1257

'playable_in_embed': True,

1258

'categories': ['Entertainment'],

1259

'duration': 106,

1260

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1261

'comment_count': int,

1262

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1267

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1272

'uploader': 'Herr Lurik',

1273

'uploader_id': 'st3in234',

1274

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1275

'upload_date': '20130730',

1276

'track': 'Such mich find mich',

1277

'age_limit': 0,

1278

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1279

'like_count': int,

1280

'playable_in_embed': False,

1281

'creator': 'OOMPH!',

1282

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1283

'view_count': int,

1284

'alt_title': 'Such mich find mich',

1285

'duration': 210,

1286

'channel': 'Herr Lurik',

1287

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1288

'categories': ['Music'],

1289

'availability': 'public',

1290

'uploader_url': 'http://www.youtube.com/user/st3in234',

1291

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1292

'live_status': 'not_live',

1293

'artist': 'OOMPH!',

1294

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1299

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1300

'only_matching': True,

1301

},

1302

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1303

# YouTube Red ad is not captured for creator

1304

{

1305

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1311

'uploader_id': 'deadmau5',

1312

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1313

'creator': 'deadmau5',

1314

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1315

'uploader': 'deadmau5',

1316

'title': 'Deadmau5 - Some Chords (HD)',

1317

'alt_title': 'Some Chords',

1318

'availability': 'public',

1319

'tags': 'count:14',

1320

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1321

'view_count': int,

1322

'live_status': 'not_live',

1323

'channel': 'deadmau5',

1324

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1325

'like_count': int,

1326

'track': 'Some Chords',

1327

'artist': 'deadmau5',

1328

'playable_in_embed': True,

1329

'age_limit': 0,

1330

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1331

'categories': ['Music'],

1332

'album': 'Some Chords',

1333

'channel_follower_count': int

1334

},

1335

'expected_warnings': [

1336

'DASH manifest missing',

1337

]

1338

},

1339

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1340

{

1341

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1347

'uploader_id': 'olympic',

1348

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1349

'description': 'md5:04bbbf3ccceb6795947572ca36f45904',

1350

'uploader': 'Olympics',

1351

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1352

'like_count': int,

1353

'release_timestamp': 1343767800,

1354

'playable_in_embed': True,

1355

'categories': ['Sports'],

1356

'release_date': '20120731',

1357

'channel': 'Olympics',

1358

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1359

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1360

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1361

'age_limit': 0,

1362

'availability': 'public',

1363

'live_status': 'was_live',

1364

'view_count': int,

1365

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1366

'channel_follower_count': int

1367

},

1368

'params': {

1369

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1379

'duration': 85,

1380

'upload_date': '20110310',

1381

'uploader_id': 'AllenMeow',

1382

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1383

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1384

'uploader': '孫ᄋᄅ',

1385

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1386

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1391

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1392

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1393

'view_count': int,

1394

'categories': ['People & Blogs'],

1395

'like_count': int,

1396

'live_status': 'not_live',

1397

'availability': 'unlisted',

1398

'comment_count': int,

1399

'channel_follower_count': int

1400

},

1401

},

1402

# url_encoded_fmt_stream_map is empty string

1403

{

1404

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1409

'description': '',

1410

'upload_date': '20150404',

1411

'uploader_id': 'spbelect',

1412

'uploader': 'Наблюдатели Петербурга',

1413

},

1414

'params': {

1415

'skip_download': 'requires avconv',

1416

},

1417

'skip': 'This live event has ended.',

1418

},

1419

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1420

{

1421

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1426

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1427

'duration': 220,

1428

'upload_date': '20150625',

1429

'uploader_id': 'dorappi2000',

1430

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1431

'uploader': 'dorappi2000',

1432

'formats': 'mincount:31',

1433

},

1434

'skip': 'not actual anymore',

1435

},

1436

# DASH manifest with segment_list

1437

{

1438

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1439

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1444

'uploader': 'Airtek',

1445

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1446

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1447

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1448

},

1449

'params': {

1450

'youtube_include_dash_manifest': True,

1451

'format': '135', # bestvideo

1452

},

1453

'skip': 'This live event has ended.',

1454

},

1455

{

1456

# Multifeed videos (multiple cameras), URL is for Main Camera

1457

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1458

'info_dict': {

1459

'id': 'jvGDaLqkpTg',

1460

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1461

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

1469

'duration': 10643,

1470

'upload_date': '20161111',

1471

'uploader': 'Team PGP',

1472

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1473

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1480

'description': 'md5:e03b909557865076822aa169218d6a5d',

1481

'duration': 10991,

1482

'upload_date': '20161111',

1483

'uploader': 'Team PGP',

1484

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1485

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1492

'description': 'md5:e03b909557865076822aa169218d6a5d',

1493

'duration': 10995,

1494

'upload_date': '20161111',

1495

'uploader': 'Team PGP',

1496

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1504

'description': 'md5:e03b909557865076822aa169218d6a5d',

1505

'duration': 10990,

1506

'upload_date': '20161111',

1507

'uploader': 'Team PGP',

1508

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1514

},

1515

'skip': 'Not multifeed anymore',

1516

},

1517

{

1518

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1519

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1520

'info_dict': {

1521

'id': 'gVfLd0zydlo',

1522

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1523

},

1524

'playlist_count': 2,

1525

'skip': 'Not multifeed anymore',

1526

},

1527

{

1528

'url': 'https://vid.plus/FlRa-iH7PGw',

1529

'only_matching': True,

1530

},

1531

{

1532

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1533

'only_matching': True,

1534

},

1535

{

1536

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1537

# Also tests cut-off URL expansion in video description (see

1538

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1539

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1540

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1545

'alt_title': 'Dark Walk',

1546

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1547

'duration': 133,

1548

'upload_date': '20151119',

1549

'uploader_id': 'IronSoulElf',

1550

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1551

'uploader': 'IronSoulElf',

1552

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1553

'track': 'Dark Walk',

1554

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1555

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1556

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1557

'categories': ['Film & Animation'],

1558

'view_count': int,

1559

'live_status': 'not_live',

1560

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1561

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1562

'tags': 'count:13',

1563

'availability': 'public',

1564

'channel': 'IronSoulElf',

1565

'playable_in_embed': True,

1566

'like_count': int,

1567

'age_limit': 0,

1568

'channel_follower_count': int

1569

},

1570

'params': {

1571

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1576

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1577

'only_matching': True,

1578

},

1579

{

1580

# Video with yt:stretch=17:0

1581

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1586

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1587

'upload_date': '20151107',

1588

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1589

'uploader': 'CH GAMER DROID',

1590

},

1591

'params': {

1592

'skip_download': True,

1593

},

1594

'skip': 'This video does not exist.',

1595

},

1596

{

1597

# Video with incomplete 'yt:stretch=16:'

1598

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1599

'only_matching': True,

1600

},

1601

{

1602

# Video licensed under Creative Commons

1603

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1608

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1609

'duration': 721,

1610

'upload_date': '20150128',

1611

'uploader_id': 'BerkmanCenter',

1612

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1613

'uploader': 'The Berkman Klein Center for Internet & Society',

1614

'license': 'Creative Commons Attribution license (reuse allowed)',

1615

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1616

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1617

'like_count': int,

1618

'age_limit': 0,

1619

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1620

'channel': 'The Berkman Klein Center for Internet & Society',

1621

'availability': 'public',

1622

'view_count': int,

1623

'categories': ['Education'],

1624

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1625

'live_status': 'not_live',

1626

'playable_in_embed': True,

1627

'comment_count': int,

1628

'channel_follower_count': int

1629

},

1630

'params': {

1631

'skip_download': True,

},

},

{

# Channel-like uploader_url

1636

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1641

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1642

'duration': 4060,

1643

'upload_date': '20151120',

1644

'uploader': 'Bernie Sanders',

1645

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1646

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1647

'license': 'Creative Commons Attribution license (reuse allowed)',

1648

'playable_in_embed': True,

1649

'tags': 'count:12',

1650

'like_count': int,

1651

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1652

'age_limit': 0,

1653

'availability': 'public',

1654

'categories': ['News & Politics'],

1655

'channel': 'Bernie Sanders',

1656

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1657

'view_count': int,

1658

'live_status': 'not_live',

1659

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1660

'comment_count': int,

1661

'channel_follower_count': int

1662

},

1663

'params': {

1664

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1669

'only_matching': True,

1670

},

1671

{

1672

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1673

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1674

'only_matching': True,

1675

},

1676

{

1677

# Rental video preview

1678

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1683

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1684

'upload_date': '20150811',

1685

'uploader': 'FlixMatrix',

1686

'uploader_id': 'FlixMatrixKaravan',

1687

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1688

'license': 'Standard YouTube License',

1689

},

1690

'params': {

1691

'skip_download': True,

1692

},

1693

'skip': 'This video is not available.',

1694

},

1695

{

1696

# YouTube Red video with episode data

1697

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1702

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1703

'duration': 2085,

1704

'upload_date': '20170118',

1705

'uploader': 'Vsauce',

1706

'uploader_id': 'Vsauce',

1707

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1708

'series': 'Mind Field',

1709

'season_number': 1,

1710

'episode_number': 1,

1711

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1712

'tags': 'count:12',

1713

'view_count': int,

1714

'availability': 'public',

1715

'age_limit': 0,

1716

'channel': 'Vsauce',

1717

'episode': 'Episode 1',

1718

'categories': ['Entertainment'],

1719

'season': 'Season 1',

1720

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1721

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1722

'like_count': int,

1723

'playable_in_embed': True,

1724

'live_status': 'not_live',

1725

'channel_follower_count': int

1726

},

1727

'params': {

1728

'skip_download': True,

1729

},

1730

'expected_warnings': [

1731

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1736

# as inappropriate or offensive to some audiences.

1737

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1742

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1743

'duration': 965,

1744

'upload_date': '20140124',

1745

'uploader': 'New Century Foundation',

1746

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1747

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1748

},

1749

'params': {

1750

'skip_download': True,

1751

},

1752

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1757

'only_matching': True,

1758

},

1759

{

1760

# geo restricted to JP

1761

'url': 'sJL6WA-aGkQ',

1762

'only_matching': True,

1763

},

1764

{

1765

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1766

'only_matching': True,

1767

},

1768

{

1769

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1770

'only_matching': True,

1771

},

1772

{

1773

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1774

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1775

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1780

'only_matching': True,

1781

},

1782

{

1783

# Video with unsupported adaptive stream type formats

1784

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1789

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1790

'duration': 433,

1791

'upload_date': '20130923',

1792

'uploader': 'Amelia Putri Harwita',

1793

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1794

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1795

'formats': 'maxcount:10',

1796

},

1797

'params': {

1798

'skip_download': True,

1799

'youtube_include_dash_manifest': False,

1800

},

1801

'skip': 'not actual anymore',

1802

},

1803

{

1804

# YouTube Music auto-generated description

1805

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1810

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1811

'upload_date': '20190312',

1812

'uploader': 'Stephen - Topic',

1813

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1814

'artist': 'Stephen',

1815

'track': 'Voyeur Girl',

1816

'album': 'it\'s too much love to know my dear',

1817

'release_date': '20190313',

1818

'release_year': 2019,

1819

'alt_title': 'Voyeur Girl',

1820

'view_count': int,

1821

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1822

'playable_in_embed': True,

1823

'like_count': int,

1824

'categories': ['Music'],

1825

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1826

'channel': 'Stephen',

1827

'availability': 'public',

1828

'creator': 'Stephen',

1829

'duration': 169,

1830

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1831

'age_limit': 0,

1832

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1833

'tags': 'count:11',

1834

'live_status': 'not_live',

1835

'channel_follower_count': int

1836

},

1837

'params': {

1838

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1843

'only_matching': True,

1844

},

1845

{

1846

# invalid -> valid video id redirection

1847

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1852

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1853

'upload_date': '20090125',

1854

'uploader': 'Prochorowka',

1855

'uploader_id': 'Prochorowka',

1856

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1857

'artist': 'Panjabi MC',

1858

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1859

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1860

},

1861

'params': {

1862

'skip_download': True,

1863

},

1864

'skip': 'Video unavailable',

1865

},

1866

{

1867

# empty description results in an empty string

1868

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1875

'uploader_id': 'ElevageOrVert',

1876

'uploader': 'ElevageOrVert',

1877

'view_count': int,

1878

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1879

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1880

'like_count': int,

1881

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1882

'tags': [],

1883

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1884

'availability': 'public',

1885

'age_limit': 0,

1886

'categories': ['Pets & Animals'],

1887

'duration': 7,

1888

'playable_in_embed': True,

1889

'live_status': 'not_live',

1890

'channel': 'ElevageOrVert',

1891

'channel_follower_count': int

1892

},

1893

'params': {

1894

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1899

# see [2] for an example with '};' inside ytInitialPlayerResponse

1900

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1901

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1902

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1907

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1908

'upload_date': '20130831',

1909

'uploader_id': 'kudvenkat',

1910

'uploader': 'kudvenkat',

1911

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1912

'like_count': int,

1913

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1914

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1915

'live_status': 'not_live',

1916

'categories': ['Education'],

1917

'availability': 'public',

1918

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1919

'tags': 'count:12',

1920

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1925

'comment_count': int,

1926

'channel_follower_count': int

1927

},

1928

'params': {

1929

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1934

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1935

'only_matching': True,

1936

},

1937

{

1938

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1939

'only_matching': True,

1940

},

1941

{

1942

# https://github.com/ytdl-org/youtube-dl/pull/28094

1943

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1949

'upload_date': '20141120',

1950

'uploader': 'The Cinematic Orchestra - Topic',

1951

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1952

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1953

'artist': 'The Cinematic Orchestra',

1954

'track': 'Burn Out',

1955

'album': 'Every Day',

1956

'like_count': int,

1957

'live_status': 'not_live',

1958

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1963

'creator': 'The Cinematic Orchestra',

1964

'channel': 'The Cinematic Orchestra',

1965

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1966

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1967

'availability': 'public',

1968

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1969

'categories': ['Music'],

1970

'playable_in_embed': True,

1971

'channel_follower_count': int

1972

},

1973

'params': {

1974

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1979

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1980

'only_matching': True,

1981

},

1982

{

1983

# controversial video, requires bpctr/contentCheckOk

1984

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1989

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1990

'uploader': 'CBS Mornings',

1991

'uploader_id': 'CBSThisMorning',

1992

'upload_date': '20140716',

1993

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1994

'duration': 170,

1995

'categories': ['News & Politics'],

1996

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1997

'view_count': int,

1998

'channel': 'CBS Mornings',

1999

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

2000

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

2001

'age_limit': 18,

2002

'availability': 'needs_auth',

2003

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2004

'like_count': int,

2005

'live_status': 'not_live',

2006

'playable_in_embed': True,

2007

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2012

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2017

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2018

'upload_date': '20201120',

2019

'uploader': 'Walk around Japan',

2020

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2021

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2022

'duration': 1456,

2023

'categories': ['Travel & Events'],

2024

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2025

'view_count': int,

2026

'channel': 'Walk around Japan',

2027

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2028

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2029

'age_limit': 0,

2030

'availability': 'public',

2031

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2032

'live_status': 'not_live',

2033

'playable_in_embed': True,

2034

'channel_follower_count': int

2035

},

2036

'params': {

2037

'skip_download': True,

2038

},

2039

}, {

2040

# Has multiple audio streams

2041

'url': 'WaOKSUlf4TM',

2042

'only_matching': True

2043

}, {

2044

# Requires Premium: has format 141 when requested using YTM url

2045

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2046

'only_matching': True

2047

}, {

2048

# multiple subtitles with same lang_code

2049

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2050

'only_matching': True,

2051

}, {

2052

# Force use android client fallback

2053

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2054

'info_dict': {

2055

'id': 'YOelRv7fMxY',

2056

'title': 'DIGGING A SECRET TUNNEL Part 1',

2057

'ext': '3gp',

2058

'upload_date': '20210624',

2059

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2060

'uploader': 'colinfurze',

2061

'uploader_id': 'colinfurze',

2062

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2063

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2064

'duration': 596,

2065

'categories': ['Entertainment'],

2066

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2067

'view_count': int,

2068

'channel': 'colinfurze',

2069

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2070

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2071

'age_limit': 0,

2072

'availability': 'public',

2073

'like_count': int,

2074

'live_status': 'not_live',

2075

'playable_in_embed': True,

2076

'channel_follower_count': int

2077

},

2078

'params': {

2079

'format': '17', # 3gp format available on android

2080

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2085

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2086

'only_matching': True,

2087

'params': {

2088

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2093

'only_matching': True,

2094

}, {

2095

'note': 'Storyboards',

2096

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2102

'uploader_id': 'scishow',

2103

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2104

'upload_date': '20140324',

2105

'uploader': 'SciShow',

2106

'like_count': int,

2107

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2108

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2109

'view_count': int,

2110

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2111

'playable_in_embed': True,

2112

'tags': 'count:12',

2113

'uploader_url': 'http://www.youtube.com/user/scishow',

2114

'availability': 'public',

2115

'channel': 'SciShow',

2116

'live_status': 'not_live',

2117

'duration': 248,

2118

'categories': ['Education'],

2119

'age_limit': 0,

2120

'channel_follower_count': int

2121

}, 'params': {'format': 'mhtml', 'skip_download': True}

2122

}, {

2123

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2124

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2129

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2130

'uploader': 'Leon Nguyen',

2131

'uploader_id': 'VNSXIII',

2132

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2133

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2134

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2139

'tags': 'count:23',

2140

'playable_in_embed': True,

2141

'live_status': 'not_live',

2142

'upload_date': '20220103',

2143

'like_count': int,

2144

'availability': 'public',

2145

'channel': 'Leon Nguyen',

2146

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2147

'comment_count': int,

2148

'channel_follower_count': int

2149

}

2150

}, {

2151

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2152

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2157

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2158

'uploader': 'Quackity',

2159

'uploader_id': 'QuackityHQ',

2160

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2161

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2162

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2167

'tags': 'count:26',

2168

'playable_in_embed': True,

2169

'live_status': 'not_live',

2170

'release_timestamp': 1641172509,

2171

'release_date': '20220103',

2172

'upload_date': '20220103',

2173

'like_count': int,

2174

'availability': 'public',

2175

'channel': 'Quackity',

2176

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2177

'channel_follower_count': int

2178

}

2179

},

2180

{ # continuous livestream. Microformat upload date should be preferred.

2181

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2182

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2183

'info_dict': {

2184

'id': 'kgx4WGK0oNU',

2185

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2186

'ext': 'mp4',

2187

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2188

'availability': 'public',

2189

'age_limit': 0,

2190

'release_timestamp': 1637975704,

2191

'upload_date': '20210619',

2192

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2193

'live_status': 'is_live',

2194

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2195

'uploader': '阿鲍Abao',

2196

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2197

'channel': 'Abao in Tokyo',

2198

'channel_follower_count': int,

2199

'release_date': '20211127',

2200

'tags': 'count:39',

2201

'categories': ['People & Blogs'],

2202

'like_count': int,

2203

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2204

'view_count': int,

2205

'playable_in_embed': True,

2206

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2207

},

2208

'params': {'skip_download': True}

2209

}, {

2210

# Story. Requires specific player params to work.

2211

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2216

'view_count': int,

2217

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2218

'upload_date': '20220526',

2219

'categories': ['Education'],

2220

'title': 'Story',

2221

'channel': 'IT\'S HISTORY',

2222

'description': '',

2223

'uploader_id': 'BlastfromthePast',

2224

'duration': 12,

2225

'uploader': 'IT\'S HISTORY',

2226

'playable_in_embed': True,

2227

'age_limit': 0,

2228

'live_status': 'not_live',

2229

'tags': [],

2230

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2231

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2232

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2233

},

2234

'skip': 'stories get removed after some period of time',

2235

}, {

2236

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2241

'upload_date': '20220323',

2242

'like_count': int,

2243

'availability': 'unlisted',

2244

'channel': 'nao20010128nao',

2245

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2246

'age_limit': 0,

2247

'uploader': 'nao20010128nao',

2248

'uploader_id': 'nao20010128nao',

2249

'categories': ['Music'],

2250

'view_count': int,

2251

'description': '',

2252

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2253

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2254

'live_status': 'not_live',

2255

'playable_in_embed': True,

2256

'channel_follower_count': int,

2257

'duration': 6,

2258

'tags': [],

2259

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

2260

}

2261

}, {

2262

'note': '6 channel audio',

2263

'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',

2264

'only_matching': True,

}

]

_WEBPAGE_TESTS = [

# YouTube <object> embed

2270

{

2271

'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',

2272

'md5': '873c81d308b979f0e23ee7e620b312a3',

'info_dict': {

'id': 'msN87y-iEx0',

'ext': 'mp4',

'title': 'Feynman: Mirrors FUN TO IMAGINE 6',

2277

'upload_date': '20080526',

2278

'description': 'md5:873c81d308b979f0e23ee7e620b312a3',

2279

'uploader': 'Christopher Sykes',

2280

'uploader_id': 'ChristopherJSykes',

2281

'age_limit': 0,

2282

'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],

2283

'channel_id': 'UCCeo--lls1vna5YJABWAcVA',

2284

'playable_in_embed': True,

2285

'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',

2286

'like_count': int,

2287

'comment_count': int,

2288

'channel': 'Christopher Sykes',

2289

'live_status': 'not_live',

2290

'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',

2291

'availability': 'public',

2292

'duration': 195,

2293

'view_count': int,

2294

'categories': ['Science & Technology'],

2295

'channel_follower_count': int,

2296

'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',

2297

},

2298

'params': {

2299

'skip_download': True,

}

},

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # Imported locally on purpose, exactly as the original does.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)

2312

2313

def __init__(self, *args, **kwargs):
    """Initialise the extractor and its two in-memory caches.

    All arguments are forwarded unchanged to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    # player id -> downloaded player JS source (filled by _load_player)
    self._code_cache = dict()
    # signature / nsig functions and decrypted nsig values, keyed by tuples
    self._player_cache = dict()

2317

2318

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2319

lock = threading.Lock()

2320

2321

is_live = True

2322

start_time = time.time()

2323

formats = [f for f in formats if f.get('is_from_start')]

2324

2325

def refetch_manifest(format_id, delay):

2326

nonlocal formats, start_time, is_live

2327

if time.time() <= start_time + delay:

2328

return

2329

2330

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2331

video_details = traverse_obj(

2332

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2333

microformats = traverse_obj(

2334

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2335

expected_type=dict, default=[])

2336

_, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)

2337

start_time = time.time()

2338

2339

def mpd_feed(format_id, delay):

2340

"""

2341

@returns (manifest_url, manifest_stream_number, is_live) or None

2342

"""

2343

with lock:

2344

refetch_manifest(format_id, delay)

2345

2346

f = next((f for f in formats if f['format_id'] == format_id), None)

2347

if not f:

2348

if not is_live:

2349

self.to_screen(f'{video_id}: Video is no longer live')

2350

else:

2351

self.report_warning(

2352

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2353

return None

2354

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2359

f['fragments'] = functools.partial(

2360

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2361

2362

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2363

FETCH_SPAN, MAX_DURATION = 5, 432000

2364

2365

mpd_url, stream_number, is_live = None, None, True

2366

2367

begin_index = 0

2368

download_start_time = ctx.get('start') or time.time()

2369

2370

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2371

if lack_early_segments:

2372

self.report_warning(bug_reports_message(

2373

'Starting download from the last 120 hours of the live stream since '

2374

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2375

lack_early_segments = True

2376

2377

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2378

fragments, fragment_base_url = None, None

2379

2380

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2381

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2382

# Obtain from MPD's maximum seq value

2383

old_mpd_url = mpd_url

2384

last_error = ctx.pop('last_error', None)

2385

expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403

2386

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2387

or (mpd_url, stream_number, False))

2388

if not refresh_sequence:

2389

if expire_fast and not is_live:

2390

return False, last_seq

2391

elif old_mpd_url == mpd_url:

2392

return True, last_seq

2393

try:

2394

fmts, _ = self._extract_mpd_formats_and_subtitles(

2395

mpd_url, None, note=False, errnote=False, fatal=False)

2396

except ExtractorError:

2397

fmts = None

2398

if not fmts:

2399

no_fragment_score += 2

2400

return False, last_seq

2401

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2402

fragments = fmt_info['fragments']

2403

fragment_base_url = fmt_info['fragment_base_url']

2404

assert fragment_base_url

2405

2406

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2407

return True, _last_seq

2408

2409

while is_live:

2410

fetch_time = time.time()

2411

if no_fragment_score > 30:

2412

return

2413

if last_segment_url:

2414

# Obtain from "X-Head-Seqnum" header value from each segment

2415

try:

2416

urlh = self._request_webpage(

2417

last_segment_url, None, note=False, errnote=False, fatal=False)

2418

except ExtractorError:

2419

urlh = None

2420

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2421

if last_seq is None:

2422

no_fragment_score += 2

2423

last_segment_url = None

2424

continue

2425

else:

2426

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2427

no_fragment_score += 2

2428

if not should_continue:

2429

continue

2430

2431

if known_idx > last_seq:

2432

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2438

# skip from the start when it's negative value

2439

known_idx = last_seq + begin_index

2440

if lack_early_segments:

2441

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2442

try:

2443

for idx in range(known_idx, last_seq):

2444

# do not update sequence here or you'll get skipped some part of it

2445

should_continue, _ = _extract_sequence_from_mpd(False, False)

2446

if not should_continue:

2447

known_idx = idx - 1

2448

raise ExtractorError('breaking out of outer loop')

2449

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2450

yield {

2451

'url': last_segment_url,

2452

'fragment_count': last_seq,

2453

}

2454

if known_idx == last_seq:

2455

no_fragment_score += 5

2456

else:

2457

no_fragment_score = 0

2458

known_idx = last_seq

2459

except ExtractorError:

2460

continue

2461

2462

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2463

2464

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg objects.

    Looks for ``PLAYER_JS_URL`` and then for ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl``;
    returns None when neither is present. ``webpage`` is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if js_path:
        return urljoin('https://www.youtube.com', js_path)
    return None

2471

2472

def _download_player_url(self, video_id, fatal=False):

2473

res = self._download_webpage(

2474

'https://www.youtube.com/iframe_api',

2475

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2476

if res:

2477

player_version = self._search_regex(

2478

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2479

if player_version:

2480

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2481

2482

def _signature_cache_id(self, example_sig):

2483

""" Return a string representation of a signature """

2484

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2485

2486

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching ``player_url``.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: patterns after the first hit are never tried.
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    hit = next((m for m in attempts if m), None)
    if hit is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return hit.group('id')

2495

2496

def _load_player(self, video_id, player_url, fatal=True):

2497

player_id = self._extract_player_info(player_url)

2498

if player_id not in self._code_cache:

2499

code = self._download_webpage(

2500

player_url, video_id, fatal=fatal,

2501

note='Downloading player ' + player_id,

2502

errnote='Download of %s failed' % player_url)

2503

if code:

2504

self._code_cache[player_id] = code

2505

return self._code_cache.get(player_id)

2506

2507

def _extract_signature_function(self, video_id, player_url, example_sig):

2508

player_id = self._extract_player_info(player_url)

2509

2510

# Read from filesystem cache

2511

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2512

assert os.path.basename(func_id) == func_id

2513

2514

self.write_debug(f'Extracting signature function {func_id}')

2515

cache_spec = self.cache.load('youtube-sigfuncs', func_id)

2516

if cache_spec is not None:

2517

return lambda s: ''.join(s[i] for i in cache_spec)

2518

2519

code = self._load_player(video_id, player_url)

2520

if code:

2521

res = self._parse_sig_js(code)

2522

2523

test_string = ''.join(map(chr, range(len(example_sig))))

2524

cache_res = res(test_string)

2525

cache_spec = [ord(c) for c in cache_res]

2526

2527

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2528

return res

2529

2530

def _print_sig_code(self, func, example_sig):

2531

if not self.get_param('youtube_print_sig_code'):

2532

return

2533

2534

def gen_sig_code(idxs):

2535

def _genslice(start, end, step):

2536

starts = '' if start == 0 else str(start)

2537

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2538

steps = '' if step == 1 else (':%d' % step)

2539

return f's[{starts}{ends}{steps}]'

2540

2541

step = None

2542

# Quelch pyflakes warnings - start will be set when step is set

2543

start = '(Never used)'

2544

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2549

step = None

2550

continue

2551

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2561

2562

test_string = ''.join(map(chr, range(len(example_sig))))

2563

cache_res = func(test_string)

2564

cache_spec = [ord(c) for c in cache_res]

2565

expr_code = ' + '.join(gen_sig_code(cache_spec))

2566

signature_id_tuple = '(%s)' % (

2567

', '.join(str(len(p)) for p in example_sig.split('.')))

2568

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2569

' return %s\n') % (signature_id_tuple, expr_code)

2570

self.to_screen('Extracted signature function:\n' + code)

2571

2572

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is searched with the patterns below (all capture it as
    the ``sig`` group); the function is then extracted with ``JSInterpreter``.
    The returned callable takes the encrypted signature string.
    """
    # NOTE: every literal parenthesis in these patterns must be escaped as \( or \);
    # a bare `$` there would anchor to end-of-string and the pattern could never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with its argument list
    return lambda s: initial_function([s])

2595

2596

def _decrypt_signature(self, s, video_id, player_url):

2597

"""Turn the encrypted s field into a working signature"""

2598

try:

2599

player_id = (player_url, self._signature_cache_id(s))

2600

if player_id not in self._player_cache:

2601

func = self._extract_signature_function(video_id, player_url, s)

2602

self._player_cache[player_id] = func

2603

func = self._player_cache[player_id]

2604

self._print_sig_code(func, s)

2605

return func(s)

2606

except Exception as e:

2607

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2608

2609

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Results are memoised in ``self._player_cache`` under ``('nsig_value', s)``
    and the n function under ``('nsig', player_url)``. Raises ExtractorError
    when ``player_url`` is None or when extraction/decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_func = self._player_cache[func_key]
        decrypted = n_func(s)
        # Cache first, then log - same order as the result is produced
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2629

2630

def _extract_n_function_name(self, jscode):

2631

nfunc, idx = self._search_regex(

2632

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2633

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2634

if not idx:

2635

return nfunc

2636

return json.loads(js_to_json(self._search_regex(

2637

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2638

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2639

2640

def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's n function to a single string.

    The function code is loaded from the ``youtube-nsig`` cache when present;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    n_func = jsi.extract_function_from_code(*func_code)

    def apply_n(s):
        # The extracted function is called with its argument list
        return n_func([s])

    return apply_n

2658

2659

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ``ytcfg['STS']`` when available, otherwise it is
    searched (as a 5-digit number) in the player JS. Without a player URL a
    warning is reported - or ExtractorError raised when ``fatal`` - and None
    is returned.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2682

2683

def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs of the player responses.

    The first URL is requested as "watched", the second as "fully watched".
    If either URL is missing, a warning is reported and processing stops.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn_chars = []
        for _ in range(16):
            cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
        cpn = ''.join(cpn_chars)

        # # more consistent results setting it to right before the end
        length_values = qs.get('len') or ['1.5']
        video_length = [str(float(length_values[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2723

2724

@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by ``webpage``.

    An ``alternate`` link to a YouTube watch page is yielded on its own,
    after which ``StopExtraction`` is raised. Otherwise the parent class
    results come first, followed by lazyYT and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for raw_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for groups in re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # findall returns (q1, q2, video id); the id is the last group
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)

2747

2748

@classmethod
def extract_id(cls, url):
    """Return the video id obtained from ``cls.get_temp_id(url)``.

    Raises ExtractorError when no id can be determined.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2754

2755

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` entry of ``data``.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title/simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)

2769

2770

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists found in the engagement panels.

    Returns the chapters of the first list that yields a non-empty result,
    or an empty list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2782

2783

def _extract_chapters_from_description(self, description, duration):
    """Build chapters from '<timestamp> <title>' lines of a video description.

    Each matching line contributes a (timestamp, title) pair. Extraction is
    non-strict, i.e. `_extract_chapters` sorts the entries by start time.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def start_time_of(match):
        return parse_duration(match[0])

    def title_of(match):
        return match[1]

    return self._extract_chapters(
        timestamped_lines,
        chapter_time=start_time_of, chapter_title=title_of,
        duration=duration, strict=False)

2788

2789

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2794

'title': chapter_title(chapter),

2795

} for chapter in chapter_list or []]

2796

if not strict:

2797

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2798

2799

chapters = [{'start_time': 0}]

2800

for idx, chapter in enumerate(chapter_list):

2801

if chapter['start_time'] is None:

2802

self.report_warning(f'Incomplete chapter {idx}')

2803

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2804

chapters.append(chapter)

2805

else:

2806

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2807

return chapters[1:]

2808

2809

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a flat comment dict.

    Returns None when the renderer has no 'commentId'. The 'parent' field
    of the result is the given parent id, or 'root' when there is none.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    def lookup(getter, expected_type):
        return try_get(comment_renderer, getter, expected_type)

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = lookup(lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    # The count is taken from 'voteCount' first, then from 'likeCount'
    vote_text = lookup(
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), str)
    votes = parse_count(vote_text) or 0
    author_thumbnail = lookup(lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = lookup(lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = lookup(
        lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root',
    }

2843

2844

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts for a comment section or for one reply thread.

    Called with parent=None for the top-level comment section. For replies
    it is re-entered (from the nested thread extractor) with `parent` set to
    the id of the parent comment and with the same `tracker`, so that the
    counters are shared across the recursive calls.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Record the expected comment count and return the continuation for the requested sort order"""
        header_continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            header_continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not header_continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return header_continuation

    def extract_thread(contents):
        """Yield the comments of one page, each followed by its replies.

        A bare `yield` (None) is emitted once the parent-comment limit is
        reached; the consumer below treats a falsy entry as "stop".
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(
                reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing values are padded with '' and therefore fall back to sys.maxsize
    limits = [
        int_or_none(value, default=sys.maxsize)
        for value in self._configuration_arg('max_comments', ) + [''] * 4]
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = limits

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=None if is_forced_continuation else 'onResponseReceivedEndpoints')
        # Only the very first (self-generated) request skips the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only supplies the header (count and sort order)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2989

2990

@staticmethod

2991

def _generate_comment_continuation(video_id):

2992

"""

2993

Generates initial comment section continuation token from given video id

2994

"""

2995

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2996

return base64.b64encode(token.encode()).decode()

2997

2998

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the 'comment-item-section'
    item section found in *contents*, capped at the first value of the
    'max_comments' extractor argument (no cap when that is not a number).
    """
    def _iter_section_comments():
        # Locate the first item section that is flagged as the comment section
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_section_comments(), 0, comment_limit)

3008

3009

@staticmethod

3010

def _get_checkok_params():

3011

return {'contentCheckOk': True, 'racyCheckOk': True}

3012

3013

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player API query.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict with 'playbackContext' plus the check-ok parameters
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context

@staticmethod
def _is_agegated(player_response):
    """Tell whether *player_response* describes an age-gated video.

    A response counts as age-gated when its playability status carries a
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age-gate markers. Matching is case-insensitive so that
    upper-case status codes (e.g. AGE_VERIFICATION_REQUIRED) and capitalised
    reason texts are recognised as well.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Non-string values cannot contain a marker and are skipped rather than
    # risking a TypeError from the `in` test
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

3038

3039

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of *player_response* is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

3042

3043

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' API endpoint for *video_id* as the given client.

    The signature timestamp is only looked up when a player URL is given.
    Returns the API response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

3062

3063

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument to an ordered client list.

    Each requested value is either a client name (clients starting with '_'
    cannot be requested), 'default' or 'all'; anything else is skipped with
    a warning. The default clients are used when nothing valid was
    requested. For music URLs the matching '<client>_music' clients are
    appended. Duplicates are removed by `orderedSet`.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the additions below never alias the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first instead of extending the list from a
        # generator that is still iterating over that same list
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3086

3087

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all requested clients.

    Clients are processed in the given order; additional fallback clients
    may be queued while processing, depending on the responses received.
    Returns a tuple (player_responses, player_url). The last
    ExtractorError is re-raised only when no response was collected at all;
    otherwise errors are reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() hands them out in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in all_clients:
                clients.append(client_name)
                all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as err:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = err
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3168

3169

def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Generate the formats found in *streaming_data*, then the subtitles.

    Format dicts are yielded first: those listed directly under 'formats'
    and 'adaptiveFormats', followed by the ones from the HLS and DASH
    manifests (unless skipped). The very last item yielded is the merged
    subtitles dict collected from the manifests, so consumers must split
    it off (e.g. `*formats, subtitles = ...`).

    @param streaming_data  List of 'streamingData' dicts
    @param player_url      URL of the JS player, needed for signature and
                           nsig decryption
    @param is_live         Whether the video is currently live
    @param duration        Duration of the video, if known; used to detect
                           formats that are much shorter than the video
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: -1}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null 'audioQuality'
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Never hand None to parse_qs; a missing cipher simply parses to {}
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # Without a (non-zero) duration there is nothing to compare against
        is_damaged = bool(duration) and try_get(
            fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None when the format carries neither
                # 'quality' nor 'audioQuality'
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy: the list is appended to below
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to a manifest format; False means "duplicate, drop it\""""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # itag_qualities holds quality *names*; convert to the numeric rank so
        # that manifest formats are comparable with the formats yielded above.
        # q() maps the -1 fallback (like any unknown name) to -1.
        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the closest known resolution
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    yield subtitles

def _extract_storyboard(self, player_responses, duration):
    """Generate one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string whose first element is the base URL
    template and whose remaining elements describe one storyboard level
    each as 8 '#'-separated fields. Malformed levels are reported and
    skipped.

    Nothing is generated when there is no usable base URL, or when
    *duration* is unknown/zero: the frame rate and the fragment durations
    are both derived from it, and dividing by a missing duration would
    raise instead.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading base URL template
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = [{
            'url': url.replace('$M', str(j)),
            # The last fragment may cover less than a full fragment duration
            'duration': min(fragment_duration, duration - (j * fragment_duration)),
        } for j in range(math.ceil(fragment_count))]
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }

3390

3391

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @returns  Tuple (webpage, master_ytcfg, player_responses, player_url);
              webpage is None when 'webpage' is listed in the 'player_skip'
              extractor argument
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if skip_webpage:
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3404

3405

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status and extract formats and subtitles.

    @returns  Tuple (live_broadcast_details, is_live, streaming_data,
              formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # 'isLive' from the video details wins; the broadcast details are only
    # consulted when it is absent
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    # The generator yields every format first and the subtitles dict last
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles

3415

3416

def _real_extract(self, url):

3417

url, smuggled_data = unsmuggle_url(url, {})

3418

video_id = self._match_id(url)

3419

3420

base_url = self.http_scheme() + '//www.youtube.com/'

3421

webpage_url = base_url + 'watch?v=' + video_id

3422

3423

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3424

3425

playability_statuses = traverse_obj(

3426

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3427

3428

trailer_video_id = get_first(

3429

playability_statuses,

3430

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3431

expected_type=str)

3432

if trailer_video_id:

3433

return self.url_result(

3434

trailer_video_id, self.ie_key(), trailer_video_id)

3435

3436

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3437

if webpage else (lambda x: None))

3438

3439

video_details = traverse_obj(

3440

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3441

microformats = traverse_obj(

3442

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3443

expected_type=dict, default=[])

3444

video_title = (

3445

get_first(video_details, 'title')

3446

or self._get_text(microformats, (..., 'title'))

3447

or search_meta(['og:title', 'twitter:title', 'title']))

3448

video_description = get_first(video_details, 'shortDescription')

3449

3450

multifeed_metadata_list = get_first(

3451

player_responses,

3452

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3453

expected_type=str)

3454

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3455

if self.get_param('noplaylist'):

3456

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3461

# Unquote should take place before split on comma (,) since textual

3462

# fields may contain comma as well (see

3463

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3464

feed_data = urllib.parse.parse_qs(

3465

urllib.parse.unquote_plus(feed))

3466

3467

def feed_entry(name):

3468

return try_get(

3469

feed_data, lambda x: x[name][0], str)

3470

3471

feed_id = feed_entry('id')

3472

if not feed_id:

3473

continue

3474

feed_title = feed_entry('title')

3475

title = video_title

3476

if feed_title:

3477

title += ' (%s)' % feed_title

3478

entries.append({

3479

'_type': 'url_transparent',

3480

'ie_key': 'Youtube',

3481

'url': smuggle_url(

3482

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3483

{'force_singlefeed': True}),

3484

'title': title,

3485

})

3486

feed_ids.append(feed_id)

3487

self.to_screen(

3488

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3489

% (', '.join(feed_ids), video_id))

3490

return self.playlist_result(

3491

entries, video_id, video_title, video_description)

3492

3493

duration = int_or_none(

3494

get_first(video_details, 'lengthSeconds')

3495

or get_first(microformats, 'lengthSeconds')

3496

or parse_duration(search_meta('duration'))) or None

3497

3498

live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \

3499

self._list_formats(video_id, microformats, video_details, player_responses, player_url)

3500

3501

if not formats:

3502

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3503

self.report_drm(video_id)

3504

pemr = get_first(

3505

playability_statuses,

3506

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3507

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3508

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3509

if subreason:

3510

if subreason == 'The uploader has not made this video available in your country.':

3511

countries = get_first(microformats, 'availableCountries')

3512

if not countries:

3513

regions_allowed = search_meta('regionsAllowed')

3514

countries = regions_allowed.split(',') if regions_allowed else None

3515

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3516

reason += f'. {subreason}'

3517

if reason:

3518

self.raise_no_formats(reason, expected=True)

3519

3520

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3521

if not keywords and webpage:

3522

keywords = [

3523

unescapeHTML(m.group('content'))

3524

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3525

for keyword in keywords:

3526

if keyword.startswith('yt:stretch='):

3527

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3528

if mobj:

3529

# NB: float is intentional for forcing float division

3530

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3535

f['stretched_ratio'] = ratio

3536

break

3537

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3538

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3539

if thumbnail_url:

3540

thumbnails.append({

3541

'url': thumbnail_url,

3542

})

3543

original_thumbnails = thumbnails.copy()

3544

3545

# The best resolution thumbnails sometimes does not appear in the webpage

3546

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3547

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3548

thumbnail_names = [

3549

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3550

# in resolution, these are not the custom thumbnail. So de-prioritize them

3551

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3552

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3553

]

3554

n_thumbnail_names = len(thumbnail_names)

3555

thumbnails.extend({

3556

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3557

video_id=video_id, name=name, ext=ext,

3558

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3559

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3560

for thumb in thumbnails:

3561

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3562

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3563

self._remove_duplicate_formats(thumbnails)

3564

self._downloader._sort_thumbnails(original_thumbnails)

3565

3566

category = get_first(microformats, 'category') or search_meta('genre')

3567

channel_id = str_or_none(

3568

get_first(video_details, 'channelId')

3569

or get_first(microformats, 'externalChannelId')

3570

or search_meta('channelId'))

3571

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3572

3573

live_content = get_first(video_details, 'isLiveContent')

3574

is_upcoming = get_first(video_details, 'isUpcoming')

3575

if is_live is None:

3576

if is_upcoming or live_content is False:

3577

is_live = False

3578

if is_upcoming is None and (live_content or is_live):

3579

is_upcoming = False

3580

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3581

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3582

if not duration and live_end_time and live_start_time:

3583

duration = live_end_time - live_start_time

3584

3585

if is_live and self.get_param('live_from_start'):

3586

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3587

3588

formats.extend(self._extract_storyboard(player_responses, duration))

3589

3590

# source_preference is lower for throttled/potentially damaged formats

3591

self._sort_formats(formats, (

3592

'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3597

'formats': formats,

3598

'thumbnails': thumbnails,

3599

# The best thumbnail that we are sure exists. Prevents unnecessary

3600

# URL checking if user don't care about getting the best possible thumbnail

3601

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3602

'description': video_description,

3603

'uploader': get_first(video_details, 'author'),

3604

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3605

'uploader_url': owner_profile_url,

3606

'channel_id': channel_id,

3607

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3608

'duration': duration,

3609

'view_count': int_or_none(

3610

get_first((video_details, microformats), (..., 'viewCount'))

3611

or search_meta('interactionCount')),

3612

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3613

'age_limit': 18 if (

3614

get_first(microformats, 'isFamilySafe') is False

3615

or search_meta('isFamilyFriendly') == 'false'

3616

or search_meta('og:restrictions:age') == '18+') else 0,

3617

'webpage_url': webpage_url,

3618

'categories': [category] if category else None,

3619

'tags': keywords,

3620

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3621

'is_live': is_live,

3622

'was_live': (False if is_live or is_upcoming or live_content is False

3623

else None if is_live is None or is_upcoming is None

3624

else live_content),

3625

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3626

'release_timestamp': live_start_time,

3627

}

3628

3629

if get_first(video_details, 'isPostLiveDvr'):

3630

self.write_debug('Video is in Post-Live Manifestless mode')

3631

info['live_status'] = 'post_live'

3632

if (duration or 0) > 4 * 3600:

3633

self.report_warning(

3634

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3635

'This is a known issue and patches are welcome')

3636

3637

subtitles = {}

3638

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3639

if pctr:

3640

def get_lang_code(track):

3641

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3642

or track.get('languageCode'))

3643

3644

# Converted into dicts to remove duplicates

3645

captions = {

3646

get_lang_code(sub): sub

3647

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3648

translation_languages = {

3649

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3650

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3651

3652

def process_language(container, base_url, lang_code, sub_name, query):

3653

lang_subs = container.setdefault(lang_code, [])

3654

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

# NB: Constructing the full subtitle dictionary is slow

3665

get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (

3666

self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))

3667

for lang_code, caption_track in captions.items():

3668

base_url = caption_track.get('baseUrl')

3669

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3670

if not base_url:

3671

continue

3672

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3673

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3678

if not caption_track.get('isTranslatable'):

3679

continue

3680

for trans_code, trans_name in translation_languages.items():

3681

if not trans_code:

3682

continue

3683

orig_trans_code = trans_code

3684

if caption_track.get('kind') != 'asr':

3685

if not get_translated_subs:

3686

continue

3687

trans_code += f'-{lang_code}'

3688

trans_name += format_field(lang_name, None, ' from %s')

3689

# Add an "-orig" label to the original language so that it can be distinguished.

3690

# The subs are returned without "-orig" as well for compatibility

3691

if lang_code == f'a-{orig_trans_code}':

3692

process_language(

3693

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3694

# Setting tlang=lang returns damaged subtitles.

3695

process_language(automatic_captions, base_url, trans_code, trans_name,

3696

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3697

3698

info['automatic_captions'] = automatic_captions

3699

info['subtitles'] = subtitles

3700

3701

parsed_url = urllib.parse.urlparse(url)

3702

for component in [parsed_url.fragment, parsed_url.query]:

3703

query = urllib.parse.parse_qs(component)

3704

for k, v in query.items():

3705

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3706

d_k += '_time'

3707

if d_k not in info and k in s_ks:

3708

info[d_k] = parse_duration(query[k][0])

3709

3710

# Youtube Music Auto-generated description

3711

if video_description:

3712

mobj = re.search(

3713

r'''(?xs)

3714

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3715

(?P<album>[^\n]+)

3716

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3717

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3718

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3719

.+\nAuto-generated\ by\ YouTube\.\s*$

3720

''', video_description)

3721

if mobj:

3722

release_year = mobj.group('release_year')

3723

release_date = mobj.group('release_date')

3724

if release_date:

3725

release_date = release_date.replace('-', '')

3726

if not release_year:

3727

release_year = release_date[:4]

3728

info.update({

3729

'album': mobj.group('album'.strip()),

3730

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3731

'track': mobj.group('track').strip(),

3732

'release_date': release_date,

3733

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3739

if not initial_data:

3740

query = {'videoId': video_id}

3741

query.update(self._get_checkok_params())

3742

initial_data = self._extract_response(

3743

item_id=video_id, ep='next', fatal=False,

3744

ytcfg=master_ytcfg, query=query,

3745

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3746

note='Downloading initial data API JSON')

3747

3748

info['comment_count'] = traverse_obj(initial_data, (

3749

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3750

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3751

), (

3752

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3753

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3754

), expected_type=int_or_none, get_all=False)

3755

3756

try: # This will error if there is no livechat

3757

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3758

except (KeyError, IndexError, TypeError):

3759

pass

3760

else:

3761

info.setdefault('subtitles', {})['live_chat'] = [{

3762

# url is needed to set cookies

3763

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3764

'video_id': video_id,

3765

'ext': 'json',

3766

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3772

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3773

or self._extract_chapters_from_description(video_description, duration)

3774

or None)

3775

3776

contents = traverse_obj(

3777

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3778

expected_type=list, default=[])

3779

3780

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3781

if vpir:

3782

stl = vpir.get('superTitleLink')

3783

if stl:

3784

stl = self._get_text(stl)

3785

if try_get(

3786

vpir,

3787

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3788

info['location'] = stl

3789

else:

3790

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3791

if mobj:

3792

info.update({

3793

'series': mobj.group(1),

3794

'season_number': int(mobj.group(2)),

3795

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3800

list) or []):

3801

tbr = tlb.get('toggleButtonRenderer') or {}

3802

for getter, regex in [(

3803

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3804

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3805

lambda x: x['accessibility'],

3806

lambda x: x['accessibilityData']['accessibilityData'],

3807

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3808

label = (try_get(tbr, getter, dict) or {}).get('label')

3809

if label:

3810

mobj = re.match(regex, label)

3811

if mobj:

3812

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3813

break

3814

sbr_tooltip = try_get(

3815

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3816

if sbr_tooltip:

3817

like_count, dislike_count = sbr_tooltip.split(' / ')

3818

info.update({

3819

'like_count': str_to_int(like_count),

3820

'dislike_count': str_to_int(dislike_count),

3821

})

3822

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3823

if vsir:

3824

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3825

info.update({

3826

'channel': self._get_text(vor, 'title'),

3827

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3832

list) or []

3833

multiple_songs = False

3834

for row in rows:

3835

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3836

multiple_songs = True

3837

break

3838

for row in rows:

3839

mrr = row.get('metadataRowRenderer') or {}

3840

mrr_title = mrr.get('title')

3841

if not mrr_title:

3842

continue

3843

mrr_title = self._get_text(mrr, 'title')

3844

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3845

if mrr_title == 'License':

3846

info['license'] = mrr_contents_text

3847

elif not multiple_songs:

3848

if mrr_title == 'Album':

3849

info['album'] = mrr_contents_text

3850

elif mrr_title == 'Artist':

3851

info['artist'] = mrr_contents_text

3852

elif mrr_title == 'Song':

3853

info['track'] = mrr_contents_text

3854

3855

fallbacks = {

3856

'channel': 'uploader',

3857

'channel_id': 'uploader_id',

3858

'channel_url': 'uploader_url',

3859

}

3860

3861

# The upload date for scheduled, live and past live streams / premieres in microformats

3862

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3863

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3864

upload_date = (

3865

unified_strdate(get_first(microformats, 'uploadDate'))

3866

or unified_strdate(search_meta('uploadDate')))

3867

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3868

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3869

info['upload_date'] = upload_date

3870

3871

for to, frm in fallbacks.items():

3872

if not info.get(to):

3873

info[to] = info.get(frm)

3874

3875

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3881

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3882

is_membersonly = None

3883

is_premium = None

3884

if initial_data and is_private is not None:

3885

is_membersonly = False

3886

is_premium = False

3887

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3888

badge_labels = set()

3889

for content in contents:

3890

if not isinstance(content, dict):

3891

continue

3892

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3893

for badge_label in badge_labels:

3894

if badge_label.lower() == 'members only':

3895

is_membersonly = True

3896

elif badge_label.lower() == 'premium':

3897

is_premium = True

3898

elif badge_label.lower() == 'unlisted':

3899

is_unlisted = True

3900

3901

info['availability'] = self._availability(

3902

is_private=is_private,

3903

needs_premium=is_premium,

3904

needs_subscription=is_membersonly,

3905

needs_auth=info['age_limit'] >= 18,

3906

is_unlisted=None if is_private is None else is_unlisted)

3907

3908

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3909

3910

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3916

3917

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator: unsmuggle the URL for `func` and smuggle the data back into its entries.

    `func` is called as func(self, url, smuggled_data) with the unsmuggled URL.
    The returned wrapper keeps the plain (self, url) call signature.
    """

    def _resmuggle(entries, data):
        # Lazily rewrite every entry URL so that the smuggled data travels with it.
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True

        result = func(self, url, smuggled_data)
        if not smuggled_data:
            # Nothing to pass through
            return result

        entries = result.get('entries')
        if entries:
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3938

channel_id = self._html_search_meta(

3939

'channelId', webpage, 'channel id', default=None)

3940

if channel_id:

3941

return channel_id

3942

channel_url = self._html_search_meta(

3943

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3944

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3945

'twitter:app:url:googleplay'), webpage, 'channel url')

3946

return self._search_regex(

3947

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3948

channel_url, 'channel id')

3949

3950

@staticmethod

3951

def _extract_basic_item_renderer(item):

3952

# Modified from _extract_grid_item_renderer

3953

known_basic_renderers = (

3954

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3955

)

3956

for key, renderer in item.items():

3957

if not isinstance(renderer, dict):

3958

continue

3959

elif key in known_basic_renderers:

3960

return renderer

3961

elif key.startswith('grid') and key.endswith('Renderer'):

3962

return renderer

3963

3964

def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in grid_renderer['items'].

    For each item the basic renderer is picked with
    self._extract_basic_item_renderer, then exactly one of the following is
    tried, in this order: playlist ID, video ID, channel ID, and finally the
    generic navigation endpoint URL. Items that are not dicts, or that have
    no recognised renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url),
                    video_title=title)

4003

4004

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url_result for a music list item renderer.

    Lookup order: the playlist item's own video ID, then the watch endpoint's
    playlist (with its video, if any), then the browse endpoint's browse ID.
    Returns None when none of these is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        # Either way the result is identified by the playlist, not the video
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

4021

4022

def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in shelf_renderer['content'].

    Only 'gridRenderer' and 'expandedShelfContentsRenderer' content is
    extracted; nothing is yielded if 'content' is not a dict.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return

    grid = shelf_content.get('gridRenderer') or shelf_content.get('expandedShelfContentsRenderer')
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid)

    # TODO: 'horizontalListRenderer' content is not extracted yet

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url_result for the shelf's endpoint URL, then the entries from its content.

    When skip_channels is set and the shelf URL contains '/channels?',
    nothing is yielded at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = skip_channels and '/channels?' in shelf_url
        if links_to_channels:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))

    # The shelf may not carry a shelf URL, so the content is extracted as well
    # NOTE(review): this runs even when a shelf URL was yielded above - confirm that is intended
    yield from self._shelf_entries_from_content(shelf_renderer)

4052

4053

def _playlist_entries(self, video_list_renderer):

4054

for content in video_list_renderer['contents']:

4055

if not isinstance(content, dict):

4056

continue

4057

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4058

if not isinstance(renderer, dict):

4059

continue

4060

video_id = renderer.get('videoId')

4061

if not video_id:

4062

continue

4063

yield self._extract_video(renderer)

4064

4065

def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ['content']['videoRenderer'], if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4072

4073

def _video_entry(self, video_renderer):

4074

video_id = video_renderer.get('videoId')

4075

if video_id:

4076

return self._extract_video(video_renderer)

4077

4078

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the tile's tap-command URL, or None if there is no URL.

    The result title is the text of the renderer's 'hashtag' field.
    """
    command_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', command_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4084

4085

def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    and every YouTube video linked inline in the post text, except a link to
    the already-attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Do not repeat the video that was attached to the post
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4120

4121

def _post_thread_continuation_entries(self, post_thread_continuation):

4122

contents = post_thread_continuation.get('contents')

4123

if not isinstance(contents, list):

4124

return

4125

for content in contents:

4126

renderer = content.get('backstagePostThreadRenderer')

4127

if isinstance(renderer, dict):

4128

yield from self._post_thread_entries(renderer)

4129

continue

4130

renderer = content.get('videoRenderer')

4131

if isinstance(renderer, dict):

4132

yield self._video_entry(renderer)

4133

4134

r''' # unused

4135

def _rich_grid_entries(self, contents):

4136

for content in contents:

4137

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4138

if video_renderer:

4139

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in parent_renderer['contents'] and record the continuation.

    continuation_list is modified in-place: it is reset to [None] and its
    single slot is then filled with the result of self._extract_continuation,
    tried on the matched item renderer, then on the enclosing section
    renderer, and finally on parent_renderer.

    Each content item is either a section renderer ('itemSectionRenderer',
    'musicShelfRenderer' or 'musicShelfContinuation'), whose own 'contents'
    are dispatched by renderer key, or a 'richItemRenderer', which is passed
    to self._rich_entries. Anything else is skipped.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Dispatch table: renderer key -> callable returning an iterable of entries.
    # It only depends on self, so it is built once rather than per item.
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other supported item is a rich item
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Single-entry helpers may return None; drop those
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4192

4193

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep following continuation pages.

    The first batch comes from the tab's 'sectionListRenderer' or
    'richGridRenderer'. After that, continuation responses are requested
    until no continuation is left, a request yields no response, or a
    response contains none of the known renderers.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so rebinding it below is picked up here
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    section_list_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
    parent_renderer = (
        section_list_renderer
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for responses shaped as {'continuationContents': {<key>: renderer}}
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for 'appendContinuationItemsAction' responses, keyed by the type of the
    # first continuation item: (extractor, key under which the items are wrapped)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        continuation_renderer = None
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from continuation_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next(
            (key for key in continuation_item if key in item_handlers), None)
        if matched_key is None:
            # Nothing recognisable in this response; stop paging
            break

        handler, container_key = item_handlers[matched_key]
        video_items_renderer = {container_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer of the first tab whose 'selected' flag is True.

    Looks at each tab's 'tabRenderer' / 'expandableTabRenderer'. When no tab
    is selected, raises ExtractorError if fatal, otherwise returns None.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer

    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None

4277

4278

def _extract_uploader(self, data):
    """
    Collect uploader name, id and URL from the playlist sidebar's video owner.

    Returns a dict containing only the fields that were found (possibly empty).
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    found = {
        # Collaborative playlists read "by <name> and N others"; keep only the name
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {field: value for field, value in found.items() if value is not None}

4293

4294

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a tabbed page from its selected tab.

    Metadata is read from the channel or playlist metadata renderer, the
    primary sidebar renderer and the page header; the entries come from
    self._entries() on the selected tab.
    """
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get('title'), renderer.get('channelUrl'), renderer.get('externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        tags = renderer.get('keywords', '').split()

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    # The channel id (only set for channel pages) doubles as the playlist id;
    # everything else is identified by the requested item id
    playlist_id = item_id if channel_id is None else channel_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    for channel_field, uploader_field in (
            ('channel', 'uploader'), ('channel_id', 'uploader_id'), ('channel_url', 'uploader_url')):
        metadata[channel_field] = metadata[uploader_field]

    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)

4385

4386

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist, paging through the 'next' endpoint.

    Every page is parsed with self._playlist_entries(); entries up to and
    including the last one already yielded are skipped. Iteration ends when a
    page has no entries or contributes nothing new.

    @param playlist:    playlist renderer of the current page
    @param playlist_id: id sent as 'playlistId' and used in progress messages
    @param data:        initial response, used for account sync id / visitor data
    @param ytcfg:       ytcfg passed on to the API helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last entry that was already yielded (from the start if not found)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last panel item may not carry a watch endpoint; fall back to an empty
        # dict so the per-field defaults below apply instead of failing on None
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4416

4417

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist shown next to a video.

    When the playlist links to a different, viewable playlist page, the work
    is delegated to that URL; otherwise the inline playlist is extracted here.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4439

4440

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            badge_labels.add(selected_label.lower())
            break

    is_private = None
    is_unlisted = None
    for label in badge_labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)

4471

4472

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry among the playlist
    sidebar items of `data`, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    # Lazily evaluated: stops at the first truthy renderer
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary playlist sidebar; otherwise the
    (non-fatal) API response. If the button is not found, default browse
    parameters derived from item_id are used.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = browse_endpoint.get('browseId'), browse_endpoint.get('params')
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4516

4517

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps

4520

4521

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on transient failures.

    Network errors other than HTTP 403/429 and pages with incomplete initial
    data are handed to the retry manager; other extraction errors and reported
    alerts end the attempts via self._error_or_warning().

    Returns a (webpage, data) tuple; either element may be None.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # HTTP 403/429 are not retried; any other network failure is
            should_retry = isinstance(e.cause, network_exceptions) and not (
                isinstance(e.cause, urllib.error.HTTPError) and e.cause.code in (403, 429))
            if should_retry:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4560

4561

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the page data for `url`, from the webpage when possible and from the
    API otherwise.

    Returns a (data, ytcfg) tuple. Raises ExtractorError (or only warns when
    not fatal) if the webpage turned out to be the youtube.com home page.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: resolve the URL through the API instead
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4580

4581

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and fetch the
    response of the browse/watch endpoint it points to.

    If neither endpoint is present, raises ExtractorError when fatal,
    otherwise reports a warning and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its parameters)
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4598

4599

# Default 'params' value for search requests; _search_results() uses it when no params are passed
_SEARCH_PARAMS = None

4600

4601

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, page by page.

    `params` defaults to self._SEARCH_PARAMS and is only sent when truthy.
    Paging stops once a page leaves no continuation behind.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    search_query = {'query': query}
    if params:
        search_query['params'] = params

    # Candidate locations of the result list inside a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({path[0] for path in content_keys})

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation left by the previous page (if any) into the request
        if continuation_list[0]:
            search_query.update(continuation_list[0])
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=search_query,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page_renderer, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4636

IE_DESC = 'YouTube Tabs'

4637

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4646

(?P<not_channel>

4647

feed/|hashtag/|

4648

(?:playlist|watch)\?.*?\blist=

4649

)|

4650

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4655

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4656

}

4657

IE_NAME = 'youtube:tab'

4658

4659

_TESTS = [{

4660

'note': 'playlists, multipage',

4661

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4662

'playlist_mincount': 94,

4663

'info_dict': {

4664

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4665

'title': 'Igor Kleiner - Playlists',

4666

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4667

'uploader': 'Igor Kleiner',

4668

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4669

'channel': 'Igor Kleiner',

4670

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4671

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4672

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4673

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4674

'channel_follower_count': int

4675

},

4676

}, {

4677

'note': 'playlists, multipage, different order',

4678

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4679

'playlist_mincount': 94,

4680

'info_dict': {

4681

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4682

'title': 'Igor Kleiner - Playlists',

4683

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4684

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4685

'uploader': 'Igor Kleiner',

4686

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4687

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4688

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4689

'channel': 'Igor Kleiner',

4690

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4691

'channel_follower_count': int

4692

},

4693

}, {

4694

'note': 'playlists, series',

4695

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4696

'playlist_mincount': 5,

4697

'info_dict': {

4698

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4699

'title': '3Blue1Brown - Playlists',

4700

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4701

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4702

'uploader': '3Blue1Brown',

4703

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4704

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4705

'channel': '3Blue1Brown',

4706

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4707

'tags': ['Mathematics'],

4708

'channel_follower_count': int

4709

},

4710

}, {

4711

'note': 'playlists, singlepage',

4712

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4713

'playlist_mincount': 4,

4714

'info_dict': {

4715

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4716

'title': 'ThirstForScience - Playlists',

4717

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4718

'uploader': 'ThirstForScience',

4719

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4720

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4721

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4722

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4723

'tags': 'count:13',

4724

'channel': 'ThirstForScience',

4725

'channel_follower_count': int

4726

}

4727

}, {

4728

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4729

'only_matching': True,

4730

}, {

4731

'note': 'basic, single video playlist',

4732

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4733

'info_dict': {

4734

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4735

'uploader': 'Sergey M.',

4736

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4737

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4742

'channel': 'Sergey M.',

4743

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4744

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4745

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4750

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4751

'info_dict': {

4752

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4753

'uploader': 'Sergey M.',

4754

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4755

'title': 'youtube-dl empty playlist',

4756

'tags': [],

4757

'channel': 'Sergey M.',

4758

'description': '',

4759

'modified_date': '20160902',

4760

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4761

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4762

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4768

'info_dict': {

4769

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4770

'title': 'lex will - Home',

4771

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4772

'uploader': 'lex will',

4773

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4774

'channel': 'lex will',

4775

'tags': ['bible', 'history', 'prophesy'],

4776

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4777

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4778

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4779

'channel_follower_count': int

4780

},

4781

'playlist_mincount': 2,

4782

}, {

4783

'note': 'Videos tab',

4784

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4785

'info_dict': {

4786

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4787

'title': 'lex will - Videos',

4788

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4789

'uploader': 'lex will',

4790

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4791

'tags': ['bible', 'history', 'prophesy'],

4792

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4793

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4794

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4795

'channel': 'lex will',

4796

'channel_follower_count': int

4797

},

4798

'playlist_mincount': 975,

4799

}, {

4800

'note': 'Videos tab, sorted by popular',

4801

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4802

'info_dict': {

4803

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4804

'title': 'lex will - Videos',

4805

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4806

'uploader': 'lex will',

4807

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4808

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4810

'channel': 'lex will',

4811

'tags': ['bible', 'history', 'prophesy'],

4812

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4813

'channel_follower_count': int

4814

},

4815

'playlist_mincount': 199,

4816

}, {

4817

'note': 'Playlists tab',

4818

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4819

'info_dict': {

4820

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4821

'title': 'lex will - Playlists',

4822

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4823

'uploader': 'lex will',

4824

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4825

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4826

'channel': 'lex will',

4827

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4828

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4829

'tags': ['bible', 'history', 'prophesy'],

4830

'channel_follower_count': int

4831

},

4832

'playlist_mincount': 17,

4833

}, {

4834

'note': 'Community tab',

4835

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4836

'info_dict': {

4837

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4838

'title': 'lex will - Community',

4839

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4840

'uploader': 'lex will',

4841

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4842

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4843

'channel': 'lex will',

4844

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4845

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4846

'tags': ['bible', 'history', 'prophesy'],

4847

'channel_follower_count': int

4848

},

4849

'playlist_mincount': 18,

4850

}, {

4851

'note': 'Channels tab',

4852

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4853

'info_dict': {

4854

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4855

'title': 'lex will - Channels',

4856

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4857

'uploader': 'lex will',

4858

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4859

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4860

'channel': 'lex will',

4861

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4862

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4863

'tags': ['bible', 'history', 'prophesy'],

4864

'channel_follower_count': int

4865

},

4866

'playlist_mincount': 12,

4867

}, {

4868

'note': 'Search tab',

4869

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4870

'playlist_mincount': 40,

4871

'info_dict': {

4872

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4873

'title': '3Blue1Brown - Search - linear algebra',

4874

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4875

'uploader': '3Blue1Brown',

4876

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4877

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4878

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4879

'tags': ['Mathematics'],

4880

'channel': '3Blue1Brown',

4881

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4882

'channel_follower_count': int

4883

},

4884

}, {

4885

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4886

'only_matching': True,

4887

}, {

4888

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4889

'only_matching': True,

4890

}, {

4891

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4892

'only_matching': True,

4893

}, {

4894

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4895

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4896

'info_dict': {

4897

'title': '29C3: Not my department',

4898

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4899

'uploader': 'Christiaan008',

4900

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4901

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4902

'tags': [],

4903

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4904

'view_count': int,

4905

'modified_date': '20150605',

4906

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4907

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4908

'channel': 'Christiaan008',

4909

},

4910

'playlist_count': 96,

4911

}, {

4912

'note': 'Large playlist',

4913

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4914

'info_dict': {

4915

'title': 'Uploads from Cauchemar',

4916

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4917

'uploader': 'Cauchemar',

4918

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4919

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4920

'tags': [],

4921

'modified_date': r're:\d{8}',

4922

'channel': 'Cauchemar',

4923

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4924

'view_count': int,

4925

'description': '',

4926

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4927

},

4928

'playlist_mincount': 1123,

4929

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4930

}, {

4931

'note': 'even larger playlist, 8832 videos',

4932

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4933

'only_matching': True,

4934

}, {

4935

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4936

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4937

'info_dict': {

4938

'title': 'Uploads from Interstellar Movie',

4939

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4940

'uploader': 'Interstellar Movie',

4941

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4942

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4943

'tags': [],

4944

'view_count': int,

4945

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4946

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4947

'channel': 'Interstellar Movie',

4948

'description': '',

4949

'modified_date': r're:\d{8}',

4950

},

4951

'playlist_mincount': 21,

4952

}, {

4953

'note': 'Playlist with "show unavailable videos" button',

4954

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4955

'info_dict': {

4956

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4957

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4958

'uploader': 'Phim Siêu Nhân Nhật Bản',

4959

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4960

'view_count': int,

4961

'channel': 'Phim Siêu Nhân Nhật Bản',

4962

'tags': [],

4963

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4964

'description': '',

4965

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4966

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4967

'modified_date': r're:\d{8}',

4968

},

4969

'playlist_mincount': 200,

4970

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4971

}, {

4972

'note': 'Playlist with unavailable videos in page 7',

4973

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4974

'info_dict': {

4975

'title': 'Uploads from BlankTV',

4976

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4977

'uploader': 'BlankTV',

4978

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4979

'channel': 'BlankTV',

4980

'channel_url': 'https://www.youtube.com/c/blanktv',

4981

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4982

'view_count': int,

4983

'tags': [],

4984

'uploader_url': 'https://www.youtube.com/c/blanktv',

4985

'modified_date': r're:\d{8}',

4986

'description': '',

4987

},

4988

'playlist_mincount': 1000,

4989

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4990

}, {

4991

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4992

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4993

'info_dict': {

4994

'title': 'Data Analysis with Dr Mike Pound',

4995

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4996

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4997

'uploader': 'Computerphile',

4998

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4999

'uploader_url': 'https://www.youtube.com/user/Computerphile',

5000

'tags': [],

5001

'view_count': int,

5002

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

5003

'channel_url': 'https://www.youtube.com/user/Computerphile',

5004

'channel': 'Computerphile',

5005

},

5006

'playlist_mincount': 11,

5007

}, {

5008

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

5009

'only_matching': True,

5010

}, {

5011

'note': 'Playlist URL that does not actually serve a playlist',

5012

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

5017

'uploader': 'STREEM',

5018

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

5019

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

5020

'upload_date': '20150526',

5021

'license': 'Standard YouTube License',

5022

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

5023

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

5030

},

5031

'skip': 'This video is not available.',

5032

'add_ie': [YoutubeIE.ie_key()],

5033

}, {

5034

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

5035

'only_matching': True,

5036

}, {

5037

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5038

'only_matching': True,

5039

}, {

5040

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5041

'info_dict': {

5042

'id': 'Wq15eF5vCbI', # This will keep changing

5043

'ext': 'mp4',

5044

'title': str,

5045

'uploader': 'Sky News',

5046

'uploader_id': 'skynews',

5047

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5048

'upload_date': r're:\d{8}',

5049

'description': str,

5050

'categories': ['News & Politics'],

5051

'tags': list,

5052

'like_count': int,

5053

'release_timestamp': 1642502819,

5054

'channel': 'Sky News',

5055

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5056

'age_limit': 0,

5057

'view_count': int,

5058

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5059

'playable_in_embed': True,

5060

'release_date': '20220118',

5061

'availability': 'public',

5062

'live_status': 'is_live',

5063

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5064

'channel_follower_count': int

5065

},

5066

'params': {

5067

'skip_download': True,

5068

},

5069

'expected_warnings': ['Ignoring subtitle tracks found in '],

5070

}, {

5071

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5076

'uploader': 'The Young Turks',

5077

'uploader_id': 'TheYoungTurks',

5078

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5079

'upload_date': '20150715',

5080

'license': 'Standard YouTube License',

5081

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5082

'categories': ['News & Politics'],

5083

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5088

},

5089

'only_matching': True,

5090

}, {

5091

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5092

'only_matching': True,

5093

}, {

5094

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5095

'only_matching': True,

5096

}, {

5097

'note': 'A channel that is not live. Should raise error',

5098

'url': 'https://www.youtube.com/user/numberphile/live',

5099

'only_matching': True,

5100

}, {

5101

'url': 'https://www.youtube.com/feed/trending',

5102

'only_matching': True,

5103

}, {

5104

'url': 'https://www.youtube.com/feed/library',

5105

'only_matching': True,

5106

}, {

5107

'url': 'https://www.youtube.com/feed/history',

5108

'only_matching': True,

5109

}, {

5110

'url': 'https://www.youtube.com/feed/subscriptions',

5111

'only_matching': True,

5112

}, {

5113

'url': 'https://www.youtube.com/feed/watch_later',

5114

'only_matching': True,

5115

}, {

5116

'note': 'Recommended - redirects to home page.',

5117

'url': 'https://www.youtube.com/feed/recommended',

5118

'only_matching': True,

5119

}, {

5120

'note': 'inline playlist with not always working continuations',

5121

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5122

'only_matching': True,

5123

}, {

5124

'url': 'https://www.youtube.com/course',

5125

'only_matching': True,

5126

}, {

5127

'url': 'https://www.youtube.com/zsecurity',

5128

'only_matching': True,

5129

}, {

5130

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5131

'only_matching': True,

5132

}, {

5133

'url': 'https://www.youtube.com/TheYoungTurks/live',

5134

'only_matching': True,

5135

}, {

5136

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5143

}, {

5144

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5145

'only_matching': True,

5146

}, {

5147

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5148

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5149

'only_matching': True

5150

}, {

5151

'note': '/browse/ should redirect to /channel/',

5152

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5153

'only_matching': True

5154

}, {

5155

'note': 'VLPL, should redirect to playlist?list=PL...',

5156

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5157

'info_dict': {

5158

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5159

'uploader': 'NoCopyrightSounds',

5160

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5161

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5162

'title': 'NCS : All Releases 💿',

5163

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5164

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5165

'modified_date': r're:\d{8}',

5166

'view_count': int,

5167

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5168

'tags': [],

5169

'channel': 'NoCopyrightSounds',

5170

},

5171

'playlist_mincount': 166,

5172

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5173

}, {

5174

'note': 'Topic, should redirect to playlist?list=UU...',

5175

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5176

'info_dict': {

5177

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5178

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5179

'title': 'Uploads from Royalty Free Music - Topic',

5180

'uploader': 'Royalty Free Music - Topic',

5181

'tags': [],

5182

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5183

'channel': 'Royalty Free Music - Topic',

5184

'view_count': int,

5185

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5186

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5187

'modified_date': r're:\d{8}',

5188

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5189

'description': '',

5190

},

5191

'expected_warnings': [

5192

'The URL does not have a videos tab',

5193

r'[Uu]navailable videos (are|will be) hidden',

5194

],

5195

'playlist_mincount': 101,

5196

}, {

5197

'note': 'Topic without a UU playlist',

5198

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5199

'info_dict': {

5200

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5201

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5202

'tags': [],

5203

},

5204

'expected_warnings': [

5205

'the playlist redirect gave error',

5206

],

5207

'playlist_mincount': 9,

5208

}, {

5209

'note': 'Youtube music Album',

5210

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5211

'info_dict': {

5212

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5213

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5218

'modified_date': r're:\d{8}',

5219

},

5220

'playlist_count': 50,

5221

}, {

5222

'note': 'unlisted single video playlist',

5223

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5224

'info_dict': {

5225

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5226

'uploader': 'colethedj',

5227

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5228

'title': 'yt-dlp unlisted playlist test',

5229

'availability': 'unlisted',

5230

'tags': [],

5231

'modified_date': '20220418',

5232

'channel': 'colethedj',

5233

'view_count': int,

5234

'description': '',

5235

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5236

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5237

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5242

'url': 'https://www.youtube.com/feed/recommended',

5243

'info_dict': {

5244

'id': 'recommended',

5245

'title': 'recommended',

5246

'tags': [],

5247

},

5248

'playlist_mincount': 50,

5249

'params': {

5250

'skip_download': True,

5251

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5252

},

5253

}, {

5254

'note': 'API Fallback: /videos tab, sorted by oldest first',

5255

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5256

'info_dict': {

5257

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5258

'title': 'Cody\'sLab - Videos',

5259

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5260

'uploader': 'Cody\'sLab',

5261

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5262

'channel': 'Cody\'sLab',

5263

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5264

'tags': [],

5265

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5266

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5267

'channel_follower_count': int

5268

},

5269

'playlist_mincount': 650,

5270

'params': {

5271

'skip_download': True,

5272

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5273

},

5274

}, {

5275

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5276

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5277

'info_dict': {

5278

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5279

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5280

'title': 'Uploads from Royalty Free Music - Topic',

5281

'uploader': 'Royalty Free Music - Topic',

5282

'modified_date': r're:\d{8}',

5283

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5284

'description': '',

5285

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5286

'tags': [],

5287

'channel': 'Royalty Free Music - Topic',

5288

'view_count': int,

5289

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5290

},

5291

'expected_warnings': [

5292

'does not have a videos tab',

5293

r'[Uu]navailable videos (are|will be) hidden',

5294

],

5295

'playlist_mincount': 101,

5296

'params': {

5297

'skip_download': True,

5298

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5299

},

5300

}, {

5301

'note': 'non-standard redirect to regional channel',

5302

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5303

'only_matching': True

5304

}, {

5305

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5306

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5307

'info_dict': {

5308

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5309

'modified_date': '20220407',

5310

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5311

'tags': [],

5312

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5313

'uploader': 'pukkandan',

5314

'availability': 'unlisted',

5315

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5316

'channel': 'pukkandan',

5317

'description': 'Test for collaborative playlist',

5318

'title': 'yt-dlp test - collaborative playlist',

5319

'view_count': int,

5320

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5321

},

5322

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by ``YoutubeIE`` are always rejected here; every other
    URL is checked by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5328

5329

# Splits a URL into `pre` (the part matched by _VALID_URL), an optional `tab`
# path segment (e.g. "/videos") and `post` (everything that follows).
# `(?(not_channel)|(?P<tab>/\w+))` is a conditional group: `tab` is only tried
# when the `not_channel` group did not take part in the match.
# NOTE(review): `not_channel` is presumably a named group of _VALID_URL, which
# is defined outside this excerpt - confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5330

5331

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel URL to its entries or to a redirect result.

    The URL host is first rewritten to ``www.youtube.com`` and split with
    ``_URL_RE``. Depending on the URL and on the downloaded page data, this
    returns one of:

    * a ``url_result`` pointing at another URL (music album, regional
      redirect, or a single video),
    * the result of ``_extract_from_tabs`` when the page data contains tabs,
    * the result of ``_extract_from_playlist`` for a watch-page playlist.

    @param url            the URL to extract
    @param smuggled_data  dict of smuggled data; only ``is_music_url`` is read here
    @raises ExtractorError  when the page cannot be recognized, an album cannot be
                            resolved, or the requested tab does not exist
    @raises UserNotLive     when a ``/live`` tab was requested but tabs were
                            returned instead of a video
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not participate in the match are replaced by ''
        # so that callers can use str methods on every value
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before any implicit "/videos" redirect
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly modified) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The "Home" tab title is compared under its URL name "featured"
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # strip the leading "/"
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was added implicitly above, so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that does not exist
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video if one is known
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5459

5460

5461

class YoutubePlaylistIE(InfoExtractor):
    """Extractor for playlist URLs and bare playlist IDs.

    It does no extraction itself: the input is rewritten to a
    ``https://www.youtube.com/playlist`` URL and handed to ``YoutubeTabIE``.
    """

    IE_DESC = 'YouTube playlists'
    # Either a bare playlist ID, or a youtube/youtubekids/invidious URL
    # whose query string contains ``list=<playlist id>``
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that ``YoutubeTabIE`` accepts, and URLs carrying a non-empty
        ``v`` query parameter, are rejected; anything else is checked by
        the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is the module-level import from ..utils; no local import is needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a canonical playlist URL and delegate to ``YoutubeTabIE``."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare playlist ID
        # has none, so the `list` parameter is built from the ID instead
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5570

5571

5572

class YoutubeYtBeIE(InfoExtractor):
    """Extractor for youtu.be short links that also carry a ``list`` parameter.

    The link is expanded to the equivalent ``youtube.com/watch`` URL and
    passed on to ``YoutubeTabIE``.
    """

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a watch URL and delegate to ``YoutubeTabIE``."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5621

5622

5623

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Extractor for ``/embed/live_stream?channel=<id>`` URLs.

    The embed is redirected to the channel's ``/live`` page, which is
    handled by ``YoutubeTabIE``.
    """

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the live page of the channel named in *url*."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5636

5637

5638

class YoutubeYtUserIE(InfoExtractor):
    """Extractor for the ``ytuser:<name>`` shorthand.

    The shorthand is expanded to the user's ``/videos`` page and passed on
    to ``YoutubeTabIE``.
    """

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the videos page of the user named in *url*."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5652

5653

5654

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the ``:ytfav`` keyword family.

    Every accepted keyword resolves to the ``LL`` playlist URL, which is
    handled by ``YoutubeTabIE``.
    """

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the ``LL`` playlist; *url* itself is not inspected."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

5671

5672

5673

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):

5674

IE_NAME = 'youtube:notif'
IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
# Keyword forms accepted: ":ytnotif", ":ytnotifs", ":ytnotification", ":ytnotifications"
_VALID_URL = r':ytnotif(?:ication)?s?'
_LOGIN_REQUIRED = True
# Both tests only check that the keyword is matched by _VALID_URL
_TESTS = [{
    'url': ':ytnotif',
    'only_matching': True,
}, {
    'url': ':ytnotifications',
    'only_matching': True,
}]

5685

5686

def _extract_notification_menu(self, response, continuation_list):
    """Yield one entry per notification found in *response*.

    The notification items are looked up in *response* under either the
    popup-menu path or the appended-continuation path.

    @param response           API response to read the items from
    @param continuation_list  mutable sequence used as an output slot: index 0
                              is reset to None and then set to the last
                              ``continuationItemRenderer`` seen, if any
    """
    popup_items_path = (
        'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
        'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
    appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')

    menu_items = traverse_obj(
        response, popup_items_path, appended_items_path, expected_type=list)
    if not menu_items:
        menu_items = []

    # Cleared first so that a stale continuation is never reused by the caller
    continuation_list[0] = None
    for menu_item in menu_items:
        entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
        if entry:
            yield entry
        next_page = menu_item.get('continuationItemRenderer')
        if next_page:
            continuation_list[0] = next_page

5700

5701

def _extract_notification_renderer(self, notification):

5702

video_id = traverse_obj(

5703

notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)

5704

url = f'https://www.youtube.com/watch?v={video_id}'

5705

channel_id = None

5706

if not video_id:

5707

browse_ep = traverse_obj(

5708

notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)

5709

channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)

5710

post_id = self._search_regex(

5711

r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),

5712

'post id', default=None)

5713

if not channel_id or not post_id:

5714

return

5715

# The direct /post url redirects to this in the browser

5716

url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

5717

5718

channel = traverse_obj(

5719

notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),

5720

expected_type=str)

5721

notification_title = self._get_text(notification, 'shortMessage')

5722

if notification_title:

5723

notification_title = notification_title.replace('\xad', '') # remove soft hyphens

5724

# TODO: handle recommended videos

5725

title = self._search_regex(

5726

rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,

5727

'video title', default=None)

5728

upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

5729

if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())

else None)

return {

'_type': 'url',

'url': url,

'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),

5735

'video_id': video_id,

5736

'title': title,

5737

'channel_id': channel_id,

5738

'channel': channel,

5739

'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),

5740

'upload_date': upload_date,

5741

}

5742

5743

def _notification_menu_entries(self, ytcfg):

5744

continuation_list = [None]

5745

response = None

5746

for page in itertools.count(1):

5747

ctoken = traverse_obj(

5748

continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)

5749

response = self._extract_response(

5750

item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,

5751

ep='notification/get_notification_menu', check_get_keys='actions',

5752

headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))

5753

yield from self._extract_notification_menu(response, continuation_list)

5754

if not continuation_list[0]:

5755

break

5756

5757

def _real_extract(self, url):

5758

display_id = 'notifications'

5759

ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}

5760

self._report_playlist_authcheck(ytcfg)

5761

return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)

5762

5763

5764

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearch" search key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearchdate" search key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for /results and /search URLs carrying a `search_query` or `q` parameter."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL and return it as a playlist.

        The query is the first `search_query` value (falling back to `q`); the
        first `sp` value, if any, is forwarded as the search params. The query
        doubles as playlist id and title.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5833

5834

5835

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com search URLs, optionally limited to one section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (lower case) -> value of the `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search named in the URL and return it as a playlist.

        An explicit `sp` parameter wins; it is mapped back to a section name
        via _SECTIONS when possible, otherwise the raw value is used as the
        section label. Without `sp`, the URL fragment is looked up in _SECTIONS
        and ignored if unknown. The playlist id/title is the query, followed by
        " - <section>" when a section was determined.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            section = params
            for name, encoded in self._SECTIONS.items():
                if encoded == params:
                    section = name
                    break
        else:
            # Text between the first and second '#', or '' when there is no fragment
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5886

5887

5888

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    A subclass sets _FEED_NAME; it determines both IE_NAME
    ("youtube:<_FEED_NAME>") and the /feed/<_FEED_NAME> URL that is handed
    to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        name = f'youtube:{cls._FEED_NAME}'
        return name

    def _real_initialize(self):
        # Reuse the login check of YoutubeBaseInfoExtractor without inheriting from it
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Return a url result for this feed's page, to be handled by YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5906

5907

5908

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor: forwards ":ytwatchlater" to the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Return a url result for the "WL" playlist, to be handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5920

5921

5922

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed; also matches the bare youtube.com URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Unlike the base class, this feed does not require login
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': candidate, 'only_matching': True}
        for candidate in (':ytrec', ':ytrecommended', 'https://youtube.com')
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs" keyword)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytsubs', ':ytsubscriptions')
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" keyword)."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    """Prefix extractor mapping "ytstories:UC..." to an "RLTD..." playlist URL."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the stories playlist, to be handled by YoutubeTabIE.

        The playlist id is "RLTD" followed by the matched id group, i.e. the
        part after the "UC" prefix.
        """
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key string, as the other keyword extractors in this file do
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5976

5977

5978

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain the likely cause.

    _VALID_URL only matches URLs that end right after one of the listed
    parameters (or after a bare "watch?"), which is what remains when an
    unquoted "&" cuts the URL short in a shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The example commands name this program (yt-dlp), not its predecessor
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for /clip/<id> URLs; delegates to YoutubeIE with section start/end times."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip page to its base video plus the clip's time range.

        Returns a `url_transparent` result pointing at the watch URL of the
        base video, with `id` set to the clip id and `section_start` /
        `section_end` given in seconds (the page reports milliseconds).

        Raises ExtractorError when the page data contains no video id or no
        usable start/end times.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First `loopCommand` found under the clip attribution renderer, or None
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # Fail with a readable extractor error instead of a TypeError/KeyError
        # when the loop command or either of its timestamps is missing
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` value is only 1-10 characters long and report them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)