jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	import base64
	2	import calendar
	3	import copy
	4	import datetime
	5	import hashlib
	6	import itertools
	7	import json
	8	import math
	9	import os.path
	10	import random
	11	import re
	12	import sys
	13	import threading
	14	import time
	15	import traceback
	16	import urllib.error
	17	import urllib.parse
	18
	19	from .common import InfoExtractor, SearchInfoExtractor
	20	from ..compat import functools
	21	from ..jsinterp import JSInterpreter
	22	from ..utils import (
	23	NO_DEFAULT,
	24	ExtractorError,
	25	bug_reports_message,
	26	classproperty,
	27	clean_html,
	28	datetime_from_str,
	29	dict_get,
	30	error_to_compat_str,
	31	float_or_none,
	32	format_field,
	33	get_first,
	34	int_or_none,
	35	is_html,
	36	join_nonempty,
	37	js_to_json,
	38	mimetype2ext,
	39	network_exceptions,
	40	orderedSet,
	41	parse_codecs,
	42	parse_count,
	43	parse_duration,
	44	parse_iso8601,
	45	parse_qs,
	46	qualities,
	47	remove_end,
	48	remove_start,
	49	smuggle_url,
	50	str_or_none,
	51	str_to_int,
	52	strftime_or_none,
	53	traverse_obj,
	54	try_get,
	55	unescapeHTML,
	56	unified_strdate,
	57	unified_timestamp,
	58	unsmuggle_url,
	59	update_url_query,
	60	url_or_none,
	61	urljoin,
	62	variadic,
	63	)
	64
	65	# any clients starting with _ cannot be explicitly requested by the user
	66	INNERTUBE_CLIENTS = {
	67	'web': {
	68	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	69	'INNERTUBE_CONTEXT': {
	70	'client': {
	71	'clientName': 'WEB',
	72	'clientVersion': '2.20211221.00.00',
	73	}
	74	},
	75	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	76	},
	77	'web_embedded': {
	78	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	79	'INNERTUBE_CONTEXT': {
	80	'client': {
	81	'clientName': 'WEB_EMBEDDED_PLAYER',
	82	'clientVersion': '1.20211215.00.01',
	83	},
	84	},
	85	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	86	},
	87	'web_music': {
	88	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	89	'INNERTUBE_HOST': 'music.youtube.com',
	90	'INNERTUBE_CONTEXT': {
	91	'client': {
	92	'clientName': 'WEB_REMIX',
	93	'clientVersion': '1.20211213.00.00',
	94	}
	95	},
	96	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	97	},
	98	'web_creator': {
	99	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	100	'INNERTUBE_CONTEXT': {
	101	'client': {
	102	'clientName': 'WEB_CREATOR',
	103	'clientVersion': '1.20211220.02.00',
	104	}
	105	},
	106	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	107	},
	108	'android': {
	109	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	110	'INNERTUBE_CONTEXT': {
	111	'client': {
	112	'clientName': 'ANDROID',
	113	'clientVersion': '16.49',
	114	}
	115	},
	116	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	117	'REQUIRE_JS_PLAYER': False
	118	},
	119	'android_embedded': {
	120	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	121	'INNERTUBE_CONTEXT': {
	122	'client': {
	123	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	124	'clientVersion': '16.49',
	125	},
	126	},
	127	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	128	'REQUIRE_JS_PLAYER': False
	129	},
	130	'android_music': {
	131	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	132	'INNERTUBE_CONTEXT': {
	133	'client': {
	134	'clientName': 'ANDROID_MUSIC',
	135	'clientVersion': '4.57',
	136	}
	137	},
	138	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	139	'REQUIRE_JS_PLAYER': False
	140	},
	141	'android_creator': {
	142	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	143	'INNERTUBE_CONTEXT': {
	144	'client': {
	145	'clientName': 'ANDROID_CREATOR',
	146	'clientVersion': '21.47',
	147	},
	148	},
	149	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	150	'REQUIRE_JS_PLAYER': False
	151	},
	152	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	153	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	154	'ios': {
	155	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	156	'INNERTUBE_CONTEXT': {
	157	'client': {
	158	'clientName': 'IOS',
	159	'clientVersion': '16.46',
	160	'deviceModel': 'iPhone14,3',
	161	}
	162	},
	163	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	164	'REQUIRE_JS_PLAYER': False
	165	},
	166	'ios_embedded': {
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS_MESSAGES_EXTENSION',
	170	'clientVersion': '16.46',
	171	'deviceModel': 'iPhone14,3',
	172	},
	173	},
	174	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	175	'REQUIRE_JS_PLAYER': False
	176	},
	177	'ios_music': {
	178	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	179	'INNERTUBE_CONTEXT': {
	180	'client': {
	181	'clientName': 'IOS_MUSIC',
	182	'clientVersion': '4.57',
	183	},
	184	},
	185	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	186	'REQUIRE_JS_PLAYER': False
	187	},
	188	'ios_creator': {
	189	'INNERTUBE_CONTEXT': {
	190	'client': {
	191	'clientName': 'IOS_CREATOR',
	192	'clientVersion': '21.47',
	193	},
	194	},
	195	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	196	'REQUIRE_JS_PLAYER': False
	197	},
	198	# mweb has 'ultralow' formats
	199	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	200	'mweb': {
	201	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	202	'INNERTUBE_CONTEXT': {
	203	'client': {
	204	'clientName': 'MWEB',
	205	'clientVersion': '2.20211221.01.00',
	206	}
	207	},
	208	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	209	},
	210	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	211	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	212	'tv_embedded': {
	213	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	214	'INNERTUBE_CONTEXT': {
	215	'client': {
	216	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	217	'clientVersion': '2.0',
	218	},
	219	},
	220	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	221	},
	222	}
	223
	224
	225	def _split_innertube_client(client_name):
	226	variant, *base = client_name.rsplit('.', 1)
	227	if base:
	228	return variant, base[0], variant
	229	base, *variant = client_name.split('_', 1)
	230	return client_name, base, variant[0] if variant else None
	231
	232
	233	def build_innertube_clients():
	234	THIRD_PARTY = {
	235	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	236	}
	237	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	238	priority = qualities(BASE_CLIENTS[::-1])
	239
	240	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	241	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	242	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	243	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	244	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	245
	246	_, base_client, variant = _split_innertube_client(client)
	247	ytcfg['priority'] = 10 * priority(base_client)
	248
	249	if not variant:
	250	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	251	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	252	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	253	embedscreen['priority'] -= 3
	254	elif variant == 'embedded':
	255	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	256	ytcfg['priority'] -= 2
	257	else:
	258	ytcfg['priority'] -= 3
	259
	260
	261	build_innertube_clients()
	262
	263
	264	class YoutubeBaseInfoExtractor(InfoExtractor):
	265	"""Provide base functions for Youtube extractors"""
	266
	267	_RESERVED_NAMES = (
	268	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	269	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	270	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	271	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	272
	273	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	274
	275	# _NETRC_MACHINE = 'youtube'
	276
	277	# If True it will raise an error if no login info is provided
	278	_LOGIN_REQUIRED = False
	279
	280	_INVIDIOUS_SITES = (
	281	# invidious-redirect websites
	282	r'(?:www\.)?redirect\.invidious\.io',
	283	r'(?:(?:www\|dev)\.)?invidio\.us',
	284	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
	285	r'(?:www\.)?invidious\.pussthecat\.org',
	286	r'(?:www\.)?invidious\.zee\.li',
	287	r'(?:www\.)?invidious\.ethibox\.fr',
	288	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	289	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	290	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	291	# youtube-dl invidious instances list
	292	r'(?:(?:www\|no)\.)?invidiou\.sh',
	293	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	294	r'(?:www\.)?invidious\.kabi\.tk',
	295	r'(?:www\.)?invidious\.mastodon\.host',
	296	r'(?:www\.)?invidious\.zapashcanon\.fr',
	297	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	298	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	299	r'(?:www\.)?invidious\.himiko\.cloud',
	300	r'(?:www\.)?invidious\.reallyancient\.tech',
	301	r'(?:www\.)?invidious\.tube',
	302	r'(?:www\.)?invidiou\.site',
	303	r'(?:www\.)?invidious\.site',
	304	r'(?:www\.)?invidious\.xyz',
	305	r'(?:www\.)?invidious\.nixnet\.xyz',
	306	r'(?:www\.)?invidious\.048596\.xyz',
	307	r'(?:www\.)?invidious\.drycat\.fr',
	308	r'(?:www\.)?inv\.skyn3t\.in',
	309	r'(?:www\.)?tube\.poal\.co',
	310	r'(?:www\.)?tube\.connect\.cafe',
	311	r'(?:www\.)?vid\.wxzm\.sx',
	312	r'(?:www\.)?vid\.mint\.lgbt',
	313	r'(?:www\.)?vid\.puffyan\.us',
	314	r'(?:www\.)?yewtu\.be',
	315	r'(?:www\.)?yt\.elukerio\.org',
	316	r'(?:www\.)?yt\.lelux\.fi',
	317	r'(?:www\.)?invidious\.ggc-project\.de',
	318	r'(?:www\.)?yt\.maisputain\.ovh',
	319	r'(?:www\.)?ytprivate\.com',
	320	r'(?:www\.)?invidious\.13ad\.de',
	321	r'(?:www\.)?invidious\.toot\.koeln',
	322	r'(?:www\.)?invidious\.fdn\.fr',
	323	r'(?:www\.)?watch\.nettohikari\.com',
	324	r'(?:www\.)?invidious\.namazso\.eu',
	325	r'(?:www\.)?invidious\.silkky\.cloud',
	326	r'(?:www\.)?invidious\.exonip\.de',
	327	r'(?:www\.)?invidious\.riverside\.rocks',
	328	r'(?:www\.)?invidious\.blamefran\.net',
	329	r'(?:www\.)?invidious\.moomoo\.de',
	330	r'(?:www\.)?ytb\.trom\.tf',
	331	r'(?:www\.)?yt\.cyberhost\.uk',
	332	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	333	r'(?:www\.)?qklhadlycap4cnod\.onion',
	334	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	335	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	336	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	337	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	338	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	339	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	340	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	341	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	342	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	343	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	344	# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
	345	r'(?:www\.)?piped\.kavin\.rocks',
	346	r'(?:www\.)?piped\.silkky\.cloud',
	347	r'(?:www\.)?piped\.tokhmi\.xyz',
	348	r'(?:www\.)?piped\.moomoo\.me',
	349	r'(?:www\.)?il\.ax',
	350	r'(?:www\.)?piped\.syncpundit\.com',
	351	r'(?:www\.)?piped\.mha\.fi',
	352	r'(?:www\.)?piped\.mint\.lgbt',
	353	r'(?:www\.)?piped\.privacy\.com\.de',
	354	)
	355
	356	def _initialize_consent(self):
	357	cookies = self._get_cookies('https://www.youtube.com/')
	358	if cookies.get('__Secure-3PSID'):
	359	return
	360	consent_id = None
	361	consent = cookies.get('CONSENT')
	362	if consent:
	363	if 'YES' in consent.value:
	364	return
	365	consent_id = self._search_regex(
	366	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	367	if not consent_id:
	368	consent_id = random.randint(100, 999)
	369	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	370
	371	def _initialize_pref(self):
	372	cookies = self._get_cookies('https://www.youtube.com/')
	373	pref_cookie = cookies.get('PREF')
	374	pref = {}
	375	if pref_cookie:
	376	try:
	377	pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
	378	except ValueError:
	379	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	380	pref.update({'hl': 'en', 'tz': 'UTC'})
	381	self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
	382
	383	def _real_initialize(self):
	384	self._initialize_pref()
	385	self._initialize_consent()
	386	self._check_login_required()
	387
	388	def _check_login_required(self):
	389	if self._LOGIN_REQUIRED and not self._cookies_passed:
	390	self.raise_login_required('Login details are needed to download this content', method='cookies')
	391
	392	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s='
	393	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
	394
	395	def _get_default_ytcfg(self, client='web'):
	396	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	397
	398	def _get_innertube_host(self, client='web'):
	399	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	400
	401	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	402	# try_get but with fallback to default ytcfg client values when present
	403	_func = lambda y: try_get(y, getter, expected_type)
	404	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	405
	406	def _extract_client_name(self, ytcfg, default_client='web'):
	407	return self._ytcfg_get_safe(
	408	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	409	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
	410
	411	def _extract_client_version(self, ytcfg, default_client='web'):
	412	return self._ytcfg_get_safe(
	413	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	414	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
	415
	416	def _select_api_hostname(self, req_api_hostname, default_client=None):
	417	return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
	418	or req_api_hostname or self._get_innertube_host(default_client or 'web'))
	419
	420	def _extract_api_key(self, ytcfg=None, default_client='web'):
	421	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
	422
	423	def _extract_context(self, ytcfg=None, default_client='web'):
	424	context = get_first(
	425	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	426	# Enforce language and tz for extraction
	427	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	428	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	429	return context
	430
	431	_SAPISID = None
	432
	433	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	434	time_now = round(time.time())
	435	if self._SAPISID is None:
	436	yt_cookies = self._get_cookies('https://www.youtube.com')
	437	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	438	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	439	sapisid_cookie = dict_get(
	440	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	441	if sapisid_cookie and sapisid_cookie.value:
	442	self._SAPISID = sapisid_cookie.value
	443	self.write_debug('Extracted SAPISID cookie')
	444	# SAPISID cookie is required if not already present
	445	if not yt_cookies.get('SAPISID'):
	446	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	447	self._set_cookie(
	448	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	449	else:
	450	self._SAPISID = False
	451	if not self._SAPISID:
	452	return None
	453	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	454	sapisidhash = hashlib.sha1(
	455	f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
	456	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	457
	458	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	459	note='Downloading API JSON', errnote='Unable to download API page',
	460	context=None, api_key=None, api_hostname=None, default_client='web'):
	461
	462	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	463	data.update(query)
	464	real_headers = self.generate_api_headers(default_client=default_client)
	465	real_headers.update({'content-type': 'application/json'})
	466	if headers:
	467	real_headers.update(headers)
	468	api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
	469	or api_key or self._extract_api_key(default_client=default_client))
	470	return self._download_json(
	471	f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
	472	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	473	data=json.dumps(data).encode('utf8'), headers=real_headers,
	474	query={'key': api_key, 'prettyPrint': 'false'})
	475
	476	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	477	return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
	478
	479	@staticmethod
	480	def _extract_session_index(*data):
	481	"""
	482	Index of current account in account list.
	483	See: https://github.com/yt-dlp/yt-dlp/pull/519
	484	"""
	485	for ytcfg in data:
	486	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	487	if session_index is not None:
	488	return session_index
	489
	490	# Deprecated?
	491	def _extract_identity_token(self, ytcfg=None, webpage=None):
	492	if ytcfg:
	493	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
	494	if token:
	495	return token
	496	if webpage:
	497	return self._search_regex(
	498	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	499	'identity token', default=None, fatal=False)
	500

1

import base64

import calendar

import copy

import datetime

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import threading

import time

import traceback

import urllib.error

import urllib.parse

from .common import InfoExtractor, SearchInfoExtractor

20

from ..compat import functools

21

from ..jsinterp import JSInterpreter

22

from ..utils import (

NO_DEFAULT,

ExtractorError,

bug_reports_message,

classproperty,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# Built-in Innertube client configurations, keyed by client name.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
# receive defaults from build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}

def _split_innertube_client(client_name):

226

variant, *base = client_name.rsplit('.', 1)

227

if base:

228

return variant, base[0], variant

229

base, *variant = client_name.split('_', 1)

230

return client_name, base, variant[0] if variant else None

231

232

233

def build_innertube_clients():
    """Complete every entry of INNERTUBE_CLIENTS in place.

    Each client gets default values for ``INNERTUBE_API_KEY``,
    ``INNERTUBE_HOST``, ``REQUIRE_JS_PLAYER`` and the context language, plus a
    ``priority`` derived from its base client.  Clients without a variant
    additionally spawn a ``<client>_embedscreen`` copy; ``embedded`` variants
    get the third-party embed context.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: '<client>_embedscreen' entries are added below.
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Base client: derive an embed-screen flavour from the completed config.
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3

259

260

261

build_innertube_clients()

262

263

264

class YoutubeBaseInfoExtractor(InfoExtractor):

265

"""Provide base functions for Youtube extractors"""

266

267

_RESERVED_NAMES = (

268

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

274

275

# _NETRC_MACHINE = 'youtube'

276

277

# If True it will raise an error if no login info is provided

278

_LOGIN_REQUIRED = False

279

280

_INVIDIOUS_SITES = (

281

# invidious-redirect websites

282

r'(?:www\.)?redirect\.invidious\.io',

283

r'(?:(?:www|dev)\.)?invidio\.us',

284

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md

285

r'(?:www\.)?invidious\.pussthecat\.org',

286

r'(?:www\.)?invidious\.zee\.li',

287

r'(?:www\.)?invidious\.ethibox\.fr',

288

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

289

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

290

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

291

# youtube-dl invidious instances list

292

r'(?:(?:www|no)\.)?invidiou\.sh',

293

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

294

r'(?:www\.)?invidious\.kabi\.tk',

295

r'(?:www\.)?invidious\.mastodon\.host',

296

r'(?:www\.)?invidious\.zapashcanon\.fr',

297

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

298

r'(?:www\.)?invidious\.tinfoil-hat\.net',

299

r'(?:www\.)?invidious\.himiko\.cloud',

300

r'(?:www\.)?invidious\.reallyancient\.tech',

301

r'(?:www\.)?invidious\.tube',

302

r'(?:www\.)?invidiou\.site',

303

r'(?:www\.)?invidious\.site',

304

r'(?:www\.)?invidious\.xyz',

305

r'(?:www\.)?invidious\.nixnet\.xyz',

306

r'(?:www\.)?invidious\.048596\.xyz',

307

r'(?:www\.)?invidious\.drycat\.fr',

308

r'(?:www\.)?inv\.skyn3t\.in',

309

r'(?:www\.)?tube\.poal\.co',

310

r'(?:www\.)?tube\.connect\.cafe',

311

r'(?:www\.)?vid\.wxzm\.sx',

312

r'(?:www\.)?vid\.mint\.lgbt',

313

r'(?:www\.)?vid\.puffyan\.us',

314

r'(?:www\.)?yewtu\.be',

315

r'(?:www\.)?yt\.elukerio\.org',

316

r'(?:www\.)?yt\.lelux\.fi',

317

r'(?:www\.)?invidious\.ggc-project\.de',

318

r'(?:www\.)?yt\.maisputain\.ovh',

319

r'(?:www\.)?ytprivate\.com',

320

r'(?:www\.)?invidious\.13ad\.de',

321

r'(?:www\.)?invidious\.toot\.koeln',

322

r'(?:www\.)?invidious\.fdn\.fr',

323

r'(?:www\.)?watch\.nettohikari\.com',

324

r'(?:www\.)?invidious\.namazso\.eu',

325

r'(?:www\.)?invidious\.silkky\.cloud',

326

r'(?:www\.)?invidious\.exonip\.de',

327

r'(?:www\.)?invidious\.riverside\.rocks',

328

r'(?:www\.)?invidious\.blamefran\.net',

329

r'(?:www\.)?invidious\.moomoo\.de',

330

r'(?:www\.)?ytb\.trom\.tf',

331

r'(?:www\.)?yt\.cyberhost\.uk',

332

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

333

r'(?:www\.)?qklhadlycap4cnod\.onion',

334

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

335

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

336

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

337

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

338

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

339

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

340

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

341

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

342

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

343

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

344

# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances

345

r'(?:www\.)?piped\.kavin\.rocks',

346

r'(?:www\.)?piped\.silkky\.cloud',

347

r'(?:www\.)?piped\.tokhmi\.xyz',

348

r'(?:www\.)?piped\.moomoo\.me',

349

r'(?:www\.)?il\.ax',

350

r'(?:www\.)?piped\.syncpundit\.com',

351

r'(?:www\.)?piped\.mha\.fi',

352

r'(?:www\.)?piped\.mint\.lgbt',

353

r'(?:www\.)?piped\.privacy\.com\.de',

354

)

355

356

def _initialize_consent(self):

357

cookies = self._get_cookies('https://www.youtube.com/')

358

if cookies.get('__Secure-3PSID'):

359

return

360

consent_id = None

361

consent = cookies.get('CONSENT')

362

if consent:

363

if 'YES' in consent.value:

364

return

365

consent_id = self._search_regex(

366

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

367

if not consent_id:

368

consent_id = random.randint(100, 999)

369

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

370

371

def _initialize_pref(self):

372

cookies = self._get_cookies('https://www.youtube.com/')

373

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(urllib.parse.parse_qsl(pref_cookie.value))

378

except ValueError:

379

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

380

pref.update({'hl': 'en', 'tz': 'UTC'})

381

self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))

382

383

def _real_initialize(self):

384

self._initialize_pref()

385

self._initialize_consent()

386

self._check_login_required()

387

388

def _check_login_required(self):

389

if self._LOGIN_REQUIRED and not self._cookies_passed:

390

self.raise_login_required('Login details are needed to download this content', method='cookies')

391

392

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='

393

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='

394

395

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config for ``client``.

    Raises ``KeyError`` if ``client`` is not a key of ``INNERTUBE_CLIENTS``.
    """
    builtin_cfg = INNERTUBE_CLIENTS[client]
    # Copy so that callers can mutate the result without touching the table.
    return copy.deepcopy(builtin_cfg)

397

398

def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` entry of the built-in config for ``client``."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

400

401

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg`` with a fallback to the default client config.

    The built-in config of ``default_client`` is consulted whenever the
    lookup on ``ytcfg`` produces a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

405

406

def _extract_client_name(self, ytcfg, default_client='web'):

407

return self._ytcfg_get_safe(

408

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

409

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)

410

411

def _extract_client_version(self, ytcfg, default_client='web'):

412

return self._ytcfg_get_safe(

413

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

414

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)

415

416

def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Choose the API hostname to use.

    Priority: the 'innertube_host' extractor argument, then *req_api_hostname*,
    then the innertube host of *default_client* ('web' when not given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')

419

420

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from *ytcfg*, or from the defaults of *default_client*."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, str, default_client)

422

423

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict from *ytcfg*, or from the defaults of *default_client*.

    The context's 'client' dict (when present) is updated in place so that
    language and timezone are fixed to English / UTC.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    enforced = {'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    traverse_obj(context, 'client', expected_type=dict, default={}).update(enforced)
    return context

# Cache used by _generate_sapisidhash_header: None = cookies not inspected yet,
# False = no usable cookie was found, otherwise the cookie value
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

434

time_now = round(time.time())

435

if self._SAPISID is None:

436

yt_cookies = self._get_cookies('https://www.youtube.com')

437

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

438

# See: https://github.com/yt-dlp/yt-dlp/issues/393

439

sapisid_cookie = dict_get(

440

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

441

if sapisid_cookie and sapisid_cookie.value:

442

self._SAPISID = sapisid_cookie.value

443

self.write_debug('Extracted SAPISID cookie')

444

# SAPISID cookie is required if not already present

445

if not yt_cookies.get('SAPISID'):

446

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

447

self._set_cookie(

448

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

449

else:

450

self._SAPISID = False

451

if not self._SAPISID:

452

return None

453

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

454

sapisidhash = hashlib.sha1(

455

f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()

456

return f'SAPISIDHASH {time_now}_{sapisidhash}'

457

458

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST *query* as JSON to the /youtubei/v1/<ep> endpoint and return the result of _download_json.

    @param context      innertube context; extracted for *default_client* when falsy
    @param headers      extra headers applied on top of the generated API headers
    @param api_key      used unless the 'innertube_key' extractor argument is set;
                        extracted for *default_client* when both are falsy
    @param api_hostname requested hostname, see _select_api_hostname
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
           or api_key or self._extract_api_key(default_client=default_client))
    endpoint = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    body = json.dumps(payload).encode('utf8')
    return self._download_json(
        endpoint, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=body, headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})

475

476

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search *webpage* for the JSON that follows the ytInitialData assignment (via _search_json)."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)

478

479

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' that converts to an int among the given
    ytcfgs, or None when there is none.
    """
    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token: 'ID_TOKEN' from *ytcfg* when available,
    otherwise the ID_TOKEN value found in *webpage* (None if neither yields one).
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

500

501

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        parts = (try_get(source, datasync_getters, str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

529

530

@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once and cached."""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False

533

534

def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to `ytcfg.set({...});` in *webpage*.

    @returns the parsed ytcfg dict, or {} when *webpage* is empty, no
             ytcfg.set(...) call is found, or its argument cannot be parsed
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped; unescaped/garbled anchors
    # here make the pattern unmatchable and silently yield an empty ytcfg.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

541

542

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit keyword values take precedence; anything not given is extracted
    from *ytcfg* (or the defaults of *default_client*). 'Authorization' and
    'X-Origin' are added only when a SAPISIDHASH value can be generated.
    Headers whose value is None are left out of the returned dict.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

566

567

def _download_ytcfg(self, client, video_id):

568

url = {

569

'web': 'https://www.youtube.com',

570

'web_music': 'https://music.youtube.com',

571

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(

576

url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')

577

return self.extract_ytcfg(video_id, webpage) or {}

578

579

@staticmethod

580

def _build_api_continuation_query(continuation, ctp=None):

581

query = {

582

'continuation': continuation

583

}

584

# TODO: Inconsistency with clickTrackingParams.

585

# Currently we have a fixed ctp contained within context (from ytcfg)

586

# and a ctp in root query for continuation.

587

if ctp:

588

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    (or 'reloadContinuationData'); returns None when no token is present.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if token:
        return cls._build_api_continuation_query(
            token, continuation_data.get('clickTrackingParams'))
    return None

603

604

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or carries no
    'continuationCommand' token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

613

614

@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query for *renderer*.

    The renderer's own next/reload continuation data is preferred; otherwise the
    first usable 'continuationItemRenderer' among its 'contents' and 'items' is used.
    Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            found = cls._extract_continuation_ep_data(try_get(entry, endpoint_getters, dict))
            if found:
                return found

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has
    both a 'type' and a non-empty 'text'.
    """
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in alert_dicts:
        if isinstance(alert_dict, dict):
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if alert_type:
                    message = cls._get_text(alert, 'text')
                    if message:
                        yield alert_type, message

647

648

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

649

errors = []

650

warnings = []

651

for alert_type, alert_message in alerts:

652

if alert_type.lower() == 'error' and fatal:

653

errors.append([alert_type, alert_message])

654

else:

655

warnings.append([alert_type, alert_message])

656

657

for alert_type, alert_message in (warnings + errors[:-1]):

658

self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

659

if errors:

660

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

661

662

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and hand them to _report_alerts with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

664

665

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased 'metadataBadgeRenderer' labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in *data*.

    With no paths, *data* itself is examined. A text object either has a
    'simpleText' string or a list of 'runs' whose 'text' parts are joined;
    a bare list is treated as the runs. *max_runs* limits how many runs are joined.
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only branching paths (containing ... or a list/tuple key) give a list of results
            branching = any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.

    parse_count is tried first; if it yields None, the leading run of digits
    and commas (after removing all whitespace) is converted with str_to_int.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumb_url = url_or_none(entry.get('url'))
            if not thumb_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumb_url:
                thumb_url = thumb_url.partition('?')[0]
            results.append({
                'url': thumb_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results

@staticmethod

def extract_relative_time(relative_time_text):

729

"""

730

Extracts a relative time from string and converts to dt object

731

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

736

if start:

737

return datetime_from_str(start)

738

try:

739

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """
    Read the time text at the given paths and try to turn it into a timestamp.

    A relative time ('... ago', 'today', ...) is tried first, then
    unified_timestamp on the whole text, then on a date-like part of it.
    A warning is reported once when a non-empty text cannot be parsed.
    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    timestamp = calendar.timegm(relative.timetuple()) if isinstance(relative, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

761

762

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the innertube API with retries and return the response.

    An attempt is retried (up to the 'extractor_retries' param, default 3) on
    network errors other than HTTP 403/429, on an 'unknown error' alert in the
    response, and when none of *check_get_keys* is present in the response.
    @returns the response, or None when a non-fatal failure was reported
    @raises ExtractorError on failure when *fatal* is set
    """
    response = None
    last_error = None
    attempt = -1
    max_retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, network_exceptions):
                is_http_error = isinstance(cause, urllib.error.HTTPError)
                if is_http_error:
                    first_bytes = cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may be JSON carrying YouTube's own error message
                        error_body = self._webpage_read_content(cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or cause.code not in (403, 429):
                    last_error = error_to_compat_str(cause or e.msg)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # The API call succeeded; every path of the handler above leaves this iteration
        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                # NOTE(review): on the final attempt this leaves the loop and the
                # last response is returned without raising, even when fatal is set
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response

@staticmethod

def is_music_url(url):

836

return re.match(r'https?://music\.youtube\.com/', url) is not None

837

838

def _extract_video(self, renderer):
    """
    Build a 'url'-type result for YoutubeIE from a video renderer dict.

    The URL points to /shorts/<id> when the renderer is marked as a short
    (overlay style 'SHORTS' or a '/shorts/' navigation URL), else to /watch?v=<id>.
    'upload_date' is only filled in when the 'approximate_date' extractor
    argument of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration that follows "ago" in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

895

IE_DESC = 'YouTube'

896

_VALID_URL = r"""(?x)^

897

(

898

(?:https?://|//) # http(s):// or protocol-independent URL

899

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

900

(?:www\.)?deturl\.com/www\.youtube\.com|

901

(?:www\.)?pwnyoutube\.com|

902

(?:www\.)?hooktube\.com|

903

(?:www\.)?yourepeat\.com|

904

tube\.majestyc\.net|

905

%(invidious)s|

906

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

907

(?:.*?\#/)? # handle anchor (#/) redirect urls

908

(?: # the various things that can precede the ID:

909

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

910

|(?: # or the v= param in all its forms

911

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

912

(?:\?|\#!?) # the params delimiter ? or # or #!

913

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

919

vid\.plus| # or vid.plus/xxxx

920

zwearz\.com/watch| # or zwearz.com/watch/xxxx

921

%(invidious)s

922

)/

923

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

924

)

925

)? # all until now is optional -> you can pass the naked ID

926

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

927

(?(1).+)? # if we found the ID, everything can follow

928

(?:\#|$)""" % {

929

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

930

}

931

_PLAYER_INFO_RE = (

932

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

933

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

934

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

935

)

936

_formats = {

937

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

938

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

939

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

940

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

941

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

942

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

943

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

944

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

945

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

946

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

947

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

948

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

949

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

950

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

951

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

952

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

953

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

954

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

959

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

960

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

961

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

962

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

963

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

964

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

965

966

# Apple HTTP Live Streaming

967

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

968

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

969

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

970

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

971

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

972

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

973

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

974

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

975

976

# DASH mp4 video

977

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

978

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

979

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

980

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

981

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

982

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

983

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

984

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

985

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

986

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

987

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

988

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

989

990

# Dash mp4 audio

991

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

992

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

993

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

994

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

995

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

996

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

997

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

998

999

# Dash webm

1000

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1001

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1002

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1003

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1004

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1005

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

1006

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

1007

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1008

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1009

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1010

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1011

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1012

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1013

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1014

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1015

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1016

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1017

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1018

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1019

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1020

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1021

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1022

1023

# Dash webm audio

1024

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1025

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1026

1027

# Dash webm audio with opus inside

1028

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1029

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1030

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1031

1032

# RTMP (unnamed)

1033

'_rtmp': {'protocol': 'rtmp'},

1034

1035

# av01 video only formats sometimes served with "unknown" codecs

1036

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1037

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1038

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1039

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1040

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1041

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1042

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1043

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1044

}

1045

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1057

'uploader': 'Philipp Hagemeister',

1058

'uploader_id': 'phihag',

1059

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1060

'channel': 'Philipp Hagemeister',

1061

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1062

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1063

'upload_date': '20121002',

1064

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1065

'categories': ['Science & Technology'],

1066

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1071

'playable_in_embed': True,

1072

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1073

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1082

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1087

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1088

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1089

'uploader': 'SET India',

1090

'uploader_id': 'setindia',

1091

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1092

'age_limit': 18,

1093

},

1094

'skip': 'Private video',

1095

},

1096

{

1097

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1098

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1103

'uploader': 'Philipp Hagemeister',

1104

'uploader_id': 'phihag',

1105

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1106

'channel': 'Philipp Hagemeister',

1107

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1108

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1109

'upload_date': '20121002',

1110

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1111

'categories': ['Science & Technology'],

1112

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1117

'playable_in_embed': True,

1118

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1119

'live_status': 'not_live',

1120

'age_limit': 0,

1121

'channel_follower_count': int

1122

},

1123

'params': {

1124

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1129

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1134

'uploader_id': '8KVIDEO',

1135

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1136

'description': '',

1137

'uploader': '8KVIDEO',

1138

'title': 'UHDTV TEST 8K VIDEO.mp4'

1139

},

1140

'params': {

1141

'youtube_include_dash_manifest': True,

1142

'format': '141',

1143

},

1144

'skip': 'format 141 not served anymore',

1145

},

1146

# DASH manifest with encrypted signature

1147

{

1148

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1153

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1154

'duration': 244,

1155

'uploader': 'AfrojackVEVO',

1156

'uploader_id': 'AfrojackVEVO',

1157

'upload_date': '20131011',

1158

'abr': 129.495,

1159

'like_count': int,

1160

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1161

'playable_in_embed': True,

1162

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1163

'view_count': int,

1164

'track': 'The Spark',

1165

'live_status': 'not_live',

1166

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1167

'channel': 'Afrojack',

1168

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1169

'tags': 'count:19',

1170

'availability': 'public',

1171

'categories': ['Music'],

1172

'age_limit': 0,

1173

'alt_title': 'The Spark',

1174

'channel_follower_count': int

1175

},

1176

'params': {

1177

'youtube_include_dash_manifest': True,

1178

'format': '141/bestaudio[ext=m4a]',

1179

},

1180

},

1181

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1182

{

1183

'note': 'Embed allowed age-gate video',

1184

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1189

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1190

'duration': 142,

1191

'uploader': 'The Witcher',

1192

'uploader_id': 'WitcherGame',

1193

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1194

'upload_date': '20140605',

1195

'age_limit': 18,

1196

'categories': ['Gaming'],

1197

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1198

'availability': 'needs_auth',

1199

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1200

'like_count': int,

1201

'channel': 'The Witcher',

1202

'live_status': 'not_live',

1203

'tags': 'count:17',

1204

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1205

'playable_in_embed': True,

1206

'view_count': int,

1207

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1212

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1217

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1218

'upload_date': '20200408',

1219

'uploader_id': 'FlyingKitty900',

1220

'uploader': 'FlyingKitty',

1221

'age_limit': 18,

1222

'availability': 'needs_auth',

1223

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1224

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1225

'channel': 'FlyingKitty',

1226

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1227

'view_count': int,

1228

'categories': ['Entertainment'],

1229

'live_status': 'not_live',

1230

'tags': ['Flyingkitty', 'godzilla 2'],

1231

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1232

'like_count': int,

1233

'duration': 177,

1234

'playable_in_embed': True,

1235

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1240

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1241

'info_dict': {

1242

'id': 'Tq92D6wQ1mg',

1243

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1244

'ext': 'mp4',

1245

'upload_date': '20191228',

1246

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1247

'uploader': 'Projekt Melody',

1248

'description': 'md5:17eccca93a786d51bc67646756894066',

1249

'age_limit': 18,

1250

'like_count': int,

1251

'availability': 'needs_auth',

1252

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1253

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1254

'view_count': int,

1255

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1256

'channel': 'Projekt Melody',

1257

'live_status': 'not_live',

1258

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1259

'playable_in_embed': True,

1260

'categories': ['Entertainment'],

1261

'duration': 106,

1262

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1263

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1268

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1273

'uploader': 'Herr Lurik',

1274

'uploader_id': 'st3in234',

1275

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1276

'upload_date': '20130730',

1277

'track': 'Such mich find mich',

1278

'age_limit': 0,

1279

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1280

'like_count': int,

1281

'playable_in_embed': False,

1282

'creator': 'OOMPH!',

1283

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1284

'view_count': int,

1285

'alt_title': 'Such mich find mich',

1286

'duration': 210,

1287

'channel': 'Herr Lurik',

1288

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1289

'categories': ['Music'],

1290

'availability': 'public',

1291

'uploader_url': 'http://www.youtube.com/user/st3in234',

1292

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1293

'live_status': 'not_live',

1294

'artist': 'OOMPH!',

1295

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1300

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1301

'only_matching': True,

1302

},

1303

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1304

# YouTube Red ad is not captured for creator

1305

{

1306

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1312

'uploader_id': 'deadmau5',

1313

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1314

'creator': 'deadmau5',

1315

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1316

'uploader': 'deadmau5',

1317

'title': 'Deadmau5 - Some Chords (HD)',

1318

'alt_title': 'Some Chords',

1319

'availability': 'public',

1320

'tags': 'count:14',

1321

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1322

'view_count': int,

1323

'live_status': 'not_live',

1324

'channel': 'deadmau5',

1325

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1326

'like_count': int,

1327

'track': 'Some Chords',

1328

'artist': 'deadmau5',

1329

'playable_in_embed': True,

1330

'age_limit': 0,

1331

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1332

'categories': ['Music'],

1333

'album': 'Some Chords',

1334

'channel_follower_count': int

1335

},

1336

'expected_warnings': [

1337

'DASH manifest missing',

1338

]

1339

},

1340

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1341

{

1342

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1348

'uploader_id': 'olympic',

1349

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1350

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1351

'uploader': 'Olympics',

1352

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1353

'like_count': int,

1354

'release_timestamp': 1343767800,

1355

'playable_in_embed': True,

1356

'categories': ['Sports'],

1357

'release_date': '20120731',

1358

'channel': 'Olympics',

1359

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1360

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1361

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1362

'age_limit': 0,

1363

'availability': 'public',

1364

'live_status': 'was_live',

1365

'view_count': int,

1366

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1367

'channel_follower_count': int

1368

},

1369

'params': {

1370

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1380

'duration': 85,

1381

'upload_date': '20110310',

1382

'uploader_id': 'AllenMeow',

1383

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1384

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1385

'uploader': '孫ᄋᄅ',

1386

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1387

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1392

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1393

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1394

'view_count': int,

1395

'categories': ['People & Blogs'],

1396

'like_count': int,

1397

'live_status': 'not_live',

1398

'availability': 'unlisted',

1399

'channel_follower_count': int

1400

},

1401

},

1402

# url_encoded_fmt_stream_map is empty string

1403

{

1404

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1409

'description': '',

1410

'upload_date': '20150404',

1411

'uploader_id': 'spbelect',

1412

'uploader': 'Наблюдатели Петербурга',

1413

},

1414

'params': {

1415

'skip_download': 'requires avconv',

1416

},

1417

'skip': 'This live event has ended.',

1418

},

1419

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1420

{

1421

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1426

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1427

'duration': 220,

1428

'upload_date': '20150625',

1429

'uploader_id': 'dorappi2000',

1430

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1431

'uploader': 'dorappi2000',

1432

'formats': 'mincount:31',

1433

},

1434

'skip': 'not actual anymore',

1435

},

1436

# DASH manifest with segment_list

1437

{

1438

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1439

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1444

'uploader': 'Airtek',

1445

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1446

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1447

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1448

},

1449

'params': {

1450

'youtube_include_dash_manifest': True,

1451

'format': '135', # bestvideo

1452

},

1453

'skip': 'This live event has ended.',

1454

},

1455

{

1456

# Multifeed videos (multiple cameras), URL is for Main Camera

1457

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1458

'info_dict': {

1459

'id': 'jvGDaLqkpTg',

1460

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1461

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1468

'description': 'md5:e03b909557865076822aa169218d6a5d',

1469

'duration': 10643,

1470

'upload_date': '20161111',

1471

'uploader': 'Team PGP',

1472

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1473

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1480

'description': 'md5:e03b909557865076822aa169218d6a5d',

1481

'duration': 10991,

1482

'upload_date': '20161111',

1483

'uploader': 'Team PGP',

1484

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1485

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1492

'description': 'md5:e03b909557865076822aa169218d6a5d',

1493

'duration': 10995,

1494

'upload_date': '20161111',

1495

'uploader': 'Team PGP',

1496

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1497

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1504

'description': 'md5:e03b909557865076822aa169218d6a5d',

1505

'duration': 10990,

1506

'upload_date': '20161111',

1507

'uploader': 'Team PGP',

1508

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1509

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1514

},

1515

'skip': 'Not multifeed anymore',

1516

},

1517

{

1518

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1519

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1520

'info_dict': {

1521

'id': 'gVfLd0zydlo',

1522

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1523

},

1524

'playlist_count': 2,

1525

'skip': 'Not multifeed anymore',

1526

},

1527

{

1528

'url': 'https://vid.plus/FlRa-iH7PGw',

1529

'only_matching': True,

1530

},

1531

{

1532

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1533

'only_matching': True,

1534

},

1535

{

1536

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1537

# Also tests cut-off URL expansion in video description (see

1538

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1539

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1540

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1545

'alt_title': 'Dark Walk',

1546

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1547

'duration': 133,

1548

'upload_date': '20151119',

1549

'uploader_id': 'IronSoulElf',

1550

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1551

'uploader': 'IronSoulElf',

1552

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1553

'track': 'Dark Walk',

1554

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1555

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1556

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1557

'categories': ['Film & Animation'],

1558

'view_count': int,

1559

'live_status': 'not_live',

1560

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1561

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1562

'tags': 'count:13',

1563

'availability': 'public',

1564

'channel': 'IronSoulElf',

1565

'playable_in_embed': True,

1566

'like_count': int,

1567

'age_limit': 0,

1568

'channel_follower_count': int

1569

},

1570

'params': {

1571

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1576

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1577

'only_matching': True,

1578

},

1579

{

1580

# Video with yt:stretch=17:0

1581

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1586

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1587

'upload_date': '20151107',

1588

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1589

'uploader': 'CH GAMER DROID',

1590

},

1591

'params': {

1592

'skip_download': True,

1593

},

1594

'skip': 'This video does not exist.',

1595

},

1596

{

1597

# Video with incomplete 'yt:stretch=16:'

1598

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1599

'only_matching': True,

1600

},

1601

{

1602

# Video licensed under Creative Commons

1603

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1608

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1609

'duration': 721,

1610

'upload_date': '20150128',

1611

'uploader_id': 'BerkmanCenter',

1612

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1613

'uploader': 'The Berkman Klein Center for Internet & Society',

1614

'license': 'Creative Commons Attribution license (reuse allowed)',

1615

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1616

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1617

'like_count': int,

1618

'age_limit': 0,

1619

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1620

'channel': 'The Berkman Klein Center for Internet & Society',

1621

'availability': 'public',

1622

'view_count': int,

1623

'categories': ['Education'],

1624

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1625

'live_status': 'not_live',

1626

'playable_in_embed': True,

1627

'channel_follower_count': int

1628

},

1629

'params': {

1630

'skip_download': True,

},

},

{

# Channel-like uploader_url

1635

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1640

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1641

'duration': 4060,

1642

'upload_date': '20151120',

1643

'uploader': 'Bernie Sanders',

1644

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1645

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1646

'license': 'Creative Commons Attribution license (reuse allowed)',

1647

'playable_in_embed': True,

1648

'tags': 'count:12',

1649

'like_count': int,

1650

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1651

'age_limit': 0,

1652

'availability': 'public',

1653

'categories': ['News & Politics'],

1654

'channel': 'Bernie Sanders',

1655

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1656

'view_count': int,

1657

'live_status': 'not_live',

1658

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1659

'channel_follower_count': int

1660

},

1661

'params': {

1662

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1667

'only_matching': True,

1668

},

1669

{

1670

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1671

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1672

'only_matching': True,

1673

},

1674

{

1675

# Rental video preview

1676

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1681

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1682

'upload_date': '20150811',

1683

'uploader': 'FlixMatrix',

1684

'uploader_id': 'FlixMatrixKaravan',

1685

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1686

'license': 'Standard YouTube License',

1687

},

1688

'params': {

1689

'skip_download': True,

1690

},

1691

'skip': 'This video is not available.',

1692

},

1693

{

1694

# YouTube Red video with episode data

1695

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1700

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1701

'duration': 2085,

1702

'upload_date': '20170118',

1703

'uploader': 'Vsauce',

1704

'uploader_id': 'Vsauce',

1705

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1706

'series': 'Mind Field',

1707

'season_number': 1,

1708

'episode_number': 1,

1709

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1710

'tags': 'count:12',

1711

'view_count': int,

1712

'availability': 'public',

1713

'age_limit': 0,

1714

'channel': 'Vsauce',

1715

'episode': 'Episode 1',

1716

'categories': ['Entertainment'],

1717

'season': 'Season 1',

1718

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1719

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1720

'like_count': int,

1721

'playable_in_embed': True,

1722

'live_status': 'not_live',

1723

'channel_follower_count': int

1724

},

1725

'params': {

1726

'skip_download': True,

1727

},

1728

'expected_warnings': [

1729

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1734

# as inappropriate or offensive to some audiences.

1735

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1740

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1741

'duration': 965,

1742

'upload_date': '20140124',

1743

'uploader': 'New Century Foundation',

1744

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1745

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1746

},

1747

'params': {

1748

'skip_download': True,

1749

},

1750

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1755

'only_matching': True,

1756

},

1757

{

1758

# geo restricted to JP

1759

'url': 'sJL6WA-aGkQ',

1760

'only_matching': True,

1761

},

1762

{

1763

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1764

'only_matching': True,

1765

},

1766

{

1767

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1768

'only_matching': True,

1769

},

1770

{

1771

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1772

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1773

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1778

'only_matching': True,

1779

},

1780

{

1781

# Video with unsupported adaptive stream type formats

1782

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1787

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1788

'duration': 433,

1789

'upload_date': '20130923',

1790

'uploader': 'Amelia Putri Harwita',

1791

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1792

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1793

'formats': 'maxcount:10',

1794

},

1795

'params': {

1796

'skip_download': True,

1797

'youtube_include_dash_manifest': False,

1798

},

1799

'skip': 'not actual anymore',

1800

},

1801

{

1802

# YouTube Music auto-generated description

1803

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1808

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1809

'upload_date': '20190312',

1810

'uploader': 'Stephen - Topic',

1811

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1812

'artist': 'Stephen',

1813

'track': 'Voyeur Girl',

1814

'album': 'it\'s too much love to know my dear',

1815

'release_date': '20190313',

1816

'release_year': 2019,

1817

'alt_title': 'Voyeur Girl',

1818

'view_count': int,

1819

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1820

'playable_in_embed': True,

1821

'like_count': int,

1822

'categories': ['Music'],

1823

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1824

'channel': 'Stephen',

1825

'availability': 'public',

1826

'creator': 'Stephen',

1827

'duration': 169,

1828

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1829

'age_limit': 0,

1830

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1831

'tags': 'count:11',

1832

'live_status': 'not_live',

1833

'channel_follower_count': int

1834

},

1835

'params': {

1836

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1841

'only_matching': True,

1842

},

1843

{

1844

# invalid -> valid video id redirection

1845

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1850

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1851

'upload_date': '20090125',

1852

'uploader': 'Prochorowka',

1853

'uploader_id': 'Prochorowka',

1854

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1855

'artist': 'Panjabi MC',

1856

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1857

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1858

},

1859

'params': {

1860

'skip_download': True,

1861

},

1862

'skip': 'Video unavailable',

1863

},

1864

{

1865

# empty description results in an empty string

1866

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1873

'uploader_id': 'ElevageOrVert',

1874

'uploader': 'ElevageOrVert',

1875

'view_count': int,

1876

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1877

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1878

'like_count': int,

1879

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1880

'tags': [],

1881

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1882

'availability': 'public',

1883

'age_limit': 0,

1884

'categories': ['Pets & Animals'],

1885

'duration': 7,

1886

'playable_in_embed': True,

1887

'live_status': 'not_live',

1888

'channel': 'ElevageOrVert',

1889

'channel_follower_count': int

1890

},

1891

'params': {

1892

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1897

# see [2] for an example with '};' inside ytInitialPlayerResponse

1898

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1899

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1900

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1905

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1906

'upload_date': '20130831',

1907

'uploader_id': 'kudvenkat',

1908

'uploader': 'kudvenkat',

1909

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1910

'like_count': int,

1911

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1912

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1913

'live_status': 'not_live',

1914

'categories': ['Education'],

1915

'availability': 'public',

1916

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1917

'tags': 'count:12',

1918

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1923

'channel_follower_count': int

1924

},

1925

'params': {

1926

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1931

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1932

'only_matching': True,

1933

},

1934

{

1935

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1936

'only_matching': True,

1937

},

1938

{

1939

# https://github.com/ytdl-org/youtube-dl/pull/28094

1940

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1946

'upload_date': '20141120',

1947

'uploader': 'The Cinematic Orchestra - Topic',

1948

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1949

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1950

'artist': 'The Cinematic Orchestra',

1951

'track': 'Burn Out',

1952

'album': 'Every Day',

1953

'like_count': int,

1954

'live_status': 'not_live',

1955

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1960

'creator': 'The Cinematic Orchestra',

1961

'channel': 'The Cinematic Orchestra',

1962

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1963

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1964

'availability': 'public',

1965

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1966

'categories': ['Music'],

1967

'playable_in_embed': True,

1968

'channel_follower_count': int

1969

},

1970

'params': {

1971

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1976

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1977

'only_matching': True,

1978

},

1979

{

1980

# controversial video, requires bpctr/contentCheckOk

1981

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1986

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1987

'uploader': 'CBS Mornings',

1988

'uploader_id': 'CBSThisMorning',

1989

'upload_date': '20140716',

1990

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1991

'duration': 170,

1992

'categories': ['News & Politics'],

1993

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1994

'view_count': int,

1995

'channel': 'CBS Mornings',

1996

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1997

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1998

'age_limit': 18,

1999

'availability': 'needs_auth',

2000

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

2001

'like_count': int,

2002

'live_status': 'not_live',

2003

'playable_in_embed': True,

2004

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

2009

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2014

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2015

'upload_date': '20201120',

2016

'uploader': 'Walk around Japan',

2017

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2018

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2019

'duration': 1456,

2020

'categories': ['Travel & Events'],

2021

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2022

'view_count': int,

2023

'channel': 'Walk around Japan',

2024

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2025

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2026

'age_limit': 0,

2027

'availability': 'public',

2028

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2029

'live_status': 'not_live',

2030

'playable_in_embed': True,

2031

'channel_follower_count': int

2032

},

2033

'params': {

2034

'skip_download': True,

2035

},

2036

}, {

2037

# Has multiple audio streams

2038

'url': 'WaOKSUlf4TM',

2039

'only_matching': True

2040

}, {

2041

# Requires Premium: has format 141 when requested using YTM url

2042

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2043

'only_matching': True

2044

}, {

2045

# multiple subtitles with same lang_code

2046

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2047

'only_matching': True,

2048

}, {

2049

# Force use android client fallback

2050

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2051

'info_dict': {

2052

'id': 'YOelRv7fMxY',

2053

'title': 'DIGGING A SECRET TUNNEL Part 1',

2054

'ext': '3gp',

2055

'upload_date': '20210624',

2056

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2057

'uploader': 'colinfurze',

2058

'uploader_id': 'colinfurze',

2059

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2060

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2061

'duration': 596,

2062

'categories': ['Entertainment'],

2063

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2064

'view_count': int,

2065

'channel': 'colinfurze',

2066

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2067

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2068

'age_limit': 0,

2069

'availability': 'public',

2070

'like_count': int,

2071

'live_status': 'not_live',

2072

'playable_in_embed': True,

2073

'channel_follower_count': int

2074

},

2075

'params': {

2076

'format': '17', # 3gp format available on android

2077

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2082

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2083

'only_matching': True,

2084

'params': {

2085

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2090

'only_matching': True,

2091

}, {

2092

'note': 'Storyboards',

2093

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2099

'uploader_id': 'scishow',

2100

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2101

'upload_date': '20140324',

2102

'uploader': 'SciShow',

2103

'like_count': int,

2104

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2105

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2106

'view_count': int,

2107

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2108

'playable_in_embed': True,

2109

'tags': 'count:12',

2110

'uploader_url': 'http://www.youtube.com/user/scishow',

2111

'availability': 'public',

2112

'channel': 'SciShow',

2113

'live_status': 'not_live',

2114

'duration': 248,

2115

'categories': ['Education'],

2116

'age_limit': 0,

2117

'channel_follower_count': int

2118

}, 'params': {'format': 'mhtml', 'skip_download': True}

2119

}, {

2120

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2121

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2126

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2127

'uploader': 'Leon Nguyen',

2128

'uploader_id': 'VNSXIII',

2129

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2130

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2131

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2136

'tags': 'count:23',

2137

'playable_in_embed': True,

2138

'live_status': 'not_live',

2139

'upload_date': '20220103',

2140

'like_count': int,

2141

'availability': 'public',

2142

'channel': 'Leon Nguyen',

2143

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2144

'channel_follower_count': int

2145

}

2146

}, {

2147

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2148

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2153

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2154

'uploader': 'Quackity',

2155

'uploader_id': 'QuackityHQ',

2156

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2157

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2158

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2163

'tags': 'count:26',

2164

'playable_in_embed': True,

2165

'live_status': 'not_live',

2166

'release_timestamp': 1641172509,

2167

'release_date': '20220103',

2168

'upload_date': '20220103',

2169

'like_count': int,

2170

'availability': 'public',

2171

'channel': 'Quackity',

2172

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2173

'channel_follower_count': int

2174

}

2175

},

2176

{ # continuous livestream. Microformat upload date should be preferred.

2177

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2178

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2179

'info_dict': {

2180

'id': 'kgx4WGK0oNU',

2181

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2182

'ext': 'mp4',

2183

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2184

'availability': 'public',

2185

'age_limit': 0,

2186

'release_timestamp': 1637975704,

2187

'upload_date': '20210619',

2188

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2189

'live_status': 'is_live',

2190

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2191

'uploader': '阿鲍Abao',

2192

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2193

'channel': 'Abao in Tokyo',

2194

'channel_follower_count': int,

2195

'release_date': '20211127',

2196

'tags': 'count:39',

2197

'categories': ['People & Blogs'],

2198

'like_count': int,

2199

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2200

'view_count': int,

2201

'playable_in_embed': True,

2202

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2203

},

2204

'params': {'skip_download': True}

2205

}, {

2206

# Story. Requires specific player params to work.

2207

# Note: stories get removed after some period of time

2208

'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',

'info_dict': {

'id': 'vv8qTUWmulI',

'ext': 'mp4',

'availability': 'unlisted',

2213

'view_count': int,

2214

'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',

2215

'upload_date': '20220526',

2216

'categories': ['Education'],

2217

'title': 'Story',

2218

'channel': 'IT\'S HISTORY',

2219

'description': '',

2220

'uploader_id': 'BlastfromthePast',

2221

'duration': 12,

2222

'uploader': 'IT\'S HISTORY',

2223

'playable_in_embed': True,

2224

'age_limit': 0,

2225

'live_status': 'not_live',

2226

'tags': [],

2227

'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',

2228

'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',

2229

'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',

2230

}

2231

}, {

2232

'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',

'info_dict': {

'id': 'tjjjtzRLHvA',

'ext': 'mp4',

'title': 'ハッシュタグ無し };if window.ytcsi',

2237

'upload_date': '20220323',

2238

'like_count': int,

2239

'availability': 'unlisted',

2240

'channel': 'nao20010128nao',

2241

'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',

2242

'age_limit': 0,

2243

'uploader': 'nao20010128nao',

2244

'uploader_id': 'nao20010128nao',

2245

'categories': ['Music'],

2246

'view_count': int,

2247

'description': '',

2248

'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',

2249

'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',

2250

'live_status': 'not_live',

2251

'playable_in_embed': True,

2252

'channel_follower_count': int,

2253

'duration': 6,

2254

'tags': [],

2255

'uploader_url': 'http://www.youtube.com/user/nao20010128nao',

}

}

]

@classmethod
def suitable(cls, url):
    """Reject URLs whose ``list`` query value is non-empty; otherwise defer to the parent class."""
    from ..utils import parse_qs

    list_value = parse_qs(url).get('list', [None])[0]
    return False if list_value else super().suitable(url)

2268

2269

def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # Both caches start empty; they are filled lazily by other methods of this class.
    self._player_cache = {}
    self._code_cache = {}

2273

2274

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Attach a lazily evaluated fragment generator to every ``is_from_start`` format.

    Each format dict whose ``is_from_start`` entry is truthy is modified in place:
    it is marked live, its protocol is set to ``http_dash_segments_generator`` and
    its ``fragments`` entry becomes a partial of ``_live_dash_fragments`` bound to
    the format id, ``live_start_time`` and the ``mpd_feed`` closure defined below.
    Nothing is returned.
    """
    # Serialises manifest refreshes requested through mpd_feed
    lock = threading.Lock()

    # State shared by (and rebound from) the closures below
    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once more than ``delay`` seconds passed since the last fetch."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # Replaces the shared format list and live flag with the freshly listed values
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        Refresh the manifest data if it is older than ``delay`` and look up ``format_id``.

        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        # The remaining positional argument (ctx) is supplied by whoever calls the partial
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2317

2318

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts (``url``, ``fragment_count``) for a live DASH format.

    The generator keeps polling while the stream is reported live. The newest
    sequence number is read either from the MPD manifest or from the
    ``X-Head-Seqnum`` header of the last yielded segment URL, and every
    sequence number not yet yielded is emitted as ``sq/<n>`` relative to the
    manifest's fragment base URL. It stops when ``mpd_feed`` reports the stream
    as no longer live or when the failure score exceeds 30.

    ``mpd_feed(format_id, delay)`` must return
    ``(manifest_url, manifest_stream_number, is_live)`` or a falsy value.
    ``ctx`` is read for ``start`` and popped for ``last_error``.
    """
    # FETCH_SPAN: minimum seconds between polling rounds
    # MAX_DURATION: 432000 s == 120 h, the window mentioned in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    # True when the stream started more than MAX_DURATION before the download
    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
        lack_early_segments = True

    # known_idx: next sequence number to yield
    # no_fragment_score: grows on failed/empty rounds, reset when new fragments arrive
    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest state; return ``(should_continue, last_sequence_number)``."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a quick (5 s) manifest expiry when forced or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        # NOTE(review): `last_seq` below is the enclosing function's variable, which is
        # first assigned inside the `while` loop - it looks unbound if an early return
        # is hit during the very first call; confirm.
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        # Sequence number of the newest fragment listed in the manifest
        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        # Give up after too many rounds without progress
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                # Header unavailable: fall back to the MPD on the next round
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # Make last_seq an exclusive upper bound for the range() below
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Do not go back further than MAX_DURATION worth of fragments
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only used to leave the for loop and restart the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                # Nothing new was available in this round
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of FETCH_SPAN before polling again
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2419

2420

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None.

    ``webpage`` is accepted but not used by this implementation.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None

2427

2428

def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the player version found in the iframe API script.

    Returns None when the script cannot be downloaded or no version is found
    (with ``fatal`` left False).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2437

2438

def _signature_cache_id(self, example_sig):

2439

""" Return a string representation of a signature """

2440

return '.'.join(str(len(part)) for part in example_sig.split('.'))

2441

2442

@classmethod

2443

def _extract_player_info(cls, player_url):

2444

for player_re in cls._PLAYER_INFO_RE:

2445

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2450

return id_m.group('id')

2451

2452

def _load_player(self, video_id, player_url, fatal=True):

2453

player_id = self._extract_player_info(player_url)

2454

if player_id not in self._code_cache:

2455

code = self._download_webpage(

2456

player_url, video_id, fatal=fatal,

2457

note='Downloading player ' + player_id,

2458

errnote='Download of %s failed' % player_url)

2459

if code:

2460

self._code_cache[player_id] = code

2461

return self._code_cache.get(player_id)

2462

2463

def _extract_signature_function(self, video_id, player_url, example_sig):

2464

player_id = self._extract_player_info(player_url)

2465

2466

# Read from filesystem cache

2467

func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'

2468

assert os.path.basename(func_id) == func_id

2469

2470

self.write_debug(f'Extracting signature function {func_id}')

2471

cache_spec = self.cache.load('youtube-sigfuncs', func_id)

2472

if cache_spec is not None:

2473

return lambda s: ''.join(s[i] for i in cache_spec)

2474

2475

code = self._load_player(video_id, player_url)

2476

if code:

2477

res = self._parse_sig_js(code)

2478

2479

test_string = ''.join(map(chr, range(len(example_sig))))

2480

cache_res = res(test_string)

2481

cache_spec = [ord(c) for c in cache_res]

2482

2483

self.cache.store('youtube-sigfuncs', func_id, cache_spec)

2484

return res

2485

2486

def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to ``func`` when ``youtube_print_sig_code`` is set.

    ``func`` is run on a probe string whose characters are chr(0), chr(1), ...,
    so the code points of the result say which input index each output character
    came from. Those indices are then written as a sum of ``s[...]`` index and
    slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield ``s[i]`` / ``s[a:b:step]`` expressions that together reproduce ``idxs``."""
        def _genslice(start, end, step):
            # Render a run from index `start` to index `end` (inclusive) as a slice expression
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        # step is None while no run of consecutive (+1 / -1) indices is open
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # The open run continues
                    continue
                # The run ended at `prev`
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at `prev`
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is left over for the final index
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2527

2528

def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and wrap it as a Python callable.

    The patterns are tried in order; the ``sig`` group of the first one that
    matches names the JavaScript function. The returned callable takes the
    scrambled signature string and passes it to that function as its single
    argument.
    """
    # Literal parentheses in the JavaScript source must be written as \( and \);
    # a bare `$` is an end-of-string anchor and can never match in these positions.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function expects its arguments as a list
    return lambda s: initial_function([s])

2551

2552

def _decrypt_signature(self, s, video_id, player_url):

2553

"""Turn the encrypted s field into a working signature"""

2554

try:

2555

player_id = (player_url, self._signature_cache_id(s))

2556

if player_id not in self._player_cache:

2557

func = self._extract_signature_function(video_id, player_url, s)

2558

self._player_cache[player_id] = func

2559

func = self._player_cache[player_id]

2560

self._print_sig_code(func, s)

2561

return func(s)

2562

except Exception as e:

2563

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2564

2565

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature.

    Both the n function (per player URL) and every decrypted value (per input)
    are kept in ``_player_cache``. Raises ExtractorError when ``player_url`` is
    None or when extraction or decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2585

2586

def _extract_n_function_name(self, jscode):

2587

nfunc, idx = self._search_regex(

2588

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2589

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2590

if not idx:

2591

return nfunc

2592

return json.loads(js_to_json(self._search_regex(

2593

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2594

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2595

2596

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is loaded from the ``youtube-nsig`` cache section when
    present; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id)

    if not func_code:
        player_js = self._load_player(video_id, player_url)
        n_func_name = self._extract_n_function_name(player_js)
        jsi = JSInterpreter(player_js)
        func_code = jsi.extract_function_code(n_func_name)
        self.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function object is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function

2613

2614

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``STS`` from ``ytcfg`` is used when ``ytcfg`` is a dict and the value is
    truthy; otherwise the player JS is searched for a five-digit timestamp.
    Without a ``player_url`` a warning is reported, or ExtractorError is
    raised when ``fatal`` is set.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if not player_js:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
        'JS player signature timestamp', group='sts', fatal=fatal))

2637

2638

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs from the player responses to mark the video as watched.

    The playback URL is requested first, then the watchtime URL. Processing
    stops with a warning as soon as one of the URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        base_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'),
            expected_type=url_or_none)
        if not base_url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(base_url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        length_values = qs.get('len') or ['1.5']
        video_length = [str(float(length_values[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        query = urllib.parse.urlencode(qs, True)
        tracking_url = urllib.parse.urlunparse(parsed_url._replace(query=query))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)

2678

2679

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embed URLs and video ids found in ``webpage``.

    The result lists, in this order: player URLs from iframe/embed/object/SWF
    style embeds, ids from lazyYT elements, and ids from the WordPress
    "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    embedded_players = [
        unescapeHTML(match.group('url'))
        for match in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

    # lazyYT YouTube embed
    lazy_ids = [
        unescapeHTML(video_id)
        for video_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)]

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    importer_ids = [groups[-1] for groups in importer_matches]

    return embedded_players + lazy_ids + importer_ids

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2715

2716

@classmethod
def extract_id(cls, url):
    """Return the video id for ``url``; raise ExtractorError when none can be derived."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')

2722

2723

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` chapter list in ``data``."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)

2737

2738

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else an empty list."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2750

2751

def _extract_chapters_from_description(self, description, duration):
    """Build chapters from description lines that start with a ``[h:]mm:ss`` timestamp."""
    # Every match is a (timestamp, title) tuple
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def chapter_time(entry):
        return parse_duration(entry[0])

    def chapter_title(entry):
        return entry[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=chapter_time, chapter_title=chapter_title,
        duration=duration, strict=False)

2756

2757

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):

if not duration:

return

chapter_list = [{

'start_time': chapter_time(chapter),

2762

'title': chapter_title(chapter),

2763

} for chapter in chapter_list or []]

2764

if not strict:

2765

chapter_list.sort(key=lambda c: c['start_time'] or 0)

2766

2767

chapters = [{'start_time': 0}]

2768

for idx, chapter in enumerate(chapter_list):

2769

if chapter['start_time'] is None:

2770

self.report_warning(f'Incomplete chapter {idx}')

2771

elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:

2772

chapters.append(chapter)

2773

else:

2774

self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')

2775

return chapters[1:]

2776

2777

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    @param comment_renderer  Dict describing a single comment
    @param parent            ID of the parent comment; falsy for top-level
                             comments, which get the parent 'root'
    @returns                 Comment info dict, or None if the renderer has
                             no `commentId`
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    # The like count is read from either `voteCount.simpleText` or `likeCount`
    votes = parse_count(try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2811

2812

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or reply thread.

    Called without `parent` it walks the top-level comment section; for each
    comment that has replies it recurses with `parent` set to that comment's
    ID and the same `tracker`.

    @param root_continuation_data  Renderer from which the initial continuation is extracted
    @param ytcfg                   ytcfg used for the API requests
    @param video_id                ID of the video
    @param parent                  ID of the parent comment, or None for the top level
    @param tracker                 Dict of counters shared across recursive calls;
                                   created on the first call
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the comment count and return the continuation for the chosen sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments (and their replies) found in one page of items"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop extraction
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and the remaining global reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric limits fall back to sys.maxsize, i.e. "no limit".
    # max_comments is unpacked only for position; it is not used in this method.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is treated as the header; it only provides
                # the continuation for the requested sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)

2957

2958

@staticmethod

2959

def _generate_comment_continuation(video_id):

2960

"""

2961

Generates initial comment section continuation token from given video id

2962

"""

2963

token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'

2964

return base64.b64encode(token.encode()).decode()

2965

2966

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Locates the item section whose `sectionIdentifier` is
    'comment-item-section' and lazily yields its comments.

    @param ytcfg     ytcfg forwarded to `_comment_entries`
    @param video_id  ID of the video
    @param contents  Renderers to search for the comment section
    @param webpage   Not used by this method
    @returns         Iterator of comment dicts, capped at the first value of
                     the `max_comments` extractor argument when it is numeric
    """
    def _real_comment_extract(contents):
        renderer = next((
            item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
            if item.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # int_or_none yields None for a missing/non-numeric value, which islice treats as "no limit"
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)

2976

2977

@staticmethod

2978

def _get_checkok_params():

2979

return {'contentCheckOk': True, 'racyCheckOk': True}

2980

2981

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player API query.

    @param sts  Signature timestamp; included as `signatureTimestamp`
                only when it is not None
    @returns    Dict with `playbackContext` plus the flags from
                `_get_checkok_params`
    """
    context = {
        'html5Preference': 'HTML5_PREF_WANTS',
    }
    if sts is not None:
        context['signatureTimestamp'] = sts
    return {
        'playbackContext': {
            'contentPlaybackContext': context
        },
        **cls._get_checkok_params()
    }

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video.

    @param player_response  Player response dict
    @returns                True if `playabilityStatus` carries a
                            `desktopLegacyAgeGateReason`, or its `status`/`reason`
                            contains one of the known age-gate markers
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Compare case-insensitively: the markers below are lowercase, while
    # status values used elsewhere in this file are uppercase (e.g. 'UNPLAYABLE'),
    # so a case-sensitive substring test could never match a status.
    reasons = [
        reason.lower()
        for reason in traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
        if isinstance(reason, str)]
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)

3006

3007

@staticmethod
def _is_unplayable(player_response):
    """Return True if the response's `playabilityStatus.status` is exactly 'UNPLAYABLE'"""
    return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'

3010

3011

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Download the player API response for one client.

    @param client        Name of the client to request as
    @param video_id      ID of the video
    @param master_ytcfg  Fallback ytcfg for session index and account sync ID
    @param player_ytcfg  ytcfg of the client, used for the request itself
    @param player_url    Player JS URL; when falsy no signature timestamp is sent
    @param initial_pr    Initial player response, used for the account sync ID lookup
    @returns             The player response, or None if it is empty
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB'  # enable stories
    }
    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
    ) or None

3030

3031

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered client list.

    Recognised values are any client in INNERTUBE_CLIENTS whose name does not
    start with '_', plus the aliases 'default' and 'all'. Unknown values are
    skipped with a warning. For music URLs, the `*_music` variant of each
    requested client is appended when such a client exists.

    @param url            URL being extracted
    @param smuggled_data  Smuggled data dict; `is_music_url` forces music handling
    @returns              De-duplicated list of client names, in request order
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so the list is not extended while it is
        # being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

3054

3055

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for all requested clients.

    Clients are tried in the given order. Depending on the response
    (unplayable / age-gated), additional fallback clients may be queued.

    @param clients       Ordered list of client names to try
    @param video_id      ID of the video
    @param webpage       Watch page HTML, or a falsy value if not downloaded
    @param master_ytcfg  ytcfg used for the 'web' client and as fallback
    @returns             Tuple (list of player responses, player URL or None)
    @raises              The last ExtractorError if no response could be obtained
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below serves them in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback player URL download is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3129

3130

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generator yielding format dicts from the given streaming data.

    First yields the direct (https) formats listed under `formats` and
    `adaptiveFormats`, then formats from the HLS and DASH manifests unless
    these are skipped through parameters/extractor arguments.

    @param streaming_data  List of `streamingData` dicts
    @param video_id        ID of the video
    @param player_url      Player JS URL, needed to decrypt signatures
    @param is_live         Whether the video is currently live
    @param duration        Video duration, used to detect damaged formats
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None here when the format carries
                # neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copy before appending so that the list handed out by _configuration_arg
    # is never modified here
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; return False for duplicates"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generator yielding storyboard formats from the player responses.

    The storyboard `spec` is a '|'-separated string: the first field is the
    base URL and each following field holds 8 '#'-separated values
    describing one storyboard level.

    @param player_responses  Player responses to search for a storyboard spec
    @param duration          Video duration, used to compute fragment durations
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the first field (the base URL)
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if duration is None:
        # Fragment durations are derived from the video duration; dividing
        # None below would raise a TypeError
        self.write_debug('Skipping storyboards: video duration is unknown')
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one sheet holding cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment_duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3340

3341

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and all player responses.

    @param url            Original URL, used to determine the requested clients
    @param smuggled_data  Smuggled data dict
    @param video_id       ID of the video
    @param webpage_url    Watch page URL to download
    @returns              Tuple (webpage, master_ytcfg, player_responses, player_url);
                          webpage is None when 'webpage' is in the `player_skip` argument
    """
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3354

3355

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine live status and extract all formats of the video.

    @param video_id          ID of the video
    @param microformats      List of microformat renderers
    @param video_details     List of `videoDetails` dicts
    @param player_responses  List of player responses
    @param player_url        Player JS URL forwarded to `_extract_formats`
    @param duration          Video duration forwarded to `_extract_formats`
    @returns                 Tuple (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        # Fall back to the broadcast details when videoDetails has no `isLive`
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))

    return live_broadcast_details, is_live, streaming_data, formats

3365

3366

def _real_extract(self, url):

3367

url, smuggled_data = unsmuggle_url(url, {})

3368

video_id = self._match_id(url)

3369

3370

base_url = self.http_scheme() + '//www.youtube.com/'

3371

webpage_url = base_url + 'watch?v=' + video_id

3372

3373

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3374

3375

playability_statuses = traverse_obj(

3376

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3377

3378

trailer_video_id = get_first(

3379

playability_statuses,

3380

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3381

expected_type=str)

3382

if trailer_video_id:

3383

return self.url_result(

3384

trailer_video_id, self.ie_key(), trailer_video_id)

3385

3386

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3387

if webpage else (lambda x: None))

3388

3389

video_details = traverse_obj(

3390

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3391

microformats = traverse_obj(

3392

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3393

expected_type=dict, default=[])

3394

video_title = (

3395

get_first(video_details, 'title')

3396

or self._get_text(microformats, (..., 'title'))

3397

or search_meta(['og:title', 'twitter:title', 'title']))

3398

video_description = get_first(video_details, 'shortDescription')

3399

3400

multifeed_metadata_list = get_first(

3401

player_responses,

3402

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3403

expected_type=str)

3404

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3405

if self.get_param('noplaylist'):

3406

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3411

# Unquote should take place before split on comma (,) since textual

3412

# fields may contain comma as well (see

3413

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3414

feed_data = urllib.parse.parse_qs(

3415

urllib.parse.unquote_plus(feed))

3416

3417

def feed_entry(name):

3418

return try_get(

3419

feed_data, lambda x: x[name][0], str)

3420

3421

feed_id = feed_entry('id')

3422

if not feed_id:

3423

continue

3424

feed_title = feed_entry('title')

3425

title = video_title

3426

if feed_title:

3427

title += ' (%s)' % feed_title

3428

entries.append({

3429

'_type': 'url_transparent',

3430

'ie_key': 'Youtube',

3431

'url': smuggle_url(

3432

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3433

{'force_singlefeed': True}),

3434

'title': title,

3435

})

3436

feed_ids.append(feed_id)

3437

self.to_screen(

3438

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3439

% (', '.join(feed_ids), video_id))

3440

return self.playlist_result(

3441

entries, video_id, video_title, video_description)

3442

3443

duration = int_or_none(

3444

get_first(video_details, 'lengthSeconds')

3445

or get_first(microformats, 'lengthSeconds')

3446

or parse_duration(search_meta('duration'))) or None

3447

3448

if get_first(video_details, 'isPostLiveDvr'):

3449

self.write_debug('Video is in Post-Live Manifestless mode')

3450

if duration or 0 > 4 * 3600:

3451

self.report_warning(

3452

'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '

3453

'This is a known issue and patches are welcome')

3454

3455

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3456

video_id, microformats, video_details, player_responses, player_url, duration)

3457

3458

if not formats:

3459

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3460

self.report_drm(video_id)

3461

pemr = get_first(

3462

playability_statuses,

3463

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3464

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3465

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3466

if subreason:

3467

if subreason == 'The uploader has not made this video available in your country.':

3468

countries = get_first(microformats, 'availableCountries')

3469

if not countries:

3470

regions_allowed = search_meta('regionsAllowed')

3471

countries = regions_allowed.split(',') if regions_allowed else None

3472

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3473

reason += f'. {subreason}'

3474

if reason:

3475

self.raise_no_formats(reason, expected=True)

3476

3477

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3478

if not keywords and webpage:

3479

keywords = [

3480

unescapeHTML(m.group('content'))

3481

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3482

for keyword in keywords:

3483

if keyword.startswith('yt:stretch='):

3484

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3485

if mobj:

3486

# NB: float is intentional for forcing float division

3487

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3492

f['stretched_ratio'] = ratio

3493

break

3494

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3495

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3496

if thumbnail_url:

3497

thumbnails.append({

3498

'url': thumbnail_url,

3499

})

3500

original_thumbnails = thumbnails.copy()

3501

3502

# The best resolution thumbnails sometimes does not appear in the webpage

3503

# See: https://github.com/yt-dlp/yt-dlp/issues/340

3504

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3505

thumbnail_names = [

3506

# While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants

3507

# in resolution, these are not the custom thumbnail. So de-prioritize them

3508

'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',

3509

'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'

3510

]

3511

n_thumbnail_names = len(thumbnail_names)

3512

thumbnails.extend({

3513

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3514

video_id=video_id, name=name, ext=ext,

3515

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3516

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3517

for thumb in thumbnails:

3518

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3519

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3520

self._remove_duplicate_formats(thumbnails)

3521

self._downloader._sort_thumbnails(original_thumbnails)

3522

3523

category = get_first(microformats, 'category') or search_meta('genre')

3524

channel_id = str_or_none(

3525

get_first(video_details, 'channelId')

3526

or get_first(microformats, 'externalChannelId')

3527

or search_meta('channelId'))

3528

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3529

3530

live_content = get_first(video_details, 'isLiveContent')

3531

is_upcoming = get_first(video_details, 'isUpcoming')

3532

if is_live is None:

3533

if is_upcoming or live_content is False:

3534

is_live = False

3535

if is_upcoming is None and (live_content or is_live):

3536

is_upcoming = False

3537

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3538

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3539

if not duration and live_end_time and live_start_time:

3540

duration = live_end_time - live_start_time

3541

3542

if is_live and self.get_param('live_from_start'):

3543

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3544

3545

formats.extend(self._extract_storyboard(player_responses, duration))

3546

3547

# Source is given priority since formats that throttle are given lower source_preference

3548

# When throttling issue is fully fixed, remove this

3549

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3554

'formats': formats,

3555

'thumbnails': thumbnails,

3556

# The best thumbnail that we are sure exists. Prevents unnecessary

3557

# URL checking if user don't care about getting the best possible thumbnail

3558

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3559

'description': video_description,

3560

'uploader': get_first(video_details, 'author'),

3561

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3562

'uploader_url': owner_profile_url,

3563

'channel_id': channel_id,

3564

'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),

3565

'duration': duration,

3566

'view_count': int_or_none(

3567

get_first((video_details, microformats), (..., 'viewCount'))

3568

or search_meta('interactionCount')),

3569

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3570

'age_limit': 18 if (

3571

get_first(microformats, 'isFamilySafe') is False

3572

or search_meta('isFamilyFriendly') == 'false'

3573

or search_meta('og:restrictions:age') == '18+') else 0,

3574

'webpage_url': webpage_url,

3575

'categories': [category] if category else None,

3576

'tags': keywords,

3577

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3578

'is_live': is_live,

3579

'was_live': (False if is_live or is_upcoming or live_content is False

3580

else None if is_live is None or is_upcoming is None

3581

else live_content),

3582

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3583

'release_timestamp': live_start_time,

3584

}

3585

3586

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3587

if pctr:

3588

def get_lang_code(track):

3589

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3590

or track.get('languageCode'))

3591

3592

# Converted into dicts to remove duplicates

3593

captions = {

3594

get_lang_code(sub): sub

3595

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3596

translation_languages = {

3597

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3598

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3599

3600

def process_language(container, base_url, lang_code, sub_name, query):

3601

lang_subs = container.setdefault(lang_code, [])

3602

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3613

for lang_code, caption_track in captions.items():

3614

base_url = caption_track.get('baseUrl')

3615

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3616

if not base_url:

3617

continue

3618

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3619

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3624

if not caption_track.get('isTranslatable'):

3625

continue

3626

for trans_code, trans_name in translation_languages.items():

3627

if not trans_code:

3628

continue

3629

orig_trans_code = trans_code

3630

if caption_track.get('kind') != 'asr':

3631

if 'translated_subs' in self._configuration_arg('skip'):

3632

continue

3633

trans_code += f'-{lang_code}'

3634

trans_name += format_field(lang_name, None, ' from %s')

3635

# Add an "-orig" label to the original language so that it can be distinguished.

3636

# The subs are returned without "-orig" as well for compatibility

3637

if lang_code == f'a-{orig_trans_code}':

3638

process_language(

3639

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3640

# Setting tlang=lang returns damaged subtitles.

3641

process_language(automatic_captions, base_url, trans_code, trans_name,

3642

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3643

info['automatic_captions'] = automatic_captions

3644

info['subtitles'] = subtitles

3645

3646

parsed_url = urllib.parse.urlparse(url)

3647

for component in [parsed_url.fragment, parsed_url.query]:

3648

query = urllib.parse.parse_qs(component)

3649

for k, v in query.items():

3650

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3651

d_k += '_time'

3652

if d_k not in info and k in s_ks:

3653

info[d_k] = parse_duration(query[k][0])

3654

3655

# Youtube Music Auto-generated description

3656

if video_description:

3657

mobj = re.search(

3658

r'''(?xs)

3659

(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+

3660

(?P<album>[^\n]+)

3661

(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?

3662

(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?

3663

(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?

3664

.+\nAuto-generated\ by\ YouTube\.\s*$

3665

''', video_description)

3666

if mobj:

3667

release_year = mobj.group('release_year')

3668

release_date = mobj.group('release_date')

3669

if release_date:

3670

release_date = release_date.replace('-', '')

3671

if not release_year:

3672

release_year = release_date[:4]

3673

info.update({

3674

'album': mobj.group('album'.strip()),

3675

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3676

'track': mobj.group('track').strip(),

3677

'release_date': release_date,

3678

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)

3684

if not initial_data:

3685

query = {'videoId': video_id}

3686

query.update(self._get_checkok_params())

3687

initial_data = self._extract_response(

3688

item_id=video_id, ep='next', fatal=False,

3689

ytcfg=master_ytcfg, query=query,

3690

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3691

note='Downloading initial data API JSON')

3692

3693

info['comment_count'] = traverse_obj(initial_data, (

3694

'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',

3695

'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'

3696

), (

3697

'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',

3698

'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'

3699

), expected_type=int_or_none, get_all=False)

3700

3701

try: # This will error if there is no livechat

3702

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3703

except (KeyError, IndexError, TypeError):

3704

pass

3705

else:

3706

info.setdefault('subtitles', {})['live_chat'] = [{

3707

# url is needed to set cookies

3708

'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',

3709

'video_id': video_id,

3710

'ext': 'json',

3711

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

}]

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3717

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3718

or self._extract_chapters_from_description(video_description, duration)

3719

or None)

3720

3721

contents = traverse_obj(

3722

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3723

expected_type=list, default=[])

3724

3725

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3726

if vpir:

3727

stl = vpir.get('superTitleLink')

3728

if stl:

3729

stl = self._get_text(stl)

3730

if try_get(

3731

vpir,

3732

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3733

info['location'] = stl

3734

else:

3735

mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)

3736

if mobj:

3737

info.update({

3738

'series': mobj.group(1),

3739

'season_number': int(mobj.group(2)),

3740

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3745

list) or []):

3746

tbr = tlb.get('toggleButtonRenderer') or {}

3747

for getter, regex in [(

3748

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3749

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3750

lambda x: x['accessibility'],

3751

lambda x: x['accessibilityData']['accessibilityData'],

3752

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3753

label = (try_get(tbr, getter, dict) or {}).get('label')

3754

if label:

3755

mobj = re.match(regex, label)

3756

if mobj:

3757

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3758

break

3759

sbr_tooltip = try_get(

3760

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3761

if sbr_tooltip:

3762

like_count, dislike_count = sbr_tooltip.split(' / ')

3763

info.update({

3764

'like_count': str_to_int(like_count),

3765

'dislike_count': str_to_int(dislike_count),

3766

})

3767

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3768

if vsir:

3769

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3770

info.update({

3771

'channel': self._get_text(vor, 'title'),

3772

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3777

list) or []

3778

multiple_songs = False

3779

for row in rows:

3780

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3781

multiple_songs = True

3782

break

3783

for row in rows:

3784

mrr = row.get('metadataRowRenderer') or {}

3785

mrr_title = mrr.get('title')

3786

if not mrr_title:

3787

continue

3788

mrr_title = self._get_text(mrr, 'title')

3789

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3790

if mrr_title == 'License':

3791

info['license'] = mrr_contents_text

3792

elif not multiple_songs:

3793

if mrr_title == 'Album':

3794

info['album'] = mrr_contents_text

3795

elif mrr_title == 'Artist':

3796

info['artist'] = mrr_contents_text

3797

elif mrr_title == 'Song':

3798

info['track'] = mrr_contents_text

3799

3800

fallbacks = {

3801

'channel': 'uploader',

3802

'channel_id': 'uploader_id',

3803

'channel_url': 'uploader_url',

3804

}

3805

3806

# The upload date for scheduled, live and past live streams / premieres in microformats

3807

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3808

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3809

upload_date = (

3810

unified_strdate(get_first(microformats, 'uploadDate'))

3811

or unified_strdate(search_meta('uploadDate')))

3812

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3813

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date

3814

info['upload_date'] = upload_date

3815

3816

for to, frm in fallbacks.items():

3817

if not info.get(to):

3818

info[to] = info.get(frm)

3819

3820

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3826

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3827

is_membersonly = None

3828

is_premium = None

3829

if initial_data and is_private is not None:

3830

is_membersonly = False

3831

is_premium = False

3832

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3833

badge_labels = set()

3834

for content in contents:

3835

if not isinstance(content, dict):

3836

continue

3837

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3838

for badge_label in badge_labels:

3839

if badge_label.lower() == 'members only':

3840

is_membersonly = True

3841

elif badge_label.lower() == 'premium':

3842

is_premium = True

3843

elif badge_label.lower() == 'unlisted':

3844

is_unlisted = True

3845

3846

info['availability'] = self._availability(

3847

is_private=is_private,

3848

needs_premium=is_premium,

3849

needs_subscription=is_membersonly,

3850

needs_auth=info['age_limit'] >= 18,

3851

is_unlisted=None if is_private is None else is_unlisted)

3852

3853

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3854

3855

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3861

3862

@staticmethod

3863

def passthrough_smuggled_data(func):

3864

def _smuggle(entries, smuggled_data):

3865

for entry in entries:

3866

# TODO: Convert URL to music.youtube instead.

3867

# Do we need to passthrough any other smuggled_data?

3868

entry['url'] = smuggle_url(entry['url'], smuggled_data)

3869

yield entry

3870

3871

@functools.wraps(func)

3872

def wrapper(self, url):

3873

url, smuggled_data = unsmuggle_url(url, {})

3874

if self.is_music_url(url):

3875

smuggled_data['is_music_url'] = True

3876

info_dict = func(self, url, smuggled_data)

3877

if smuggled_data and info_dict.get('entries'):

3878

info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)

return info_dict

return wrapper

def _extract_channel_id(self, webpage):

3883

channel_id = self._html_search_meta(

3884

'channelId', webpage, 'channel id', default=None)

3885

if channel_id:

3886

return channel_id

3887

channel_url = self._html_search_meta(

3888

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3889

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3890

'twitter:app:url:googleplay'), webpage, 'channel url')

3891

return self._search_regex(

3892

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3893

channel_url, 'channel id')

3894

3895

@staticmethod

3896

def _extract_basic_item_renderer(item):

3897

# Modified from _extract_grid_item_renderer

3898

known_basic_renderers = (

3899

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3900

)

3901

for key, renderer in item.items():

3902

if not isinstance(renderer, dict):

3903

continue

3904

elif key in known_basic_renderers:

3905

return renderer

3906

elif key.startswith('grid') and key.endswith('Renderer'):

3907

return renderer

3908

3909

def _grid_entries(self, grid_renderer):

3910

for item in grid_renderer['items']:

3911

if not isinstance(item, dict):

3912

continue

3913

renderer = self._extract_basic_item_renderer(item)

3914

if not isinstance(renderer, dict):

3915

continue

3916

title = self._get_text(renderer, 'title')

3917

3918

# playlist

3919

playlist_id = renderer.get('playlistId')

3920

if playlist_id:

3921

yield self.url_result(

3922

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3923

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3928

if video_id:

3929

yield self._extract_video(renderer)

3930

continue

3931

# channel

3932

channel_id = renderer.get('channelId')

3933

if channel_id:

3934

yield self.url_result(

3935

'https://www.youtube.com/channel/%s' % channel_id,

3936

ie=YoutubeTabIE.ie_key(), video_title=title)

3937

continue

3938

# generic endpoint URL support

3939

ep_url = urljoin('https://www.youtube.com/', try_get(

3940

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3941

str))

3942

if ep_url:

3943

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3944

if ie.suitable(ep_url):

3945

yield self.url_result(

3946

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3947

break

3948

3949

def _music_reponsive_list_entry(self, renderer):
    """
    Convert a music list item renderer into a url result.

    Resolution order: a video from 'playlistItemData', a playlist from the watch
    endpoint (with the video it points to, if any), and finally a browse endpoint.
    Returns None if the renderer has none of them.
    """
    music_url = 'https://music.youtube.com'

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'{music_url}/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        target = (
            f'{music_url}/watch?v={video_id}&list={playlist_id}' if video_id
            else f'{music_url}/playlist?list={playlist_id}')
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'{music_url}/browse/{browse_id}', ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3966

3967

def _shelf_entries_from_content(self, shelf_renderer):

3968

content = shelf_renderer.get('content')

3969

if not isinstance(content, dict):

3970

return

3971

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3972

if renderer:

3973

# TODO: add support for nested playlists so each shelf is processed

3974

# as separate playlist

3975

# TODO: this includes only first N items

3976

yield from self._grid_entries(renderer)

3977

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Generate entries for a shelf: a url result for the shelf's own URL (if it
    has one), followed by the entries embedded in the shelf's content.

    @param skip_channels: yield nothing at all for shelves whose URL contains '/channels?'
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    # Skipping links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3997

3998

def _playlist_entries(self, video_list_renderer):

3999

for content in video_list_renderer['contents']:

4000

if not isinstance(content, dict):

4001

continue

4002

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

4003

if not isinstance(renderer, dict):

4004

continue

4005

video_id = renderer.get('videoId')

4006

if not video_id:

4007

continue

4008

yield self._extract_video(renderer)

4009

4010

def _rich_entries(self, rich_grid_renderer):
    """Generate the (at most one) video entry held in the 'content' of a rich item renderer"""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

4017

4018

def _video_entry(self, video_renderer):

4019

video_id = video_renderer.get('videoId')

4020

if video_id:

4021

return self._extract_video(video_renderer)

4022

4023

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the page a hashtag tile links to, or None if it has no URL"""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

4029

4030

def _post_thread_entries(self, post_thread_renderer):
    """
    Generate the entries referenced by a community post: the attached video,
    the attached playlist and any video linked from the text of the post.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # A link to the attached video would only duplicate its entry
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

4065

4066

def _post_thread_continuation_entries(self, post_thread_continuation):

4067

contents = post_thread_continuation.get('contents')

4068

if not isinstance(contents, list):

4069

return

4070

for content in contents:

4071

renderer = content.get('backstagePostThreadRenderer')

4072

if isinstance(renderer, dict):

4073

yield from self._post_thread_entries(renderer)

4074

continue

4075

renderer = content.get('videoRenderer')

4076

if isinstance(renderer, dict):

4077

yield self._video_entry(renderer)

4078

4079

r''' # unused

4080

def _rich_grid_entries(self, contents):

4081

for content in contents:

4082

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

4083

if video_renderer:

4084

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """
    Generate the entries found in the 'contents' of a renderer.

    @param parent_renderer: renderer whose 'contents' are processed
    @param continuation_list: single-item list used as an output parameter; it is
                              reset on entry and its item is set to the
                              continuation found while extracting
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries (may contain None)
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is used
            for key, renderer in section_item.items():
                if key in entry_extractors:
                    yield from (entry for entry in entry_extractors[key](renderer) if entry)
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4137

4138

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Generate all the entries of a tab, requesting further pages for as long as
    a continuation is found.

    @param tab: tab renderer; its 'content' must hold a 'sectionListRenderer'
                or a 'richGridRenderer'
    @param item_id: used to label the continuation requests
    @param ytcfg, account_syncid, visitor_data: used to generate the API headers
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up when called, so the lists rebound below are used
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key in 'continuationContents' -> entry generator
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first continuation item -> (entry generator, key the generator expects the items under)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        matched_key = next((key for key in continuation_contents if key in continuation_handlers), None)
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from continuation_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next((key for key in first_item if key in item_handlers), None)
        if matched_key is None:
            # Nothing we know how to continue with
            break
        handler, items_key = item_handlers[matched_key]
        video_items_renderer = {items_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4215

for tab in tabs:

4216

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4217

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4222

4223

def _extract_uploader(self, data):
    """
    Extract the uploader fields of a playlist from its secondary sidebar renderer.

    @returns: dict with those of 'uploader', 'uploader_id' and 'uploader_url'
              that could be found; empty if the owner is not available
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    uploader = {
        # For collaborative playlists, keep only the name of the first owner
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {field: value for field, value in uploader.items() if value is not None}

4238

4239

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a page made of tabs.

    The metadata comes from the channel (or else playlist) metadata renderer,
    the primary sidebar and the header; the entries come from the selected tab.
    @param item_id: fallback for the playlist ID
    @param data: response of the page
    @param tabs: tab renderers of the page
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Adds the uncropped variant of the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for suffix in ('', '_id', '_url'):
        metadata[f'channel{suffix}'] = metadata[f'uploader{suffix}']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4330

4331

def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Generate the videos of a playlist that is embedded in a watch page.

    After each page, the next one is requested starting from the last video
    seen. Extraction stops when a page has no videos, or none after the last
    video already yielded, or when the response contains no playlist.
    @param playlist: playlist renderer with the videos under 'contents'
    @param playlist_id: ID of the playlist, used for the continuation requests
    @param data, ytcfg: used to generate the API headers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Pages overlap, so only yield what comes after the last video seen
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item need not be a playlistPanelVideoRenderer (or have a
        # watch endpoint); fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response; there is nothing more to extract
            return

4361

4362

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist found inside a page.

    Returns a url_result for the playlist's own URL when one is present,
    differs from `url` and the playlist ID is not a known-unviewable one.
    Otherwise returns a playlist_result over the inline playlist entries.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Everything except mix playlists is delegated to the regular tab-based playlist URL
    web_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_url)

    # Some playlists are unviewable, yet YouTube still links to the (broken) playlist page,
    # e.g. MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)

4384

4385

def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its primary sidebar renderer.
    Note: nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # When authenticated, personal playlists show a visibility dropdown rather than a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        selected = try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if selected:
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                # Only the first selected entry that has a label is taken into account
                labels.add(selected_label.lower())
                break

    private = unlisted = None
    for label in labels:
        if label == 'public':
            unlisted = private = False
        elif label == 'private':
            private = True
        elif label == 'unlisted':
            unlisted = True
    return self._availability(private, False, False, False, unlisted)

4416

4417

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items of `data`, or None when there is none.
    """
    items = try_get(data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Evaluated lazily, so the lookup stops at the first match
    candidates = (try_get(entry, lambda x: x[info_renderer], expected_type) for entry in items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again, this time including unavailable videos.

    The browse ID and params come from the 'show unavailable videos' menu
    entry of the primary sidebar renderer. If that entry is missing, default
    values are used. Nothing is requested (None is returned) when `data`
    has no primary sidebar renderer. The request itself is non-fatal.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        caption = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if caption and caption.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    return self._extract_response(
        item_id=item_id,
        headers=self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(data, ytcfg)),
        query={
            'params': endpoint.get('params') or 'wgYCCAA=',
            'browseId': endpoint.get('browseId') or 'VL%s' % item_id,
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4461

4462

@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped

4465

4466

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on failure.

    Up to 'extractor_retries' (default 3) additional attempts are made when
    the download fails with a network error other than HTTP 403/429, or
    when the initial data has none of the expected top-level keys.

    @param fatal: if true, raise on failure; otherwise only report a warning
    @returns (webpage, data)
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = failure = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if failure:
            self.report_warning('%s. Retrying ...' % failure)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage' + retry_suffix)
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            # Network errors are retried, except HTTP 403 and 429 responses
            retryable = isinstance(err.cause, network_exceptions) and (
                not isinstance(err.cause, urllib.error.HTTPError) or err.cause.code not in (403, 429))
            if retryable:
                failure = error_to_compat_str(err.cause or err.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        # Only reached when the download and the data extraction succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        failure = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(failure)
            self.report_warning(failure)
            break

    return webpage, data

def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage.

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises if `fatal` is set and 'authcheck' is not in the 'skip'
    extractor argument of YoutubeTabIE, and reports a one-time warning if not.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)

4523

4524

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the page data for `url`, preferring the webpage over the API.

    The webpage is used unless skipped by configuration. When it yields no
    data, the data is fetched through the tab endpoint instead.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4543

4544

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and fetch
    the response of the browse or watch endpoint it points to.

    Raises ExtractorError if the URL resolves to neither endpoint and
    `fatal` is set; otherwise reports a warning and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg,
        fatal=fatal, ep='navigation/resolve_url', note='Downloading API parameters API JSON',
        default_client=default_client)

    # Resolved endpoint key -> API endpoint to query; the browse endpoint is tried first
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    message = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(message, item_id)
        return None
    raise ExtractorError(message, expected=True)

4561

4562

# Value sent as the search 'params' when _search_results() is called without an explicit `params`;
# a falsy value means no 'params' key is sent
_SEARCH_PARAMS = None

4563

4564

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Lazily yield the entries of a search for `query`, following continuations.

    @param params:         search 'params' value; defaults to self._SEARCH_PARAMS,
                           and is not sent at all when falsy
    @param default_client: client used for the ytcfg download, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    # Paths under which the result items may be found in a response
    result_paths = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    label = f'query "{query}"'
    # Distinct top-level keys of the paths above
    required_keys = tuple({path[0] for path in result_paths})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, label)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that _extract_entries fills with the next continuation
    continuation = [None]
    response = None
    for page_num in itertools.count(1):
        request.update(continuation[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{label} page {page_num}', ep='search', query=request,
            default_client=default_client, check_get_keys=required_keys, ytcfg=ytcfg, headers=headers)
        contents = traverse_obj(response, *result_paths)
        yield from self._extract_entries({'contents': list(variadic(contents))}, continuation)
        if not continuation[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4599

IE_DESC = 'YouTube Tabs'

4600

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4609

(?P<not_channel>

4610

feed/|hashtag/|

4611

(?:playlist|watch)\?.*?\blist=

4612

)|

4613

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4618

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4619

}

4620

IE_NAME = 'youtube:tab'

4621

4622

_TESTS = [{

4623

'note': 'playlists, multipage',

4624

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4625

'playlist_mincount': 94,

4626

'info_dict': {

4627

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4628

'title': 'Igor Kleiner - Playlists',

4629

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4630

'uploader': 'Igor Kleiner',

4631

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4632

'channel': 'Igor Kleiner',

4633

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4634

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4635

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4636

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4637

'channel_follower_count': int

4638

},

4639

}, {

4640

'note': 'playlists, multipage, different order',

4641

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4642

'playlist_mincount': 94,

4643

'info_dict': {

4644

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4645

'title': 'Igor Kleiner - Playlists',

4646

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4647

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4648

'uploader': 'Igor Kleiner',

4649

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4650

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4651

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4652

'channel': 'Igor Kleiner',

4653

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4654

'channel_follower_count': int

4655

},

4656

}, {

4657

'note': 'playlists, series',

4658

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4659

'playlist_mincount': 5,

4660

'info_dict': {

4661

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4662

'title': '3Blue1Brown - Playlists',

4663

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4664

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4665

'uploader': '3Blue1Brown',

4666

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4667

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4668

'channel': '3Blue1Brown',

4669

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4670

'tags': ['Mathematics'],

4671

'channel_follower_count': int

4672

},

4673

}, {

4674

'note': 'playlists, singlepage',

4675

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4676

'playlist_mincount': 4,

4677

'info_dict': {

4678

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4679

'title': 'ThirstForScience - Playlists',

4680

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4681

'uploader': 'ThirstForScience',

4682

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4683

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4684

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4685

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4686

'tags': 'count:13',

4687

'channel': 'ThirstForScience',

4688

'channel_follower_count': int

4689

}

4690

}, {

4691

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4692

'only_matching': True,

4693

}, {

4694

'note': 'basic, single video playlist',

4695

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4696

'info_dict': {

4697

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4698

'uploader': 'Sergey M.',

4699

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4700

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4705

'channel': 'Sergey M.',

4706

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4707

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4708

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4713

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4714

'info_dict': {

4715

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4716

'uploader': 'Sergey M.',

4717

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4718

'title': 'youtube-dl empty playlist',

4719

'tags': [],

4720

'channel': 'Sergey M.',

4721

'description': '',

4722

'modified_date': '20160902',

4723

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4724

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4725

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4731

'info_dict': {

4732

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4733

'title': 'lex will - Home',

4734

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4735

'uploader': 'lex will',

4736

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4737

'channel': 'lex will',

4738

'tags': ['bible', 'history', 'prophesy'],

4739

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4740

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4741

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4742

'channel_follower_count': int

4743

},

4744

'playlist_mincount': 2,

4745

}, {

4746

'note': 'Videos tab',

4747

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4748

'info_dict': {

4749

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4750

'title': 'lex will - Videos',

4751

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4752

'uploader': 'lex will',

4753

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4754

'tags': ['bible', 'history', 'prophesy'],

4755

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4756

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4757

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4758

'channel': 'lex will',

4759

'channel_follower_count': int

4760

},

4761

'playlist_mincount': 975,

4762

}, {

4763

'note': 'Videos tab, sorted by popular',

4764

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4765

'info_dict': {

4766

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4767

'title': 'lex will - Videos',

4768

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4769

'uploader': 'lex will',

4770

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4771

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4772

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4773

'channel': 'lex will',

4774

'tags': ['bible', 'history', 'prophesy'],

4775

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4776

'channel_follower_count': int

4777

},

4778

'playlist_mincount': 199,

4779

}, {

4780

'note': 'Playlists tab',

4781

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4782

'info_dict': {

4783

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4784

'title': 'lex will - Playlists',

4785

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4786

'uploader': 'lex will',

4787

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4788

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4789

'channel': 'lex will',

4790

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4791

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4792

'tags': ['bible', 'history', 'prophesy'],

4793

'channel_follower_count': int

4794

},

4795

'playlist_mincount': 17,

4796

}, {

4797

'note': 'Community tab',

4798

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4799

'info_dict': {

4800

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4801

'title': 'lex will - Community',

4802

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4803

'uploader': 'lex will',

4804

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4805

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4806

'channel': 'lex will',

4807

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4808

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4809

'tags': ['bible', 'history', 'prophesy'],

4810

'channel_follower_count': int

4811

},

4812

'playlist_mincount': 18,

4813

}, {

4814

'note': 'Channels tab',

4815

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4816

'info_dict': {

4817

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4818

'title': 'lex will - Channels',

4819

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4820

'uploader': 'lex will',

4821

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4822

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4823

'channel': 'lex will',

4824

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4825

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4826

'tags': ['bible', 'history', 'prophesy'],

4827

'channel_follower_count': int

4828

},

4829

'playlist_mincount': 12,

4830

}, {

4831

'note': 'Search tab',

4832

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4833

'playlist_mincount': 40,

4834

'info_dict': {

4835

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4836

'title': '3Blue1Brown - Search - linear algebra',

4837

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4838

'uploader': '3Blue1Brown',

4839

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4840

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4841

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4842

'tags': ['Mathematics'],

4843

'channel': '3Blue1Brown',

4844

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4845

'channel_follower_count': int

4846

},

4847

}, {

4848

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4849

'only_matching': True,

4850

}, {

4851

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4852

'only_matching': True,

4853

}, {

4854

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4855

'only_matching': True,

4856

}, {

4857

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4858

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4859

'info_dict': {

4860

'title': '29C3: Not my department',

4861

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4862

'uploader': 'Christiaan008',

4863

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4864

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4865

'tags': [],

4866

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4867

'view_count': int,

4868

'modified_date': '20150605',

4869

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4870

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4871

'channel': 'Christiaan008',

4872

},

4873

'playlist_count': 96,

4874

}, {

4875

'note': 'Large playlist',

4876

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4877

'info_dict': {

4878

'title': 'Uploads from Cauchemar',

4879

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4880

'uploader': 'Cauchemar',

4881

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4882

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4883

'tags': [],

4884

'modified_date': r're:\d{8}',

4885

'channel': 'Cauchemar',

4886

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4887

'view_count': int,

4888

'description': '',

4889

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4890

},

4891

'playlist_mincount': 1123,

4892

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4893

}, {

4894

'note': 'even larger playlist, 8832 videos',

4895

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4896

'only_matching': True,

4897

}, {

4898

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4899

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4900

'info_dict': {

4901

'title': 'Uploads from Interstellar Movie',

4902

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4903

'uploader': 'Interstellar Movie',

4904

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4905

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4906

'tags': [],

4907

'view_count': int,

4908

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4909

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4910

'channel': 'Interstellar Movie',

4911

'description': '',

4912

'modified_date': r're:\d{8}',

4913

},

4914

'playlist_mincount': 21,

4915

}, {

4916

'note': 'Playlist with "show unavailable videos" button',

4917

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4918

'info_dict': {

4919

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4920

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4921

'uploader': 'Phim Siêu Nhân Nhật Bản',

4922

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4923

'view_count': int,

4924

'channel': 'Phim Siêu Nhân Nhật Bản',

4925

'tags': [],

4926

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4927

'description': '',

4928

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4929

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4930

'modified_date': r're:\d{8}',

4931

},

4932

'playlist_mincount': 200,

4933

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4934

}, {

4935

'note': 'Playlist with unavailable videos in page 7',

4936

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4937

'info_dict': {

4938

'title': 'Uploads from BlankTV',

4939

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4940

'uploader': 'BlankTV',

4941

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4942

'channel': 'BlankTV',

4943

'channel_url': 'https://www.youtube.com/c/blanktv',

4944

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4945

'view_count': int,

4946

'tags': [],

4947

'uploader_url': 'https://www.youtube.com/c/blanktv',

4948

'modified_date': r're:\d{8}',

4949

'description': '',

4950

},

4951

'playlist_mincount': 1000,

4952

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4953

}, {

4954

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4955

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4956

'info_dict': {

4957

'title': 'Data Analysis with Dr Mike Pound',

4958

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4959

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4960

'uploader': 'Computerphile',

4961

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4962

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4963

'tags': [],

4964

'view_count': int,

4965

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4966

'channel_url': 'https://www.youtube.com/user/Computerphile',

4967

'channel': 'Computerphile',

4968

},

4969

'playlist_mincount': 11,

4970

}, {

4971

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4972

'only_matching': True,

4973

}, {

4974

'note': 'Playlist URL that does not actually serve a playlist',

4975

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4980

'uploader': 'STREEM',

4981

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4982

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4983

'upload_date': '20150526',

4984

'license': 'Standard YouTube License',

4985

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4986

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4993

},

4994

'skip': 'This video is not available.',

4995

'add_ie': [YoutubeIE.ie_key()],

4996

}, {

4997

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4998

'only_matching': True,

4999

}, {

5000

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

5001

'only_matching': True,

5002

}, {

5003

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

5004

'info_dict': {

5005

'id': 'GgL890LIznQ', # This will keep changing

5006

'ext': 'mp4',

5007

'title': str,

5008

'uploader': 'Sky News',

5009

'uploader_id': 'skynews',

5010

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

5011

'upload_date': r're:\d{8}',

5012

'description': str,

5013

'categories': ['News & Politics'],

5014

'tags': list,

5015

'like_count': int,

5016

'release_timestamp': 1642502819,

5017

'channel': 'Sky News',

5018

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

5019

'age_limit': 0,

5020

'view_count': int,

5021

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

5022

'playable_in_embed': True,

5023

'release_date': '20220118',

5024

'availability': 'public',

5025

'live_status': 'is_live',

5026

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

5027

'channel_follower_count': int

5028

},

5029

'params': {

5030

'skip_download': True,

5031

},

5032

'expected_warnings': ['Ignoring subtitle tracks found in '],

5033

}, {

5034

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

5039

'uploader': 'The Young Turks',

5040

'uploader_id': 'TheYoungTurks',

5041

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

5042

'upload_date': '20150715',

5043

'license': 'Standard YouTube License',

5044

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

5045

'categories': ['News & Politics'],

5046

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

5051

},

5052

'only_matching': True,

5053

}, {

5054

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

5055

'only_matching': True,

5056

}, {

5057

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

5058

'only_matching': True,

5059

}, {

5060

'note': 'A channel that is not live. Should raise error',

5061

'url': 'https://www.youtube.com/user/numberphile/live',

5062

'only_matching': True,

5063

}, {

5064

'url': 'https://www.youtube.com/feed/trending',

5065

'only_matching': True,

5066

}, {

5067

'url': 'https://www.youtube.com/feed/library',

5068

'only_matching': True,

5069

}, {

5070

'url': 'https://www.youtube.com/feed/history',

5071

'only_matching': True,

5072

}, {

5073

'url': 'https://www.youtube.com/feed/subscriptions',

5074

'only_matching': True,

5075

}, {

5076

'url': 'https://www.youtube.com/feed/watch_later',

5077

'only_matching': True,

5078

}, {

5079

'note': 'Recommended - redirects to home page.',

5080

'url': 'https://www.youtube.com/feed/recommended',

5081

'only_matching': True,

5082

}, {

5083

'note': 'inline playlist with not always working continuations',

5084

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

5085

'only_matching': True,

5086

}, {

5087

'url': 'https://www.youtube.com/course',

5088

'only_matching': True,

5089

}, {

5090

'url': 'https://www.youtube.com/zsecurity',

5091

'only_matching': True,

5092

}, {

5093

'url': 'http://www.youtube.com/NASAgovVideo/videos',

5094

'only_matching': True,

5095

}, {

5096

'url': 'https://www.youtube.com/TheYoungTurks/live',

5097

'only_matching': True,

5098

}, {

5099

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

5106

}, {

5107

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

5108

'only_matching': True,

5109

}, {

5110

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

5111

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5112

'only_matching': True

5113

}, {

5114

'note': '/browse/ should redirect to /channel/',

5115

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

5116

'only_matching': True

5117

}, {

5118

'note': 'VLPL, should redirect to playlist?list=PL...',

5119

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5120

'info_dict': {

5121

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

5122

'uploader': 'NoCopyrightSounds',

5123

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

5124

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5125

'title': 'NCS Releases',

5126

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5127

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5128

'modified_date': r're:\d{8}',

5129

'view_count': int,

5130

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5131

'tags': [],

5132

'channel': 'NoCopyrightSounds',

5133

},

5134

'playlist_mincount': 166,

5135

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5136

}, {

5137

'note': 'Topic, should redirect to playlist?list=UU...',

5138

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5139

'info_dict': {

5140

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5141

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5142

'title': 'Uploads from Royalty Free Music - Topic',

5143

'uploader': 'Royalty Free Music - Topic',

5144

'tags': [],

5145

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5146

'channel': 'Royalty Free Music - Topic',

5147

'view_count': int,

5148

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5149

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5150

'modified_date': r're:\d{8}',

5151

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5152

'description': '',

5153

},

5154

'expected_warnings': [

5155

'The URL does not have a videos tab',

5156

r'[Uu]navailable videos (are|will be) hidden',

5157

],

5158

'playlist_mincount': 101,

5159

}, {

5160

'note': 'Topic without a UU playlist',

5161

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5162

'info_dict': {

5163

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5164

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5165

'tags': [],

5166

},

5167

'expected_warnings': [

5168

'the playlist redirect gave error',

5169

],

5170

'playlist_mincount': 9,

5171

}, {

5172

'note': 'Youtube music Album',

5173

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5174

'info_dict': {

5175

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5176

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5181

'modified_date': r're:\d{8}',

5182

},

5183

'playlist_count': 50,

5184

}, {

5185

'note': 'unlisted single video playlist',

5186

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5187

'info_dict': {

5188

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5189

'uploader': 'colethedj',

5190

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5191

'title': 'yt-dlp unlisted playlist test',

5192

'availability': 'unlisted',

5193

'tags': [],

5194

'modified_date': '20211208',

5195

'channel': 'colethedj',

5196

'view_count': int,

5197

'description': '',

5198

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5199

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5200

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5205

'url': 'https://www.youtube.com/feed/recommended',

5206

'info_dict': {

5207

'id': 'recommended',

5208

'title': 'recommended',

5209

'tags': [],

5210

},

5211

'playlist_mincount': 50,

5212

'params': {

5213

'skip_download': True,

5214

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5215

},

5216

}, {

5217

'note': 'API Fallback: /videos tab, sorted by oldest first',

5218

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5219

'info_dict': {

5220

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5221

'title': 'Cody\'sLab - Videos',

5222

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5223

'uploader': 'Cody\'sLab',

5224

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5225

'channel': 'Cody\'sLab',

5226

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5227

'tags': [],

5228

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5229

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5230

'channel_follower_count': int

5231

},

5232

'playlist_mincount': 650,

5233

'params': {

5234

'skip_download': True,

5235

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5236

},

5237

}, {

5238

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5239

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5240

'info_dict': {

5241

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5242

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5243

'title': 'Uploads from Royalty Free Music - Topic',

5244

'uploader': 'Royalty Free Music - Topic',

5245

'modified_date': r're:\d{8}',

5246

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5247

'description': '',

5248

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5249

'tags': [],

5250

'channel': 'Royalty Free Music - Topic',

5251

'view_count': int,

5252

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5253

},

5254

'expected_warnings': [

5255

'does not have a videos tab',

5256

r'[Uu]navailable videos (are|will be) hidden',

5257

],

5258

'playlist_mincount': 101,

5259

'params': {

5260

'skip_download': True,

5261

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5262

},

5263

}, {

5264

'note': 'non-standard redirect to regional channel',

5265

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5266

'only_matching': True

5267

}, {

5268

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5269

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5270

'info_dict': {

5271

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5272

'modified_date': '20220407',

5273

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5274

'tags': [],

5275

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5276

'uploader': 'pukkandan',

5277

'availability': 'unlisted',

5278

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5279

'channel': 'pukkandan',

5280

'description': 'Test for collaborative playlist',

5281

'title': 'yt-dlp test - collaborative playlist',

5282

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5283

},

5284

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is declined here; everything else is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)

5290

5291

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only tries
#          to match it when the `not_channel` group (expected to be defined
#          inside _VALID_URL) did not participate in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5292

5293

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Turn a tab, playlist, feed or watch-with-list URL into a result.

    The URL is first normalised: the host is forced to www.youtube.com,
    YouTube Music ids are mapped to their regular equivalents and bare
    channel URLs are pointed at ``/videos`` (unless the
    ``no-youtube-channel-redirect`` compat option is set). The page data is
    then fetched and handed to the tab, inline-playlist or single-video
    handling, in that order.

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved to a playlist, when a ``/live`` URL still
    resolves to a tab page, or when a tab named in the URL does not exist.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(candidate_url):
        # Named groups of _URL_RE, with groups that did not match mapped to ''
        groups = self._URL_RE.match(candidate_url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the URL itself asked for before any substitution below
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab_name = self._extract_selected_tab(tabs).get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name and requested_tab_name != selected_tab_name:
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was named in the URL itself, so do not silently substitute another one
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    endpoint_video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
    video_id = endpoint_video_id or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5422

5423

5424

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``...?list=<id>`` URLs and hand them to YoutubeTabIE.

    URLs that YoutubeTabIE already accepts, and URLs carrying a ``v`` query
    parameter, are declined by ``suitable``.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs owned by YoutubeTabIE or naming a video; else defer to the base matcher."""
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is already imported at module level; no local re-import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over when there is one;
        otherwise only ``list=<playlist id>`` is sent. Music URLs are
        flagged through smuggled data so the flag reaches the tab extractor.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the incoming URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5533

5534

5535

class YoutubeYtBeIE(InfoExtractor):
    """Expand youtu.be short links that also carry a ``list=`` playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent ``watch?v=...&list=...`` URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5582

5583

5584

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds to the channel's ``/live`` URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel id taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5597

5598

5599

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to that user's ``/videos`` page."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5613

5614

5615

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

5632

5633

5634

class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu, reached through the ``:ytnotif`` keyword, as a playlist."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        ``continuation_list`` is a one-element list used as an output slot:
        it is reset to ``None`` and then set to the last
        ``continuationItemRenderer`` seen, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a ``url`` result dict.

        Notifications with a watch endpoint point at the video. Otherwise a
        community-post URL is built from the browse endpoint, and ``None`` is
        returned when the channel id or the post id cannot be found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
            channel_id = None
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only converted to a date when `approximate_date` is requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page provides no continuation."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data comes from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named ``notifications`` backed by the lazy entry generator."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)

5723

5724

5725

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs, forwarding their ``sp`` parameter."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5793

5794

5795

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one section.

    The section is chosen either by the ``sp`` query parameter or by the URL
    fragment (for example ``#songs``), which is looked up in ``_SECTIONS``.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in a URL fragment) -> search `params` token
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled ``<query>[ - <section>]``."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Label with the known section name when the token is recognised,
            # otherwise fall back to the raw token itself
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown or missing fragment: plain search with no section suffix
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5846

5847

5848

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the ``/feed/<name>`` extractors.

    Subclasses override ``_FEED_NAME``; it determines both the reported
    IE_NAME (``youtube:<name>``) and the feed URL handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so its
        # login check is invoked explicitly with this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed URL built from ``_FEED_NAME``."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5866

5867

5868

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5880

5881

5882

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended feed; matches the bare youtube.com home URL and the ``:ytrec`` keywords."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed, reached through the ``:ytsubs`` keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # "history" feed; matched by the ":ythis" / ":ythistory" keywords.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeStoriesIE(InfoExtractor):
    # Handles the "ytstories:UC..." pseudo-URL by redirecting to a playlist
    # whose ID is "RLTD" followed by the matched part of the channel ID.
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Build the stories playlist URL and hand it over to YoutubeTabIE."""
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(playlist_url, ie=YoutubeTabIE, video_id=playlist_id)

5936

5937

5938

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that carry no video ID (typically because
    # an unquoted "&" cut the URL short in the shell) and reports a helpful error.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally; a URL matching _VALID_URL has no video ID.
        """
        # The suggested command names this project's executable (yt-dlp),
        # not the upstream youtube-dl it was forked from.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> page to its underlying video plus the clip's
    # start/end offsets, and defers the actual extraction to YoutubeIE.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the clip's base video.

        Raises:
            ExtractorError: if the page data has no video ID, or no usable
                clip start/end times.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the loop command is not found; look the times up
        # defensively so that case surfaces as an ExtractorError instead of a bare
        # TypeError/KeyError from subscripting.
        section_start = float_or_none(traverse_obj(clip_data, 'startTimeMs'), 1000)
        section_end = float_or_none(traverse_obj(clip_data, 'endTimeMs'), 1000)
        if section_start is None or section_end is None:
            raise ExtractorError('Unable to find clip start/end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated rather than attempting extraction.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)