jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	NO_DEFAULT,
	49	orderedSet,
	50	parse_codecs,
	51	parse_count,
	52	parse_duration,
	53	parse_iso8601,
	54	parse_qs,
	55	qualities,
	56	remove_end,
	57	remove_start,
	58	smuggle_url,
	59	str_or_none,
	60	str_to_int,
	61	strftime_or_none,
	62	traverse_obj,
	63	try_get,
	64	unescapeHTML,
	65	unified_strdate,
	66	unified_timestamp,
	67	unsmuggle_url,
	68	update_url_query,
	69	url_or_none,
	70	urljoin,
	71	variadic,
	72	)
	73
	74
	75	def get_first(obj, keys, **kwargs):
	76	return traverse_obj(obj, (..., variadic(keys)), *kwargs, get_all=False)
	77
	78
	79	# any clients starting with _ cannot be explicitly requested by the user
	80	INNERTUBE_CLIENTS = {
	81	'web': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB',
	86	'clientVersion': '2.20211221.00.00',
	87	}
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	90	},
	91	'web_embedded': {
	92	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_EMBEDDED_PLAYER',
	96	'clientVersion': '1.20211215.00.01',
	97	},
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	100	},
	101	'web_music': {
	102	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	103	'INNERTUBE_HOST': 'music.youtube.com',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_REMIX',
	107	'clientVersion': '1.20211213.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	111	},
	112	'web_creator': {
	113	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'WEB_CREATOR',
	117	'clientVersion': '1.20211220.02.00',
	118	}
	119	},
	120	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	121	},
	122	'android': {
	123	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	124	'INNERTUBE_CONTEXT': {
	125	'client': {
	126	'clientName': 'ANDROID',
	127	'clientVersion': '16.49',
	128	}
	129	},
	130	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	131	'REQUIRE_JS_PLAYER': False
	132	},
	133	'android_embedded': {
	134	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	135	'INNERTUBE_CONTEXT': {
	136	'client': {
	137	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	138	'clientVersion': '16.49',
	139	},
	140	},
	141	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	142	'REQUIRE_JS_PLAYER': False
	143	},
	144	'android_music': {
	145	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	146	'INNERTUBE_CONTEXT': {
	147	'client': {
	148	'clientName': 'ANDROID_MUSIC',
	149	'clientVersion': '4.57',
	150	}
	151	},
	152	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	153	'REQUIRE_JS_PLAYER': False
	154	},
	155	'android_creator': {
	156	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	157	'INNERTUBE_CONTEXT': {
	158	'client': {
	159	'clientName': 'ANDROID_CREATOR',
	160	'clientVersion': '21.47',
	161	},
	162	},
	163	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	164	'REQUIRE_JS_PLAYER': False
	165	},
	166	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	167	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	168	'ios': {
	169	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	170	'INNERTUBE_CONTEXT': {
	171	'client': {
	172	'clientName': 'IOS',
	173	'clientVersion': '16.46',
	174	'deviceModel': 'iPhone14,3',
	175	}
	176	},
	177	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	178	'REQUIRE_JS_PLAYER': False
	179	},
	180	'ios_embedded': {
	181	'INNERTUBE_CONTEXT': {
	182	'client': {
	183	'clientName': 'IOS_MESSAGES_EXTENSION',
	184	'clientVersion': '16.46',
	185	'deviceModel': 'iPhone14,3',
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '4.57',
	197	},
	198	},
	199	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	200	'REQUIRE_JS_PLAYER': False
	201	},
	202	'ios_creator': {
	203	'INNERTUBE_CONTEXT': {
	204	'client': {
	205	'clientName': 'IOS_CREATOR',
	206	'clientVersion': '21.47',
	207	},
	208	},
	209	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	210	'REQUIRE_JS_PLAYER': False
	211	},
	212	# mweb has 'ultralow' formats
	213	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	214	'mweb': {
	215	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	216	'INNERTUBE_CONTEXT': {
	217	'client': {
	218	'clientName': 'MWEB',
	219	'clientVersion': '2.20211221.01.00',
	220	}
	221	},
	222	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	223	}
	224	}
	225
	226
	227	def build_innertube_clients():
	228	THIRD_PARTY = {
	229	'embedUrl': 'https://google.com', # Can be any valid URL
	230	}
	231	BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
	232	priority = qualities(BASE_CLIENTS[::-1])
	233
	234	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	235	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	236	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	237	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	238	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	239
	240	base_client, *variant = client.split('_')
	241	ytcfg['priority'] = 10 * priority(base_client)
	242
	243	if not variant:
	244	INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
	245	agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	246	agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	247	agegate_ytcfg['priority'] -= 1
	248	elif variant == ['embedded']:
	249	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	250	ytcfg['priority'] -= 2
	251	else:
	252	ytcfg['priority'] -= 3
	253
	254
	255	build_innertube_clients()
	256
	257
	258	class YoutubeBaseInfoExtractor(InfoExtractor):
	259	"""Provide base functions for Youtube extractors"""
	260
	261	_RESERVED_NAMES = (
	262	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	263	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	264	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	265	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	266
	267	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	268
	269	_NETRC_MACHINE = 'youtube'
	270
	271	# If True it will raise an error if no login info is provided
	272	_LOGIN_REQUIRED = False
	273
	274	_INVIDIOUS_SITES = (
	275	# invidious-redirect websites
	276	r'(?:www\.)?redirect\.invidious\.io',
	277	r'(?:(?:www\|dev)\.)?invidio\.us',
	278	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	279	r'(?:www\.)?invidious\.pussthecat\.org',
	280	r'(?:www\.)?invidious\.zee\.li',
	281	r'(?:www\.)?invidious\.ethibox\.fr',
	282	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	283	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	284	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	285	# youtube-dl invidious instances list
	286	r'(?:(?:www\|no)\.)?invidiou\.sh',
	287	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	288	r'(?:www\.)?invidious\.kabi\.tk',
	289	r'(?:www\.)?invidious\.mastodon\.host',
	290	r'(?:www\.)?invidious\.zapashcanon\.fr',
	291	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	292	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	293	r'(?:www\.)?invidious\.himiko\.cloud',
	294	r'(?:www\.)?invidious\.reallyancient\.tech',
	295	r'(?:www\.)?invidious\.tube',
	296	r'(?:www\.)?invidiou\.site',
	297	r'(?:www\.)?invidious\.site',
	298	r'(?:www\.)?invidious\.xyz',
	299	r'(?:www\.)?invidious\.nixnet\.xyz',
	300	r'(?:www\.)?invidious\.048596\.xyz',
	301	r'(?:www\.)?invidious\.drycat\.fr',
	302	r'(?:www\.)?inv\.skyn3t\.in',
	303	r'(?:www\.)?tube\.poal\.co',
	304	r'(?:www\.)?tube\.connect\.cafe',
	305	r'(?:www\.)?vid\.wxzm\.sx',
	306	r'(?:www\.)?vid\.mint\.lgbt',
	307	r'(?:www\.)?vid\.puffyan\.us',
	308	r'(?:www\.)?yewtu\.be',
	309	r'(?:www\.)?yt\.elukerio\.org',
	310	r'(?:www\.)?yt\.lelux\.fi',
	311	r'(?:www\.)?invidious\.ggc-project\.de',
	312	r'(?:www\.)?yt\.maisputain\.ovh',
	313	r'(?:www\.)?ytprivate\.com',
	314	r'(?:www\.)?invidious\.13ad\.de',
	315	r'(?:www\.)?invidious\.toot\.koeln',
	316	r'(?:www\.)?invidious\.fdn\.fr',
	317	r'(?:www\.)?watch\.nettohikari\.com',
	318	r'(?:www\.)?invidious\.namazso\.eu',
	319	r'(?:www\.)?invidious\.silkky\.cloud',
	320	r'(?:www\.)?invidious\.exonip\.de',
	321	r'(?:www\.)?invidious\.riverside\.rocks',
	322	r'(?:www\.)?invidious\.blamefran\.net',
	323	r'(?:www\.)?invidious\.moomoo\.de',
	324	r'(?:www\.)?ytb\.trom\.tf',
	325	r'(?:www\.)?yt\.cyberhost\.uk',
	326	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	327	r'(?:www\.)?qklhadlycap4cnod\.onion',
	328	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	329	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	330	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	331	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	332	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	333	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	334	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	335	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	336	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	337	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	338	)
	339
	340	def _login(self):
	341	"""
	342	Attempt to log in to YouTube.
	343	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	344	"""
	345
	346	if (self._LOGIN_REQUIRED
	347	and self.get_param('cookiefile') is None
	348	and self.get_param('cookiesfrombrowser') is None):
	349	self.raise_login_required(
	350	'Login details are needed to download this content', method='cookies')
	351	username, password = self._get_login_info()
	352	if username:
	353	self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
	354
	355	def _initialize_consent(self):
	356	cookies = self._get_cookies('https://www.youtube.com/')
	357	if cookies.get('__Secure-3PSID'):
	358	return
	359	consent_id = None
	360	consent = cookies.get('CONSENT')
	361	if consent:
	362	if 'YES' in consent.value:
	363	return
	364	consent_id = self._search_regex(
	365	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	366	if not consent_id:
	367	consent_id = random.randint(100, 999)
	368	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	369
	370	def _initialize_pref(self):
	371	cookies = self._get_cookies('https://www.youtube.com/')
	372	pref_cookie = cookies.get('PREF')
	373	pref = {}
	374	if pref_cookie:
	375	try:
	376	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	377	except ValueError:
	378	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	379	pref.update({'hl': 'en', 'tz': 'UTC'})
	380	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	381
	382	def _real_initialize(self):
	383	self._initialize_pref()
	384	self._initialize_consent()
	385	self._login()
	386
	387	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	388	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	389	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	390
	391	def _get_default_ytcfg(self, client='web'):
	392	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	393
	394	def _get_innertube_host(self, client='web'):
	395	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	396
	397	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	398	# try_get but with fallback to default ytcfg client values when present
	399	_func = lambda y: try_get(y, getter, expected_type)
	400	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	401
	402	def _extract_client_name(self, ytcfg, default_client='web'):
	403	return self._ytcfg_get_safe(
	404	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	405	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	406
	407	def _extract_client_version(self, ytcfg, default_client='web'):
	408	return self._ytcfg_get_safe(
	409	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	410	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	411
	412	def _extract_api_key(self, ytcfg=None, default_client='web'):
	413	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	414
	415	def _extract_context(self, ytcfg=None, default_client='web'):
	416	context = get_first(
	417	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	418	# Enforce language and tz for extraction
	419	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	420	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	421	return context
	422
	423	_SAPISID = None
	424
	425	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	426	time_now = round(time.time())
	427	if self._SAPISID is None:
	428	yt_cookies = self._get_cookies('https://www.youtube.com')
	429	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	430	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	431	sapisid_cookie = dict_get(
	432	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	433	if sapisid_cookie and sapisid_cookie.value:
	434	self._SAPISID = sapisid_cookie.value
	435	self.write_debug('Extracted SAPISID cookie')
	436	# SAPISID cookie is required if not already present
	437	if not yt_cookies.get('SAPISID'):
	438	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	439	self._set_cookie(
	440	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	441	else:
	442	self._SAPISID = False
	443	if not self._SAPISID:
	444	return None
	445	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	446	sapisidhash = hashlib.sha1(
	447	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	448	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	449
	450	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	451	note='Downloading API JSON', errnote='Unable to download API page',
	452	context=None, api_key=None, api_hostname=None, default_client='web'):
	453
	454	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	455	data.update(query)
	456	real_headers = self.generate_api_headers(default_client=default_client)
	457	real_headers.update({'content-type': 'application/json'})
	458	if headers:
	459	real_headers.update(headers)
	460	return self._download_json(
	461	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	462	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	463	data=json.dumps(data).encode('utf8'), headers=real_headers,
	464	query={'key': api_key or self._extract_api_key()})
	465
	466	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	467	data = self._search_regex(
	468	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	469	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	470	if data:
	471	return self._parse_json(data, item_id, fatal=fatal)
	472
	473	@staticmethod
	474	def _extract_session_index(*data):
	475	"""
	476	Index of current account in account list.
	477	See: https://github.com/yt-dlp/yt-dlp/pull/519
	478	"""
	479	for ytcfg in data:
	480	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	481	if session_index is not None:
	482	return session_index
	483
	484	# Deprecated?
	485	def _extract_identity_token(self, ytcfg=None, webpage=None):
	486	if ytcfg:
	487	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	488	if token:
	489	return token
	490	if webpage:
	491	return self._search_regex(
	492	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	493	'identity token', default=None, fatal=False)
	494
	495	@staticmethod
	496	def _extract_account_syncid(*args):
	497	"""
	498	Extract syncId required to download private playlists of secondary channels
	499	@params response and/or ytcfg
	500	"""

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

def get_first(obj, keys, **kwargs):
    """Look up `keys` under every element of `obj` via traverse_obj, passing get_all=False."""
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)

77

78

79

# any clients starting with _ cannot be explicitly requested by the user

80

INNERTUBE_CLIENTS = {

81

'web': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB',

86

'clientVersion': '2.20211221.00.00',

87

}

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

90

},

91

'web_embedded': {

92

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_EMBEDDED_PLAYER',

96

'clientVersion': '1.20211215.00.01',

97

},

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

100

},

101

'web_music': {

102

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

103

'INNERTUBE_HOST': 'music.youtube.com',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_REMIX',

107

'clientVersion': '1.20211213.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

111

},

112

'web_creator': {

113

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'WEB_CREATOR',

117

'clientVersion': '1.20211220.02.00',

118

}

119

},

120

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

121

},

122

'android': {

123

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

124

'INNERTUBE_CONTEXT': {

125

'client': {

126

'clientName': 'ANDROID',

127

'clientVersion': '16.49',

128

}

129

},

130

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

131

'REQUIRE_JS_PLAYER': False

132

},

133

'android_embedded': {

134

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

135

'INNERTUBE_CONTEXT': {

136

'client': {

137

'clientName': 'ANDROID_EMBEDDED_PLAYER',

138

'clientVersion': '16.49',

139

},

140

},

141

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

142

'REQUIRE_JS_PLAYER': False

143

},

144

'android_music': {

145

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

146

'INNERTUBE_CONTEXT': {

147

'client': {

148

'clientName': 'ANDROID_MUSIC',

149

'clientVersion': '4.57',

150

}

151

},

152

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

153

'REQUIRE_JS_PLAYER': False

154

},

155

'android_creator': {

156

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

157

'INNERTUBE_CONTEXT': {

158

'client': {

159

'clientName': 'ANDROID_CREATOR',

160

'clientVersion': '21.47',

161

},

162

},

163

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

164

'REQUIRE_JS_PLAYER': False

165

},

166

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

167

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

168

'ios': {

169

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS',

173

'clientVersion': '16.46',

174

'deviceModel': 'iPhone14,3',

175

}

176

},

177

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

178

'REQUIRE_JS_PLAYER': False

179

},

180

'ios_embedded': {

181

'INNERTUBE_CONTEXT': {

182

'client': {

183

'clientName': 'IOS_MESSAGES_EXTENSION',

184

'clientVersion': '16.46',

185

'deviceModel': 'iPhone14,3',

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '4.57',

197

},

198

},

199

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

200

'REQUIRE_JS_PLAYER': False

201

},

202

'ios_creator': {

203

'INNERTUBE_CONTEXT': {

204

'client': {

205

'clientName': 'IOS_CREATOR',

206

'clientVersion': '21.47',

207

},

208

},

209

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

210

'REQUIRE_JS_PLAYER': False

211

},

212

# mweb has 'ultralow' formats

213

# See: https://github.com/yt-dlp/yt-dlp/pull/557

214

'mweb': {

215

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

216

'INNERTUBE_CONTEXT': {

217

'client': {

218

'clientName': 'MWEB',

219

'clientVersion': '2.20211221.01.00',

220

}

221

},

222

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

}

}

def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Each client config gets a default API key, host, REQUIRE_JS_PLAYER flag
    and `hl`, plus a `priority` derived from its base client name.  Clients
    without a variant suffix additionally get a derived `<name>_agegate`
    entry added to INNERTUBE_CLIENTS.
    """
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])

    # Iterate over a snapshot: the loop inserts new "*_agegate" keys.
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg.setdefault('REQUIRE_JS_PLAYER', True)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base, *suffix = name.split('_')
        cfg['priority'] = 10 * rank(base)

        if suffix == ['embedded']:
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif suffix:
            cfg['priority'] -= 3
        else:
            # Plain base client: derive its embedded-screen age-gate twin.
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg

253

254

255

build_innertube_clients()

256

257

258

class YoutubeBaseInfoExtractor(InfoExtractor):

259

"""Provide base functions for Youtube extractors"""

260

261

_RESERVED_NAMES = (

262

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

268

269

_NETRC_MACHINE = 'youtube'

270

271

# If True it will raise an error if no login info is provided

272

_LOGIN_REQUIRED = False

273

274

_INVIDIOUS_SITES = (

275

# invidious-redirect websites

276

r'(?:www\.)?redirect\.invidious\.io',

277

r'(?:(?:www|dev)\.)?invidio\.us',

278

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

279

r'(?:www\.)?invidious\.pussthecat\.org',

280

r'(?:www\.)?invidious\.zee\.li',

281

r'(?:www\.)?invidious\.ethibox\.fr',

282

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

283

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

284

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

285

# youtube-dl invidious instances list

286

r'(?:(?:www|no)\.)?invidiou\.sh',

287

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

288

r'(?:www\.)?invidious\.kabi\.tk',

289

r'(?:www\.)?invidious\.mastodon\.host',

290

r'(?:www\.)?invidious\.zapashcanon\.fr',

291

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

292

r'(?:www\.)?invidious\.tinfoil-hat\.net',

293

r'(?:www\.)?invidious\.himiko\.cloud',

294

r'(?:www\.)?invidious\.reallyancient\.tech',

295

r'(?:www\.)?invidious\.tube',

296

r'(?:www\.)?invidiou\.site',

297

r'(?:www\.)?invidious\.site',

298

r'(?:www\.)?invidious\.xyz',

299

r'(?:www\.)?invidious\.nixnet\.xyz',

300

r'(?:www\.)?invidious\.048596\.xyz',

301

r'(?:www\.)?invidious\.drycat\.fr',

302

r'(?:www\.)?inv\.skyn3t\.in',

303

r'(?:www\.)?tube\.poal\.co',

304

r'(?:www\.)?tube\.connect\.cafe',

305

r'(?:www\.)?vid\.wxzm\.sx',

306

r'(?:www\.)?vid\.mint\.lgbt',

307

r'(?:www\.)?vid\.puffyan\.us',

308

r'(?:www\.)?yewtu\.be',

309

r'(?:www\.)?yt\.elukerio\.org',

310

r'(?:www\.)?yt\.lelux\.fi',

311

r'(?:www\.)?invidious\.ggc-project\.de',

312

r'(?:www\.)?yt\.maisputain\.ovh',

313

r'(?:www\.)?ytprivate\.com',

314

r'(?:www\.)?invidious\.13ad\.de',

315

r'(?:www\.)?invidious\.toot\.koeln',

316

r'(?:www\.)?invidious\.fdn\.fr',

317

r'(?:www\.)?watch\.nettohikari\.com',

318

r'(?:www\.)?invidious\.namazso\.eu',

319

r'(?:www\.)?invidious\.silkky\.cloud',

320

r'(?:www\.)?invidious\.exonip\.de',

321

r'(?:www\.)?invidious\.riverside\.rocks',

322

r'(?:www\.)?invidious\.blamefran\.net',

323

r'(?:www\.)?invidious\.moomoo\.de',

324

r'(?:www\.)?ytb\.trom\.tf',

325

r'(?:www\.)?yt\.cyberhost\.uk',

326

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

327

r'(?:www\.)?qklhadlycap4cnod\.onion',

328

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

329

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

330

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

331

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

332

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

333

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

334

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

335

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

336

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

337

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

)

def _login(self):

"""

Attempt to log in to YouTube.

343

If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

344

"""

345

346

if (self._LOGIN_REQUIRED

347

and self.get_param('cookiefile') is None

348

and self.get_param('cookiesfrombrowser') is None):

349

self.raise_login_required(

350

'Login details are needed to download this content', method='cookies')

351

username, password = self._get_login_info()

352

if username:

353

self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

354

355

def _initialize_consent(self):

356

cookies = self._get_cookies('https://www.youtube.com/')

357

if cookies.get('__Secure-3PSID'):

358

return

359

consent_id = None

360

consent = cookies.get('CONSENT')

361

if consent:

362

if 'YES' in consent.value:

363

return

364

consent_id = self._search_regex(

365

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

366

if not consent_id:

367

consent_id = random.randint(100, 999)

368

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

369

370

def _initialize_pref(self):

371

cookies = self._get_cookies('https://www.youtube.com/')

372

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

377

except ValueError:

378

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

379

pref.update({'hl': 'en', 'tz': 'UTC'})

380

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

381

382

def _real_initialize(self):

383

self._initialize_pref()

384

self._initialize_consent()

385

self._login()

386

387

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

388

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

389

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

390

391

def _get_default_ytcfg(self, client='web'):

392

return copy.deepcopy(INNERTUBE_CLIENTS[client])

393

394

def _get_innertube_host(self, client='web'):

395

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

396

397

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

398

# try_get but with fallback to default ytcfg client values when present

399

_func = lambda y: try_get(y, getter, expected_type)

400

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

401

402

def _extract_client_name(self, ytcfg, default_client='web'):

403

return self._ytcfg_get_safe(

404

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

405

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

406

407

def _extract_client_version(self, ytcfg, default_client='web'):

408

return self._ytcfg_get_safe(

409

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

410

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

411

412

def _extract_api_key(self, ytcfg=None, default_client='web'):

413

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

414

415

def _extract_context(self, ytcfg=None, default_client='web'):

416

context = get_first(

417

(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)

418

# Enforce language and tz for extraction

419

client_context = traverse_obj(context, 'client', expected_type=dict, default={})

420

client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})

return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization value for ``origin``.

    The SAPISID value is read from the youtube.com cookies on first use and
    cached in ``self._SAPISID`` (``False`` when unavailable).

    Returns ``None`` when no SAPISID value is available.
    """
    now = round(time.time())
    if self._SAPISID is None:
        cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            # Remember the miss so the cookie jar is not searched again
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(hash_input.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'

449

450

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """Call the ``/youtubei/v1/<ep>`` endpoint and return the downloaded JSON.

    The request data is ``query`` merged on top of a ``context`` entry,
    serialized as JSON. Anything not supplied explicitly (``context``,
    ``api_key``, ``api_hostname``) is taken from the defaults of
    ``default_client``. ``headers`` are applied on top of the generated
    API headers.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        # Fall back to the key of the requested client so that it agrees with
        # the context, headers and host chosen above
        query={'key': api_key or self._extract_api_key(default_client=default_client)})

465

466

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the initial-data assignment in ``webpage`` and return it parsed.

    The boundary-anchored pattern is tried before the plain one. Returns
    ``None`` when nothing was found (only possible with ``fatal=False``).
    """
    bounded_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)

472

473

@staticmethod
def _extract_session_index(*data):
    """
    Return the first usable ``SESSION_INDEX`` among the given ytcfgs
    (the index of the current account in the account list), or None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ``ID_TOKEN`` from ``ytcfg``, else search ``webpage`` for it.

    Returns ``None`` when neither source provides a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

494

495

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels.
    Each argument is an API response and/or a ytcfg; the first one that
    provides an id wins. Returns None when none does.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Return the first visitorData string found in the given API responses
    and/or ytcfgs. Appears to be used to track session state.
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

527

528

def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to ``ytcfg.set({...});`` in ``webpage`` as a dict.

    An empty dict is returned when ``webpage`` is empty, when no such call
    is found, or when its argument cannot be parsed.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped to match literally
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_cfg, video_id, fatal=False) or {}

535

536

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Values not passed explicitly are extracted from ``ytcfg``, falling back
    to the defaults of ``default_client`` where a fallback exists. Headers
    whose value ends up ``None`` are left out of the returned dict.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit sync id without a known session index defaults to 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

560

561

@staticmethod

562

def _build_api_continuation_query(continuation, ctp=None):

563

query = {

564

'continuation': continuation

565

}

566

# TODO: Inconsistency with clickTrackingParams.

567

# Currently we have a fixed ctp contained within context (from ytcfg)

568

# and a ctp in root query for continuation.

569

if ctp:

570

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data.

    Returns ``None`` when the renderer carries no such data or the data
    has no ``continuation`` token.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))

585

586

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns ``None`` when ``continuation_ep`` is not a dict or has no
    ``continuationCommand`` token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

595

596

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or ``None``.

    The renderer's own continuation data is preferred; otherwise the
    entries under ``contents`` and ``items`` are scanned for a
    continuation item renderer.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs found under ``data['alerts']``.

    Entries that are not dicts, that have no ``type``, or that carry no
    text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip malformed values instead of failing on ``.get`` below
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

629

630

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report ``(type, message)`` alerts as warnings, raising on the last error.

    With ``fatal`` set, alerts of type "error" (case-insensitive) are
    errors: the last one is raised as ``ExtractorError`` after every other
    alert has been reported as a warning. Without ``fatal`` all alerts are
    reported as warnings.
    """
    error_alerts, warning_alerts = [], []
    for kind, message in alerts:
        bucket = error_alerts if (kind.lower() == 'error' and fatal) else warning_alerts
        bucket.append([kind, message])

    # The final error is raised below; every earlier one is only a warning
    final_error = error_alerts.pop() if error_alerts else None
    for kind, message in warning_alerts + error_alerts:
        self.report_warning('YouTube said: %s - %s' % (kind, message), only_once=only_once)
    if final_error is not None:
        raise ExtractorError('YouTube said: %s' % final_error[1], expected=expected)

643

644

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of ``data`` and pass them to ``_report_alerts``.

    Extra arguments are forwarded to ``_report_alerts`` unchanged.
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

646

647

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased badge labels found in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths.

    Each candidate object is read either from its ``simpleText`` or by
    joining the ``text`` of its ``runs`` (limited to the first
    ``max_runs`` runs when given). Without any path, ``data`` itself is
    the candidate. Returns ``None`` when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a branching path is treated as yielding several objects;
            # a plain path yields exactly one
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                # The candidate may itself be the list of runs
                runs = candidate

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Return the count parsed from the text found at ``path_list``.

    ``parse_count`` is tried first; when it gives ``None``, the leading
    run of digits and commas of the whitespace-stripped text is used.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail dicts (url, height, width) from ``data``.
    @param path_list: path list to level that contains 'thumbnails' key
    """
    collected = []
    for path in path_list or [()]:
        thumbnails_path = tuple(variadic(path)) + ('thumbnails', ...)
        for entry in traverse_obj(data, thumbnails_path, default=[]):
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                collected.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text contains no relative time or the value
    cannot be converted.
    """
    # Either a keyword (today/yesterday/now) or "<number> <unit>[s] ago"
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None
    return None

def _extract_time_text(self, renderer, *path_list):
    """Return ``(timestamp, text)`` for the time text found at ``path_list``.

    ``text`` is the extracted text ('' when missing). ``timestamp`` comes
    from a relative time ("6 days ago") when the text holds one, otherwise
    from parsing the text, or a date-like part of it, as a date. It is
    ``None`` when nothing could be parsed; a warning is then issued for
    non-empty text.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = None
    if isinstance(dt, datetime.datetime):
        timestamp = calendar.timegm(dt.timetuple())

    if timestamp is None:
        # Both fallback patterns need a capture group, since the search
        # helper returns the captured group rather than the whole match
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(
                    (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'(\w+[\s,\.-]*\w+[\s,\.-]+20\d{2})'),
                    text.lower(), 'time text', default=None)))

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text

742

743

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint ``ep`` with retries and return the response.

    The request is retried (up to the ``extractor_retries`` param, default
    3) on network errors other than HTTP 403/429, on "unknown error"
    alerts, and when none of ``check_get_keys`` is present in the
    response. With ``fatal=False`` failures are reported as warnings and
    ``None`` is returned instead of raising.
    """
    response = None
    last_error = None
    attempt = -1
    max_retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                is_http_error = isinstance(e.cause, compat_HTTPError)
                if is_http_error:
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may carry an API error message
                        error_body = self._webpage_read_content(
                            e.cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if attempt < max_retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return None

        # Only reached when the API call succeeded: every path through the
        # handler above continues, raises or returns
        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return None
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return None
    return response

@staticmethod
def is_music_url(url):
    """Return True when ``url`` starts with ``http(s)://music.youtube.com/``."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))

818

819

def _extract_video(self, renderer):
    """Turn a video renderer dict into a ``url``-type result for ``YoutubeIE``.

    ``upload_date`` is only filled in (from the published-time text) when
    the ``approximate_date`` extractor argument of ``youtubetab`` is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)
    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    # A scheduled start wins over "streamed ..." text, which wins over live markers
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

860

IE_DESC = 'YouTube'

861

_VALID_URL = r"""(?x)^

862

(

863

(?:https?://|//) # http(s):// or protocol-independent URL

864

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

865

(?:www\.)?deturl\.com/www\.youtube\.com|

866

(?:www\.)?pwnyoutube\.com|

867

(?:www\.)?hooktube\.com|

868

(?:www\.)?yourepeat\.com|

869

tube\.majestyc\.net|

870

%(invidious)s|

871

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

872

(?:.*?\#/)? # handle anchor (#/) redirect urls

873

(?: # the various things that can precede the ID:

874

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

875

|(?: # or the v= param in all its forms

876

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

877

(?:\?|\#!?) # the params delimiter ? or # or #!

878

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

884

vid\.plus| # or vid.plus/xxxx

885

zwearz\.com/watch| # or zwearz.com/watch/xxxx

886

%(invidious)s

887

)/

888

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

889

)

890

)? # all until now is optional -> you can pass the naked ID

891

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

892

(?(1).+)? # if we found the ID, everything can follow

893

(?:\#|$)""" % {

894

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

895

}

896

_PLAYER_INFO_RE = (

897

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

898

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

899

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

900

)

901

_formats = {

902

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

903

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

904

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

905

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

906

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

907

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

908

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

909

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

910

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

911

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

912

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

913

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

914

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

915

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

916

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

917

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

918

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

919

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

924

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

925

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

926

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

927

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

928

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

929

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

930

931

# Apple HTTP Live Streaming

932

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

933

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

934

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

935

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

936

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

937

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

938

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

939

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

940

941

# DASH mp4 video

942

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

943

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

944

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

945

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

946

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

947

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

948

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

949

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

950

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

951

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

952

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

953

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

954

955

# Dash mp4 audio

956

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

957

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

958

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

959

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

960

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

961

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

962

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

963

964

# Dash webm

965

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

966

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

967

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

968

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

969

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

970

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

971

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

972

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

974

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

975

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

976

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

977

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

978

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

979

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

980

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

981

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

982

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

983

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

984

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

985

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

986

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

987

988

# Dash webm audio

989

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

990

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

991

992

# Dash webm audio with opus inside

993

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

994

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

995

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

996

997

# RTMP (unnamed)

998

'_rtmp': {'protocol': 'rtmp'},

999

1000

# av01 video only formats sometimes served with "unknown" codecs

1001

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1002

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1003

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1004

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1005

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1006

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1007

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1008

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1009

}

1010

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1022

'uploader': 'Philipp Hagemeister',

1023

'uploader_id': 'phihag',

1024

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1025

'channel': 'Philipp Hagemeister',

1026

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1027

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1028

'upload_date': '20121002',

1029

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1030

'categories': ['Science & Technology'],

1031

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1036

'playable_in_embed': True,

1037

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1038

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1047

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1052

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1053

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1054

'uploader': 'SET India',

1055

'uploader_id': 'setindia',

1056

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1057

'age_limit': 18,

1058

},

1059

'skip': 'Private video',

1060

},

1061

{

1062

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1063

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1068

'uploader': 'Philipp Hagemeister',

1069

'uploader_id': 'phihag',

1070

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1071

'channel': 'Philipp Hagemeister',

1072

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1073

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1074

'upload_date': '20121002',

1075

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1076

'categories': ['Science & Technology'],

1077

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1082

'playable_in_embed': True,

1083

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1084

'live_status': 'not_live',

1085

'age_limit': 0,

1086

'channel_follower_count': int

1087

},

1088

'params': {

1089

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1094

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1099

'uploader_id': '8KVIDEO',

1100

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1101

'description': '',

1102

'uploader': '8KVIDEO',

1103

'title': 'UHDTV TEST 8K VIDEO.mp4'

1104

},

1105

'params': {

1106

'youtube_include_dash_manifest': True,

1107

'format': '141',

1108

},

1109

'skip': 'format 141 not served anymore',

1110

},

1111

# DASH manifest with encrypted signature

1112

{

1113

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1118

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1119

'duration': 244,

1120

'uploader': 'AfrojackVEVO',

1121

'uploader_id': 'AfrojackVEVO',

1122

'upload_date': '20131011',

1123

'abr': 129.495,

1124

'like_count': int,

1125

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1126

'playable_in_embed': True,

1127

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1128

'view_count': int,

1129

'track': 'The Spark',

1130

'live_status': 'not_live',

1131

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1132

'channel': 'Afrojack',

1133

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1134

'tags': 'count:19',

1135

'availability': 'public',

1136

'categories': ['Music'],

1137

'age_limit': 0,

1138

'alt_title': 'The Spark',

1139

'channel_follower_count': int

1140

},

1141

'params': {

1142

'youtube_include_dash_manifest': True,

1143

'format': '141/bestaudio[ext=m4a]',

1144

},

1145

},

1146

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1147

{

1148

'note': 'Embed allowed age-gate video',

1149

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1154

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1155

'duration': 142,

1156

'uploader': 'The Witcher',

1157

'uploader_id': 'WitcherGame',

1158

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1159

'upload_date': '20140605',

1160

'age_limit': 18,

1161

'categories': ['Gaming'],

1162

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1163

'availability': 'needs_auth',

1164

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1165

'like_count': int,

1166

'channel': 'The Witcher',

1167

'live_status': 'not_live',

1168

'tags': 'count:17',

1169

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1170

'playable_in_embed': True,

1171

'view_count': int,

1172

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1177

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1182

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1183

'upload_date': '20200408',

1184

'uploader_id': 'FlyingKitty900',

1185

'uploader': 'FlyingKitty',

1186

'age_limit': 18,

1187

'availability': 'needs_auth',

1188

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1189

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1190

'channel': 'FlyingKitty',

1191

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1192

'view_count': int,

1193

'categories': ['Entertainment'],

1194

'live_status': 'not_live',

1195

'tags': ['Flyingkitty', 'godzilla 2'],

1196

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1197

'like_count': int,

1198

'duration': 177,

1199

'playable_in_embed': True,

1200

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1205

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1206

'info_dict': {

1207

'id': 'Tq92D6wQ1mg',

1208

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1209

'ext': 'mp4',

1210

'upload_date': '20191227',

1211

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1212

'uploader': 'Projekt Melody',

1213

'description': 'md5:17eccca93a786d51bc67646756894066',

1214

'age_limit': 18,

1215

'like_count': int,

1216

'availability': 'needs_auth',

1217

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1218

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1219

'view_count': int,

1220

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1221

'channel': 'Projekt Melody',

1222

'live_status': 'not_live',

1223

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1224

'playable_in_embed': True,

1225

'categories': ['Entertainment'],

1226

'duration': 106,

1227

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1228

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1233

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1238

'uploader': 'Herr Lurik',

1239

'uploader_id': 'st3in234',

1240

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1241

'upload_date': '20130730',

1242

'track': 'Such mich find mich',

1243

'age_limit': 0,

1244

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1245

'like_count': int,

1246

'playable_in_embed': False,

1247

'creator': 'OOMPH!',

1248

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1249

'view_count': int,

1250

'alt_title': 'Such mich find mich',

1251

'duration': 210,

1252

'channel': 'Herr Lurik',

1253

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1254

'categories': ['Music'],

1255

'availability': 'public',

1256

'uploader_url': 'http://www.youtube.com/user/st3in234',

1257

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1258

'live_status': 'not_live',

1259

'artist': 'OOMPH!',

1260

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1265

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1266

'only_matching': True,

1267

},

1268

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1269

# YouTube Red ad is not captured for creator

1270

{

1271

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1277

'uploader_id': 'deadmau5',

1278

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1279

'creator': 'deadmau5',

1280

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1281

'uploader': 'deadmau5',

1282

'title': 'Deadmau5 - Some Chords (HD)',

1283

'alt_title': 'Some Chords',

1284

'availability': 'public',

1285

'tags': 'count:14',

1286

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1287

'view_count': int,

1288

'live_status': 'not_live',

1289

'channel': 'deadmau5',

1290

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1291

'like_count': int,

1292

'track': 'Some Chords',

1293

'artist': 'deadmau5',

1294

'playable_in_embed': True,

1295

'age_limit': 0,

1296

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1297

'categories': ['Music'],

1298

'album': 'Some Chords',

1299

'channel_follower_count': int

1300

},

1301

'expected_warnings': [

1302

'DASH manifest missing',

1303

]

1304

},

1305

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1306

{

1307

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1313

'uploader_id': 'olympic',

1314

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1315

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1316

'uploader': 'Olympics',

1317

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1318

'like_count': int,

1319

'release_timestamp': 1343767800,

1320

'playable_in_embed': True,

1321

'categories': ['Sports'],

1322

'release_date': '20120731',

1323

'channel': 'Olympics',

1324

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1325

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1326

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1327

'age_limit': 0,

1328

'availability': 'public',

1329

'live_status': 'was_live',

1330

'view_count': int,

1331

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1332

'channel_follower_count': int

1333

},

1334

'params': {

1335

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1345

'duration': 85,

1346

'upload_date': '20110310',

1347

'uploader_id': 'AllenMeow',

1348

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1349

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1350

'uploader': '孫ᄋᄅ',

1351

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1352

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1357

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1358

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1359

'view_count': int,

1360

'categories': ['People & Blogs'],

1361

'like_count': int,

1362

'live_status': 'not_live',

1363

'availability': 'unlisted',

1364

'channel_follower_count': int

1365

},

1366

},

1367

# url_encoded_fmt_stream_map is empty string

1368

{

1369

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1374

'description': '',

1375

'upload_date': '20150404',

1376

'uploader_id': 'spbelect',

1377

'uploader': 'Наблюдатели Петербурга',

1378

},

1379

'params': {

1380

'skip_download': 'requires avconv',

1381

},

1382

'skip': 'This live event has ended.',

1383

},

1384

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1385

{

1386

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1391

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1392

'duration': 220,

1393

'upload_date': '20150625',

1394

'uploader_id': 'dorappi2000',

1395

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1396

'uploader': 'dorappi2000',

1397

'formats': 'mincount:31',

1398

},

1399

'skip': 'not actual anymore',

1400

},

1401

# DASH manifest with segment_list

1402

{

1403

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1404

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1409

'uploader': 'Airtek',

1410

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1411

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1412

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1413

},

1414

'params': {

1415

'youtube_include_dash_manifest': True,

1416

'format': '135', # bestvideo

1417

},

1418

'skip': 'This live event has ended.',

1419

},

1420

{

1421

# Multifeed videos (multiple cameras), URL is for Main Camera

1422

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1423

'info_dict': {

1424

'id': 'jvGDaLqkpTg',

1425

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1426

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1433

'description': 'md5:e03b909557865076822aa169218d6a5d',

1434

'duration': 10643,

1435

'upload_date': '20161111',

1436

'uploader': 'Team PGP',

1437

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1438

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1445

'description': 'md5:e03b909557865076822aa169218d6a5d',

1446

'duration': 10991,

1447

'upload_date': '20161111',

1448

'uploader': 'Team PGP',

1449

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1450

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1457

'description': 'md5:e03b909557865076822aa169218d6a5d',

1458

'duration': 10995,

1459

'upload_date': '20161111',

1460

'uploader': 'Team PGP',

1461

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1462

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1469

'description': 'md5:e03b909557865076822aa169218d6a5d',

1470

'duration': 10990,

1471

'upload_date': '20161111',

1472

'uploader': 'Team PGP',

1473

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1474

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1479

},

1480

'skip': 'Not multifeed anymore',

1481

},

1482

{

1483

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1484

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1485

'info_dict': {

1486

'id': 'gVfLd0zydlo',

1487

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1488

},

1489

'playlist_count': 2,

1490

'skip': 'Not multifeed anymore',

1491

},

1492

{

1493

'url': 'https://vid.plus/FlRa-iH7PGw',

1494

'only_matching': True,

1495

},

1496

{

1497

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1498

'only_matching': True,

1499

},

1500

{

1501

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1502

# Also tests cut-off URL expansion in video description (see

1503

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1504

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1505

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1510

'alt_title': 'Dark Walk',

1511

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1512

'duration': 133,

1513

'upload_date': '20151119',

1514

'uploader_id': 'IronSoulElf',

1515

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1516

'uploader': 'IronSoulElf',

1517

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1518

'track': 'Dark Walk',

1519

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1520

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1521

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1522

'categories': ['Film & Animation'],

1523

'view_count': int,

1524

'live_status': 'not_live',

1525

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1526

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1527

'tags': 'count:13',

1528

'availability': 'public',

1529

'channel': 'IronSoulElf',

1530

'playable_in_embed': True,

1531

'like_count': int,

1532

'age_limit': 0,

1533

'channel_follower_count': int

1534

},

1535

'params': {

1536

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1541

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1542

'only_matching': True,

1543

},

1544

{

1545

# Video with yt:stretch=17:0

1546

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1551

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1552

'upload_date': '20151107',

1553

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1554

'uploader': 'CH GAMER DROID',

1555

},

1556

'params': {

1557

'skip_download': True,

1558

},

1559

'skip': 'This video does not exist.',

1560

},

1561

{

1562

# Video with incomplete 'yt:stretch=16:'

1563

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1564

'only_matching': True,

1565

},

1566

{

1567

# Video licensed under Creative Commons

1568

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1573

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1574

'duration': 721,

1575

'upload_date': '20150127',

1576

'uploader_id': 'BerkmanCenter',

1577

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1578

'uploader': 'The Berkman Klein Center for Internet & Society',

1579

'license': 'Creative Commons Attribution license (reuse allowed)',

1580

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1581

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1582

'like_count': int,

1583

'age_limit': 0,

1584

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1585

'channel': 'The Berkman Klein Center for Internet & Society',

1586

'availability': 'public',

1587

'view_count': int,

1588

'categories': ['Education'],

1589

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1590

'live_status': 'not_live',

1591

'playable_in_embed': True,

1592

'channel_follower_count': int

1593

},

1594

'params': {

1595

'skip_download': True,

},

},

{

# Channel-like uploader_url

1600

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1605

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1606

'duration': 4060,

1607

'upload_date': '20151119',

1608

'uploader': 'Bernie Sanders',

1609

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1610

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1611

'license': 'Creative Commons Attribution license (reuse allowed)',

1612

'playable_in_embed': True,

1613

'tags': 'count:12',

1614

'like_count': int,

1615

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1616

'age_limit': 0,

1617

'availability': 'public',

1618

'categories': ['News & Politics'],

1619

'channel': 'Bernie Sanders',

1620

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1621

'view_count': int,

1622

'live_status': 'not_live',

1623

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1624

'channel_follower_count': int

1625

},

1626

'params': {

1627

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1632

'only_matching': True,

1633

},

1634

{

1635

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1636

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1637

'only_matching': True,

1638

},

1639

{

1640

# Rental video preview

1641

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1646

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1647

'upload_date': '20150811',

1648

'uploader': 'FlixMatrix',

1649

'uploader_id': 'FlixMatrixKaravan',

1650

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1651

'license': 'Standard YouTube License',

1652

},

1653

'params': {

1654

'skip_download': True,

1655

},

1656

'skip': 'This video is not available.',

1657

},

1658

{

1659

# YouTube Red video with episode data

1660

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1665

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1666

'duration': 2085,

1667

'upload_date': '20170118',

1668

'uploader': 'Vsauce',

1669

'uploader_id': 'Vsauce',

1670

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1671

'series': 'Mind Field',

1672

'season_number': 1,

1673

'episode_number': 1,

1674

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1675

'tags': 'count:12',

1676

'view_count': int,

1677

'availability': 'public',

1678

'age_limit': 0,

1679

'channel': 'Vsauce',

1680

'episode': 'Episode 1',

1681

'categories': ['Entertainment'],

1682

'season': 'Season 1',

1683

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1684

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1685

'like_count': int,

1686

'playable_in_embed': True,

1687

'live_status': 'not_live',

1688

'channel_follower_count': int

1689

},

1690

'params': {

1691

'skip_download': True,

1692

},

1693

'expected_warnings': [

1694

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1699

# as inappropriate or offensive to some audiences.

1700

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1705

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1706

'duration': 965,

1707

'upload_date': '20140124',

1708

'uploader': 'New Century Foundation',

1709

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1710

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1711

},

1712

'params': {

1713

'skip_download': True,

1714

},

1715

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1720

'only_matching': True,

1721

},

1722

{

1723

# geo restricted to JP

1724

'url': 'sJL6WA-aGkQ',

1725

'only_matching': True,

1726

},

1727

{

1728

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1729

'only_matching': True,

1730

},

1731

{

1732

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1733

'only_matching': True,

1734

},

1735

{

1736

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1737

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1738

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1743

'only_matching': True,

1744

},

1745

{

1746

# Video with unsupported adaptive stream type formats

1747

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1752

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1753

'duration': 433,

1754

'upload_date': '20130923',

1755

'uploader': 'Amelia Putri Harwita',

1756

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1757

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1758

'formats': 'maxcount:10',

1759

},

1760

'params': {

1761

'skip_download': True,

1762

'youtube_include_dash_manifest': False,

1763

},

1764

'skip': 'not actual anymore',

1765

},

1766

{

1767

# YouTube Music auto-generated description

1768

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1773

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1774

'upload_date': '20190312',

1775

'uploader': 'Stephen - Topic',

1776

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1777

'artist': 'Stephen',

1778

'track': 'Voyeur Girl',

1779

'album': 'it\'s too much love to know my dear',

1780

'release_date': '20190313',

1781

'release_year': 2019,

1782

'alt_title': 'Voyeur Girl',

1783

'view_count': int,

1784

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1785

'playable_in_embed': True,

1786

'like_count': int,

1787

'categories': ['Music'],

1788

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1789

'channel': 'Stephen',

1790

'availability': 'public',

1791

'creator': 'Stephen',

1792

'duration': 169,

1793

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1794

'age_limit': 0,

1795

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1796

'tags': 'count:11',

1797

'live_status': 'not_live',

1798

'channel_follower_count': int

1799

},

1800

'params': {

1801

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1806

'only_matching': True,

1807

},

1808

{

1809

# invalid -> valid video id redirection

1810

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1815

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1816

'upload_date': '20090125',

1817

'uploader': 'Prochorowka',

1818

'uploader_id': 'Prochorowka',

1819

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1820

'artist': 'Panjabi MC',

1821

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1822

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1823

},

1824

'params': {

1825

'skip_download': True,

1826

},

1827

'skip': 'Video unavailable',

1828

},

1829

{

1830

# empty description results in an empty string

1831

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1838

'uploader_id': 'ElevageOrVert',

1839

'uploader': 'ElevageOrVert',

1840

'view_count': int,

1841

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1842

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1843

'like_count': int,

1844

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1845

'tags': [],

1846

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1847

'availability': 'public',

1848

'age_limit': 0,

1849

'categories': ['Pets & Animals'],

1850

'duration': 7,

1851

'playable_in_embed': True,

1852

'live_status': 'not_live',

1853

'channel': 'ElevageOrVert',

1854

'channel_follower_count': int

1855

},

1856

'params': {

1857

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1862

# see [2] for an example with '};' inside ytInitialPlayerResponse

1863

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1864

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1865

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1870

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1871

'upload_date': '20130831',

1872

'uploader_id': 'kudvenkat',

1873

'uploader': 'kudvenkat',

1874

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1875

'like_count': int,

1876

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1877

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1878

'live_status': 'not_live',

1879

'categories': ['Education'],

1880

'availability': 'public',

1881

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1882

'tags': 'count:12',

1883

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1888

'channel_follower_count': int

1889

},

1890

'params': {

1891

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1896

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1897

'only_matching': True,

1898

},

1899

{

1900

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1901

'only_matching': True,

1902

},

1903

{

1904

# https://github.com/ytdl-org/youtube-dl/pull/28094

1905

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1911

'upload_date': '20141120',

1912

'uploader': 'The Cinematic Orchestra - Topic',

1913

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1914

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1915

'artist': 'The Cinematic Orchestra',

1916

'track': 'Burn Out',

1917

'album': 'Every Day',

1918

'like_count': int,

1919

'live_status': 'not_live',

1920

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1925

'creator': 'The Cinematic Orchestra',

1926

'channel': 'The Cinematic Orchestra',

1927

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1928

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1929

'availability': 'public',

1930

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1931

'categories': ['Music'],

1932

'playable_in_embed': True,

1933

'channel_follower_count': int

1934

},

1935

'params': {

1936

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1941

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1942

'only_matching': True,

1943

},

1944

{

1945

# controversial video, requires bpctr/contentCheckOk

1946

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1951

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1952

'uploader': 'CBS Mornings',

1953

'uploader_id': 'CBSThisMorning',

1954

'upload_date': '20140716',

1955

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1956

'duration': 170,

1957

'categories': ['News & Politics'],

1958

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1959

'view_count': int,

1960

'channel': 'CBS Mornings',

1961

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1962

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1963

'age_limit': 18,

1964

'availability': 'needs_auth',

1965

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1966

'like_count': int,

1967

'live_status': 'not_live',

1968

'playable_in_embed': True,

1969

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1974

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1979

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1980

'upload_date': '20201120',

1981

'uploader': 'Walk around Japan',

1982

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1983

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1984

'duration': 1456,

1985

'categories': ['Travel & Events'],

1986

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1987

'view_count': int,

1988

'channel': 'Walk around Japan',

1989

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1990

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1991

'age_limit': 0,

1992

'availability': 'public',

1993

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1994

'live_status': 'not_live',

1995

'playable_in_embed': True,

1996

'channel_follower_count': int

1997

},

1998

'params': {

1999

'skip_download': True,

2000

},

2001

}, {

2002

# Has multiple audio streams

2003

'url': 'WaOKSUlf4TM',

2004

'only_matching': True

2005

}, {

2006

# Requires Premium: has format 141 when requested using YTM url

2007

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2008

'only_matching': True

2009

}, {

2010

# multiple subtitles with same lang_code

2011

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2012

'only_matching': True,

2013

}, {

2014

# Force use of the android client fallback

2015

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2016

'info_dict': {

2017

'id': 'YOelRv7fMxY',

2018

'title': 'DIGGING A SECRET TUNNEL Part 1',

2019

'ext': '3gp',

2020

'upload_date': '20210624',

2021

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2022

'uploader': 'colinfurze',

2023

'uploader_id': 'colinfurze',

2024

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2025

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2026

'duration': 596,

2027

'categories': ['Entertainment'],

2028

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2029

'view_count': int,

2030

'channel': 'colinfurze',

2031

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2032

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2033

'age_limit': 0,

2034

'availability': 'public',

2035

'like_count': int,

2036

'live_status': 'not_live',

2037

'playable_in_embed': True,

2038

'channel_follower_count': int

2039

},

2040

'params': {

2041

'format': '17', # 3gp format available on android

2042

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2047

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2048

'only_matching': True,

2049

'params': {

2050

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2055

'only_matching': True,

2056

}, {

2057

'note': 'Storyboards',

2058

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2064

'uploader_id': 'scishow',

2065

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2066

'upload_date': '20140324',

2067

'uploader': 'SciShow',

2068

'like_count': int,

2069

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2070

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2071

'view_count': int,

2072

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2073

'playable_in_embed': True,

2074

'tags': 'count:12',

2075

'uploader_url': 'http://www.youtube.com/user/scishow',

2076

'availability': 'public',

2077

'channel': 'SciShow',

2078

'live_status': 'not_live',

2079

'duration': 248,

2080

'categories': ['Education'],

2081

'age_limit': 0,

2082

'channel_follower_count': int

2083

}, 'params': {'format': 'mhtml', 'skip_download': True}

}

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs whose query string carries a non-empty ``list`` value are
    rejected here; everything else is decided by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level; the local
    # import is presumably deliberate -- confirm before removing it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)

2095

2096

def __init__(self, *args, **kwargs):
    """Initialise the extractor and its two per-instance caches.

    ``_code_cache`` holds downloaded player code keyed by player id;
    ``_player_cache`` holds extracted signature / nsig functions and results.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}

2100

2101

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the "from start" formats into lazily generated live DASH formats.

    Only formats flagged with ``is_from_start`` are touched. Each of them is
    mutated in place: marked live, given the ``http_dash_segments_generator``
    protocol and a ``fragments`` callable bound to ``_live_dash_fragments``.
    The callable receives ``mpd_feed`` so it can ask for fresh manifest
    data while the download is running.
    """
    # Serialises manifest refreshes requested through mpd_feed
    lock = threading.Lock()

    # State shared (via nonlocal) between the closures below
    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses if more than `delay` seconds
        passed since the last fetch, replacing formats/is_live."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refresh itself is done under the lock
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2144

2145

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generator yielding ``{'url': ...}`` fragment dicts of a live stream.

    The newest sequence number is learnt either from the MPD manifest or
    from the ``X-Head-Seqnum`` header of the last yielded segment. All
    not-yet-yielded sequence numbers up to it are then emitted. The loop
    ends when the stream is no longer live or when ``no_fragment_score``
    exceeds 30.

    @param format_id        format whose fragments are generated
    @param live_start_time  start time of the live stream (may be falsy)
    @param mpd_feed         callable (format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; 'start' and 'last_error' are read from it
    """
    # Seconds between polls, and the 120 hour window (in seconds)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
        lack_early_segments = True

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest data; returns (should_continue, last sequence number)."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Use the short refresh delay when asked to, or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        # NOTE(review): `last_seq` below is read from the enclosing scope,
        # where it is first bound inside the while loop. If the very first
        # call reaches `return False, last_seq` this looks like it would
        # raise NameError -- confirm.
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        # NOTE(review): next() without a default raises StopIteration when no
        # stream number matches -- verify that cannot happen here.
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        # Give up after too many polls without new fragments
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                # Fall back to the manifest on the next iteration
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        # range() below is exclusive, so make last_seq one past the newest segment
        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Do not go further back than MAX_DURATION worth of fragments
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # The exception is only a way to leave the for loop and
                    # `continue` the while loop (see the except clause below)
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Poll at most once every FETCH_SPAN seconds
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2245

2246

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    ``PLAYER_JS_URL`` is looked up first, then ``jsUrl`` inside
    ``WEB_PLAYER_CONTEXT_CONFIGS``. *webpage* is accepted but not used here.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    relative_url = traverse_obj(
        ytcfgs, *candidate_paths, get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', relative_url) if relative_url else None

2253

2254

def _download_player_url(self, video_id, fatal=False):

2255

res = self._download_webpage(

2256

'https://www.youtube.com/iframe_api',

2257

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2258

if res:

2259

player_version = self._search_regex(

2260

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2261

if player_version:

2262

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2263

2264

def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of *example_sig*."""
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(compat_str, part_lengths))

2267

2268

@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first matching ``_PLAYER_INFO_RE`` pattern.

    Raises ExtractorError when none of the patterns matches *player_url*.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2277

2278

def _load_player(self, video_id, player_url, fatal=True):

2279

player_id = self._extract_player_info(player_url)

2280

if player_id not in self._code_cache:

2281

code = self._download_webpage(

2282

player_url, video_id, fatal=fatal,

2283

note='Downloading player ' + player_id,

2284

errnote='Download of %s failed' % player_url)

2285

if code:

2286

self._code_cache[player_id] = code

2287

return self._code_cache.get(player_id)

2288

2289

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures shaped like *example_sig*.

    A cached index list from the 'youtube-sigfuncs' cache is used when
    present. Otherwise the function is parsed from the player code, and the
    index permutation it produces on a probe string is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Run the function over chr(0)..chr(len-1): the code points of the
    # output are exactly the source indices it picks.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    indices = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, indices)
    return sig_func

2311

2312

def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    Does nothing unless the 'youtube_print_sig_code' param is set. *func* is
    run over a probe string to learn which input indices it selects, and the
    selection is rendered as a sum of index/slice expressions on ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield 's[...]' expressions covering idxs, merging +1/-1 runs into slices."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is open; otherwise it is the run's +1/-1 stride
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at prev: emit it and close it
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Open a new run starting at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): `i` is only bound by the loop above; with fewer than
        # two indices this looks like it would raise UnboundLocalError -- confirm.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # chr(0)..chr(len-1): the code points of the output are the picked indices
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): the leading whitespace of the 'return' line below may have
    # been collapsed in this copy of the file -- verify against upstream.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2353

2354

def _parse_sig_js(self, jscode):
    """Locate the signature function in *jscode* and return a Python wrapper.

    The function name is found with the patterns below (tried in order), the
    function is extracted with JSInterpreter, and the returned callable
    takes the signature string ``s`` and calls the JS function with ``[s]``.
    """
    # Literal parentheses must be written as \( and \). A bare `$` outside a
    # character class is an end-of-string anchor, so a pattern with `$`
    # followed by more content can never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2377

2378

def _decrypt_signature(self, s, video_id, player_url):

2379

"""Turn the encrypted s field into a working signature"""

2380

2381

if player_url is None:

2382

raise ExtractorError('Cannot decrypt signature without player_url')

2383

2384

try:

2385

player_id = (player_url, self._signature_cache_id(s))

2386

if player_id not in self._player_cache:

2387

func = self._extract_signature_function(

2388

video_id, player_url, s

2389

)

2390

self._player_cache[player_id] = func

2391

func = self._player_cache[player_id]

2392

self._print_sig_code(func, s)

2393

return func(s)

2394

except Exception as e:

2395

raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2396

2397

def _decrypt_nsig(self, s, video_id, player_url):
    """Return the decrypted value for the encrypted ``n`` field *s*.

    Both the per-player n function and every decrypted value are memoised
    in ``self._player_cache``. Failures are re-raised as ExtractorError
    carrying the traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2417

2418

def _extract_n_function_name(self, jscode):
    """Return the name of the ``n`` parameter transform function in *jscode*.

    The player code refers to it either directly, as ``name(x)``, or through
    an array, as ``name[idx](x)``. In the array case the array literal
    (``var name = [...];``) is parsed and the entry at ``idx`` is returned.
    """
    # Literal parentheses must be written as \( and \). A bare `$` outside a
    # character class is an end-of-string anchor and would make this pattern
    # unmatchable.
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2427

2428

def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's ``n`` transform to a string.

    The function code is taken from the 'youtube-nsig' cache when available.
    Otherwise it is extracted from the player code and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt

2445

2446

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value comes from ``ytcfg['STS']`` when available, else from the
    player code. Without a player URL this raises ExtractorError if *fatal*,
    or warns and returns None otherwise.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2469

2470

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    Warns and returns when no tracking URL is present in *player_responses*.
    The request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    parsed = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(parsed.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    playback_url = compat_urlparse.urlunparse(
        parsed._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2495

2496

@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in *webpage*, in order of the three scans.

    The scans are: player embed URLs, lazyYT ``data-youtube-id`` values, and
    ``data-video_id`` values of the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(match.group('url')) for match in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazyyt_re = r'class="lazyYT" data-youtube-id="([^"]+)"'
    entries += [unescapeHTML(video_id) for video_id in re.findall(lazyyt_re, webpage)]

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall yields one tuple per match; its last element is the video id group
    for groups in re.findall(importer_re, webpage):
        entries.append(groups[-1])

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first embed found by ``_extract_urls``, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)

2532

2533

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2539

2540

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar renderer in *data*.

    Start times come from ``timeRangeStartMillis`` (converted to seconds),
    titles from the chapter renderer's ``simpleText`` title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)

2554

2555

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-markers lists of the engagement panels.

    Each list is tried in turn and the first non-empty chapter list is
    returned; ``[]`` when no list produces any chapter.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2570

2571

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2572

chapters = []

2573

last_chapter = {'start_time': 0}

2574

for idx, chapter in enumerate(chapter_list or []):

2575

title = chapter_title(chapter)

2576

start_time = chapter_time(chapter)

2577

if start_time is None:

2578

continue

2579

last_chapter['end_time'] = start_time

2580

if start_time < last_chapter['start_time']:

2581

if idx == 1:

2582

chapters.pop()

2583

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2584

else:

2585

self.report_warning(f'Invalid start time for chapter "{title}"')

2586

continue

2587

last_chapter = {'start_time': start_time, 'title': title}

2588

chapters.append(last_chapter)

2589

last_chapter['end_time'] = duration

2590

return chapters

2591

2592

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Parse the JSON captured by *regex* in *webpage*.

    The pattern is first tried with ``_YT_INITIAL_BOUNDARY_RE`` appended,
    then on its own; ``'{}'`` is parsed when neither matches. JSON parsing
    is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2596

2597

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment, reported as ``'root'`` when falsy.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    # Shorthand for the guarded lookups on this renderer
    lookup = functools.partial(try_get, comment_renderer)

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = lookup(
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    raw_votes = lookup(
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), compat_str)
    like_count = parse_count(raw_votes) or 0
    author_thumbnail = lookup(
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = lookup(lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = lookup(
        lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2631

2632

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts, following continuations page by page.

    Called without *parent* for top-level comments; it calls itself with
    *parent* set to a comment id to fetch that comment's replies. *tracker*
    is a dict of counters shared across those recursive calls.
    """

    # First value of an extractor argument, '' when it is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the comments header: records the estimated total in the
        tracker and returns the continuation for the requested sort order."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, each followed by its replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop entirely
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of the overall reply limit
                for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                    yield reply_comment

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # The first four values of the 'max_comments' argument; missing or
    # non-numeric ones become sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    # Only the top-level call has a header to process first
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        # NOTE(review): the leading whitespace inside the note strings below
        # may have been collapsed in this copy of the file -- verify against upstream.
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only provides the header / sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry is extract_thread's signal that the parent limit was reached
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction.

    Returns a lazy iterator over the comments of the 'comment-item-section'
    item section in *contents*, truncated to the first ``max_comments`` value.
    """
    def iter_comments(section_contents):
        comment_section = None
        for item in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)

2779

2780

@staticmethod

2781

def _get_checkok_params():

2782

return {'contentCheckOk': True, 'racyCheckOk': True}

2783

2784

@classmethod

2785

def _generate_player_context(cls, sts=None):

2786

context = {

2787

'html5Preference': 'HTML5_PREF_WANTS',

2788

}

2789

if sts is not None:

2790

context['signatureTimestamp'] = sts

2791

return {

2792

'playbackContext': {

2793

'contentPlaybackContext': context

2794

},

2795

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Tell whether *player_response* describes an age-gated video.

    True when ``desktopLegacyAgeGateReason`` is set, or when the playability
    status or reason contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

2809

2810

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of *player_response* is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2813

2814

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Query the 'player' endpoint for *video_id* as *client*.

    The signature timestamp is only looked up when *player_url* is given.
    Returns the response, or None when it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=note)
    return response or None

2830

2831

def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the 'player_client' extractor argument: a known
    client name, 'default' (android + web) or 'all' (every client whose
    name does not start with '_', highest priority first). Unknown names
    are skipped with a warning, and the defaults are used when nothing
    valid was requested. For music URLs the '<client>_music' variant of
    each requested client is appended when such a client exists.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that extending the result below never touches `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates the same list would also visit the newly appended names.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2854

2855

def _extract_player_ytcfg(self, client, video_id):
    """Download and return the ytcfg of the page belonging to *client*.

    Only 'web_music' and 'web_embedded' have a page to fetch; ``{}`` is
    returned for any other client or when no ytcfg can be extracted.
    """
    config_urls = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_urls.get(client)
    if not url:
        return {}

    note = 'Downloading %s config' % client.replace('_', ' ').strip()
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}

2864

2865

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    @param clients       List of client names to query, in order of preference
    @param video_id      ID of the video
    @param webpage       Watch page contents, or a falsy value if not downloaded
    @param master_ytcfg  ytcfg used for the 'web' client and as fallback
    @returns             (list of player responses, player_url)
    @raises              The last ExtractorError if no player response at all
                         could be obtained; otherwise errors are only warned
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack: pop() returns clients in the original order,
    # and clients appended later are processed next
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Only queue known clients that were not already requested
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # The first player URL found is reused for all subsequent clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, the response embedded in the webpage is reused if available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        # Fatal only when nothing was extracted; otherwise just warn
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

2931

2932

def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the streaming data of the player responses.

    Formats listed under 'formats'/'adaptiveFormats' are yielded first,
    followed by formats from the HLS and DASH manifests (unless skipped).

    @param streaming_data  List of 'streamingData' dicts
    @param video_id        ID of the video
    @param player_url      URL of the JS player used for signature/nsig
                           decryption; may be None
    @param is_live         Whether the video is live; affects which manifests
                           are requested
    """
    # itags: itag -> protocol it was seen with; stream_ids: "<itag>.<audio track id>" already yielded
    itags, stream_ids = {}, []
    # Quality names seen per itag/height; used to rank manifest formats below
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
    # Largest 'approxDurationMs' among all formats, or None if none is available
    approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: the URL and encrypted signature are packed in 'signatureCipher'
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            # The signature cannot be decrypted without the player
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                # Keep the format, but mark it so that it is deprioritized
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        # Bitrate value scaled down by 1000
        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for "descriptive" tracks, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            'preference': -10 if is_damaged else None,
        }
        # mimeType has the form '<type>/<subtype>; codecs="<codecs>"' (codecs part optional)
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False if this itag was already seen with the same protocol;
        # otherwise records the format id and quality on `f` and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag seen with another protocol: disambiguate the format id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Look up the quality by itag first, then by height; -1 if unknown
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is read from the '/clen/<n>' component of the URL
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield a 'storyboard' (mhtml) format dict for every well-formed level of the storyboard spec.

    The spec is a '|'-separated string: the first part is the base URL and
    each following part is a '#'-separated description of one level.
    Levels that do not have exactly 8 fields, or whose first five fields
    are not all non-zero integers, are reported and skipped.
    """
    spec_parts = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')
    # Reverse so that the base URL can be popped off the end
    spec_parts.reverse()
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec_parts.pop() or None))
    if not base_url:
        return

    last_level = len(spec_parts) - 1
    for idx, raw_level in enumerate(spec_parts):
        fields = raw_level.split('#')
        counts = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {idx}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name_pattern, sigh = fields[6:]

        url = base_url.replace('$L', str(last_level - idx)).replace('$N', name_pattern) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'path': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{idx}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }

3128

3129

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    @returns  (webpage, master_ytcfg, player_responses, player_url);
              webpage is None when 'webpage' is in the 'player_skip' argument
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3142

3143

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine the live status and extract all formats from the player responses.

    @returns  (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # Prefer 'isLive' from the video details; otherwise use the broadcast details
    live_flag = get_first(video_details, 'isLive')
    is_live = get_first(live_broadcast_details, 'isLiveNow') if live_flag is None else live_flag

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = [
        *self._extract_formats(streaming_data, video_id, player_url, is_live)]

    return live_broadcast_details, is_live, streaming_data, formats

3153

3154

def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Depending on the player responses, this returns:
      * a url_result pointing to the trailer, if a trailer video id is present
      * a playlist_result with one entry per feed for multifeed videos
        (unless 'noplaylist' is set or 'force_singlefeed' was smuggled)
      * otherwise, the info dict of the video itself
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no meta tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None here; appending to it directly would raise TypeError
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    # Only the first valid stretch keyword is applied
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Thumbnails actually found in the data, before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format for `lang_code` to `container`
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                # Not using lang_code == f'a-{trans_code}' here for future-proofing
                orig_lang = parse_qs(base_url).get('lang', [None])[-1]
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == trans_code else {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end times given in the URL fragment take precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the captured album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Must be bound even without initial data, since it is passed to
    # extract_comments() at the end
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        # Super title of the form "<series> S<season> • E<episode>"
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    # Try each known accessibility label layout until one yields a count
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
                info.update({
                    'channel': self._get_text(vor, 'title'),
                    'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line between rows means that more than one song is listed;
                # album/artist/track are then not taken from the rows
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    # Fill missing channel fields from the corresponding uploader fields
    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3615

3616

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for extraction methods taking (self, url, smuggled_data).

    The returned wrapper takes (self, url), unsmuggles the URL, flags music
    URLs in the smuggled data, calls `func`, and re-smuggles the data into
    the URL of every entry of the result.
    """
    def _with_smuggled_urls(entries, smuggled_data):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], smuggled_data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Nothing to pass through when there is no smuggled data
        if not smuggled_data:
            return result
        if result.get('entries'):
            result['entries'] = _with_smuggled_urls(result['entries'], smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """
    Return the channel id found in the webpage.

    The 'channelId' meta tag is preferred; otherwise the id is taken from the
    /channel/<id> part of one of the URL-carrying meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with "(...)+" a repeated
    # group only keeps its last repetition, i.e. a single character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3648

3649

@staticmethod

3650

def _extract_basic_item_renderer(item):

3651

# Modified from _extract_grid_item_renderer

3652

known_basic_renderers = (

3653

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3654

)

3655

for key, renderer in item.items():

3656

if not isinstance(renderer, dict):

3657

continue

3658

elif key in known_basic_renderers:

3659

return renderer

3660

elif key.startswith('grid') and key.endswith('Renderer'):

3661

return renderer

3662

3663

def _grid_entries(self, grid_renderer):

3664

for item in grid_renderer['items']:

3665

if not isinstance(item, dict):

3666

continue

3667

renderer = self._extract_basic_item_renderer(item)

3668

if not isinstance(renderer, dict):

3669

continue

3670

title = self._get_text(renderer, 'title')

3671

3672

# playlist

3673

playlist_id = renderer.get('playlistId')

3674

if playlist_id:

3675

yield self.url_result(

3676

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3677

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3682

if video_id:

3683

yield self._extract_video(renderer)

3684

continue

3685

# channel

3686

channel_id = renderer.get('channelId')

3687

if channel_id:

3688

yield self.url_result(

3689

'https://www.youtube.com/channel/%s' % channel_id,

3690

ie=YoutubeTabIE.ie_key(), video_title=title)

3691

continue

3692

# generic endpoint URL support

3693

ep_url = urljoin('https://www.youtube.com/', try_get(

3694

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3695

compat_str))

3696

if ep_url:

3697

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3698

if ie.suitable(ep_url):

3699

yield self.url_result(

3700

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3701

break

3702

3703

def _music_reponsive_list_entry(self, renderer):
    """
    Build a url_result for a music list item renderer.

    Tried in order: the item's own video id, the watch endpoint's playlist
    (with its video, if any), then the browse endpoint. Returns None when the
    renderer carries none of them.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_path + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_path + ('videoId',))
        if video_id:
            url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3720

3721

def _shelf_entries_from_content(self, shelf_renderer):

3722

content = shelf_renderer.get('content')

3723

if not isinstance(content, dict):

3724

return

3725

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3726

if renderer:

3727

# TODO: add support for nested playlists so each shelf is processed

3728

# as separate playlist

3729

# TODO: this includes only first N items

3730

for entry in self._grid_entries(renderer):

3731

yield entry

3732

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url_result for the shelf's own URL (if it has one), followed by
    the entries found in the shelf content.

    With skip_channels, a shelf whose URL contains '/channels?' yields nothing.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3753

3754

def _playlist_entries(self, video_list_renderer):

3755

for content in video_list_renderer['contents']:

3756

if not isinstance(content, dict):

3757

continue

3758

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3759

if not isinstance(renderer, dict):

3760

continue

3761

video_id = renderer.get('videoId')

3762

if not video_id:

3763

continue

3764

yield self._extract_video(renderer)

3765

3766

def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped in a rich item renderer, if it has a video id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3773

3774

def _video_entry(self, video_renderer):

3775

video_id = video_renderer.get('videoId')

3776

if video_id:

3777

return self._extract_video(video_renderer)

3778

3779

def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a community post: its attached video,
    its attached playlist, and every YouTube video linked in the post text
    (except a link to the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

3814

3815

def _post_thread_continuation_entries(self, post_thread_continuation):

3816

contents = post_thread_continuation.get('contents')

3817

if not isinstance(contents, list):

3818

return

3819

for content in contents:

3820

renderer = content.get('backstagePostThreadRenderer')

3821

if not isinstance(renderer, dict):

3822

continue

3823

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3828

for content in contents:

3829

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3830

if video_renderer:

3831

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    continuation_list is used as an out-parameter: it is reset to [None] and
    its single slot is filled in place with the continuation extracted from
    the innermost renderer that provides one.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each item is processed
            matched_key = next((key for key in section_item if key in handlers), None)
            if matched_key is None:
                continue
            matched = section_item[matched_key]
            # Handlers may produce empty (falsy) entries; drop them
            yield from filter(None, handlers[matched_key](matched))
            continuation_list[0] = self._extract_continuation(matched)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3881

3882

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab: first those embedded in the tab content,
    then those of each continuation page for as long as a continuation is
    available and the response has a recognised shape.
    """
    continuation_list = [None]
    # NB: the lambda reads the *current* binding of continuation_list, so the
    # fresh lists assigned below are the ones _extract_entries fills in
    extract_entries = lambda x: self._extract_entries(x, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for responses shaped as {'continuationContents': {key: renderer}}
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items, keyed by the first item's
    # renderer: (handler, key under which the handler expects the items)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        matched = next((key for key in continuation_contents if key in continuation_handlers), None)
        if matched is not None:
            continuation_renderer = continuation_contents[matched]
            continuation_list = [None]
            yield from continuation_handlers[matched](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched = next((key for key in first_item if key in item_handlers), None)
        if matched is None:
            # Unrecognised response shape; nothing more can be extracted
            break
        handler, container_key = item_handlers[matched]
        video_items_renderer = {container_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

3961

for tab in tabs:

3962

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

3963

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

3968

3969

@classmethod
def _extract_uploader(cls, data):
    """
    Return the uploader fields ('uploader', 'uploader_id', 'uploader_url')
    found in the playlist's secondary sidebar; fields that are None are
    omitted, and the dict is empty when there is no owner.
    """
    renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def _browse_endpoint(key):
        return try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint'][key], compat_str)

    fields = {
        'uploader': owner.get('text'),
        'uploader_id': _browse_endpoint('browseId'),
        'uploader_url': urljoin(
            'https://www.youtube.com/', _browse_endpoint('canonicalBaseUrl')),
    }
    return {name: value for name, value in fields.items() if value is not None}

3983

3984

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a channel/playlist page.

    Metadata comes from the channel metadata renderer when present, otherwise
    from the playlist metadata renderer; the entries are produced lazily by
    self._entries.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _add_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped:
            thumbnails.append({
                'url': uncropped,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The selected tab's title/expanded text is appended to the playlist title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4075

4076

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a mix playlist, requesting further pages through the
    'next' endpoint.

    Extraction stops when a page has no videos, when a page adds nothing
    after the last video already seen, when the first video shows up again,
    or when a response carries no playlist.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Pages overlap; resume right after the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer; fall back to
        # an empty dict so that the defaults in the query below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist data; _playlist_entries cannot handle None
            return

4110

4111

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist shown next to a video.

    A playlist that links to a URL different from the one being extracted is
    delegated to that URL; anything else is extracted here as a mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4128

4129

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            # Treat the selected option like a badge and stop looking
            badge_labels.add(label.lower())
            break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_private = is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)

4160

4161

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy value stored under the info_renderer key among
    the playlist sidebar items (of expected_type), or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary sidebar renderer. Otherwise the
    browse parameters of the button are used, with defaults when the button
    is missing, and the API response is returned (non-fatal request).
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return

    browse_id = params = None
    menu_items = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4205

4206

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying up to the
    'extractor_retries' param on network errors (other than HTTP 403/429)
    and on incomplete initial data.

    Returns (webpage, data). When not fatal, failures are reported as
    warnings and whatever was obtained so far is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    # A manual counter is used since `retries` is not guaranteed to be an int
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Download and parsing succeeded; surface any alerts carried by the page
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Return (data, ytcfg) for url.

    The webpage is tried first unless 'webpage' is among the 'skip'
    extractor args; when that yields no data, the URL is resolved through
    the API instead.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4278

4279

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the 'navigation/resolve_url' API and return the
    response of the endpoint it maps to ('browse' or 'next').

    If the URL resolves to neither, raise ExtractorError when fatal,
    otherwise report a warning and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its value)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if params:
            return self._extract_response(
                item_id=item_id, query=params, ep=api_ep, headers=headers,
                ytcfg=ytcfg, fatal=fatal, default_client=default_client,
                check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4296

4297

# Search 'params' value used by _search_results when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of every result page for a search query.

    params defaults to self._SEARCH_PARAMS; a falsy value sends no 'params'.
    Paging stops once a page provides no continuation.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative locations of the result list within a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    check_get_keys = tuple({path[0] for path in content_keys})

    continuation_list = [None]
    for page_num in itertools.count(1):
        # Merge the previous page's continuation (if any) into the request
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4328

IE_DESC = 'YouTube Tabs'

4329

# URLs handled by this extractor (verbose regex; whitespace inside the pattern is ignored).
# After the host, exactly one of the following must come before the ID:
#   * a channel prefix (captured as `channel_type`),
#   * a non-channel prefix such as a feed, hashtag or `list=` query (captured as `not_channel`),
#   * nothing at all (a "direct" URL), provided the ID is not one of the reserved names.
# NOTE: the pattern must not contain stray digits - in verbose mode they would be
# treated as literal characters that the URL is required to contain.
_VALID_URL = r'''(?x:
    https?://
        (?:\w+\.)?
        (?:
            youtube(?:kids)?\.com|
            %(invidious)s
        )/
        (?:
            (?P<channel_type>channel|c|user|browse)/|
            (?P<not_channel>
                feed/|hashtag/|
                (?:playlist|watch)\?.*?\blist=
            )|
            (?!(?:%(reserved_names)s)\b)  # Direct URLs
        )
        (?P<id>[^/?\#&]+)
)''' % {
    'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}

4349

IE_NAME = 'youtube:tab'

4350

4351

_TESTS = [{

4352

'note': 'playlists, multipage',

4353

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4354

'playlist_mincount': 94,

4355

'info_dict': {

4356

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4357

'title': 'Igor Kleiner - Playlists',

4358

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4359

'uploader': 'Igor Kleiner',

4360

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4361

'channel': 'Igor Kleiner',

4362

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4363

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4364

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4365

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4366

'channel_follower_count': int

4367

},

4368

}, {

4369

'note': 'playlists, multipage, different order',

4370

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4371

'playlist_mincount': 94,

4372

'info_dict': {

4373

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4374

'title': 'Igor Kleiner - Playlists',

4375

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4376

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4377

'uploader': 'Igor Kleiner',

4378

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4379

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4380

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4381

'channel': 'Igor Kleiner',

4382

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4383

'channel_follower_count': int

4384

},

4385

}, {

4386

'note': 'playlists, series',

4387

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4388

'playlist_mincount': 5,

4389

'info_dict': {

4390

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4391

'title': '3Blue1Brown - Playlists',

4392

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4393

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4394

'uploader': '3Blue1Brown',

4395

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4396

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4397

'channel': '3Blue1Brown',

4398

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4399

'tags': ['Mathematics'],

4400

'channel_follower_count': int

4401

},

4402

}, {

4403

'note': 'playlists, singlepage',

4404

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4405

'playlist_mincount': 4,

4406

'info_dict': {

4407

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4408

'title': 'ThirstForScience - Playlists',

4409

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4410

'uploader': 'ThirstForScience',

4411

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4412

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4413

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4414

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4415

'tags': 'count:13',

4416

'channel': 'ThirstForScience',

4417

'channel_follower_count': int

4418

}

4419

}, {

4420

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4421

'only_matching': True,

4422

}, {

4423

'note': 'basic, single video playlist',

4424

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4425

'info_dict': {

4426

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4427

'uploader': 'Sergey M.',

4428

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4429

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4434

'channel': 'Sergey M.',

4435

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4436

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4437

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4442

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4443

'info_dict': {

4444

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4445

'uploader': 'Sergey M.',

4446

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4447

'title': 'youtube-dl empty playlist',

4448

'tags': [],

4449

'channel': 'Sergey M.',

4450

'description': '',

4451

'modified_date': '20160902',

4452

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4453

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4454

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4460

'info_dict': {

4461

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4462

'title': 'lex will - Home',

4463

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4464

'uploader': 'lex will',

4465

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4466

'channel': 'lex will',

4467

'tags': ['bible', 'history', 'prophesy'],

4468

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4469

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4470

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4471

'channel_follower_count': int

4472

},

4473

'playlist_mincount': 2,

4474

}, {

4475

'note': 'Videos tab',

4476

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4477

'info_dict': {

4478

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4479

'title': 'lex will - Videos',

4480

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4481

'uploader': 'lex will',

4482

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4483

'tags': ['bible', 'history', 'prophesy'],

4484

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4485

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4486

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4487

'channel': 'lex will',

4488

'channel_follower_count': int

4489

},

4490

'playlist_mincount': 975,

4491

}, {

4492

'note': 'Videos tab, sorted by popular',

4493

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4494

'info_dict': {

4495

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4496

'title': 'lex will - Videos',

4497

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4498

'uploader': 'lex will',

4499

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4500

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4501

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4502

'channel': 'lex will',

4503

'tags': ['bible', 'history', 'prophesy'],

4504

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4505

'channel_follower_count': int

4506

},

4507

'playlist_mincount': 199,

4508

}, {

4509

'note': 'Playlists tab',

4510

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4511

'info_dict': {

4512

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4513

'title': 'lex will - Playlists',

4514

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4515

'uploader': 'lex will',

4516

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4517

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4518

'channel': 'lex will',

4519

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4520

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4521

'tags': ['bible', 'history', 'prophesy'],

4522

'channel_follower_count': int

4523

},

4524

'playlist_mincount': 17,

4525

}, {

4526

'note': 'Community tab',

4527

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4528

'info_dict': {

4529

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4530

'title': 'lex will - Community',

4531

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4532

'uploader': 'lex will',

4533

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4534

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4535

'channel': 'lex will',

4536

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4537

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4538

'tags': ['bible', 'history', 'prophesy'],

4539

'channel_follower_count': int

4540

},

4541

'playlist_mincount': 18,

4542

}, {

4543

'note': 'Channels tab',

4544

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4545

'info_dict': {

4546

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4547

'title': 'lex will - Channels',

4548

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4549

'uploader': 'lex will',

4550

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4551

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4552

'channel': 'lex will',

4553

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4554

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4555

'tags': ['bible', 'history', 'prophesy'],

4556

'channel_follower_count': int

4557

},

4558

'playlist_mincount': 12,

4559

}, {

4560

'note': 'Search tab',

4561

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4562

'playlist_mincount': 40,

4563

'info_dict': {

4564

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4565

'title': '3Blue1Brown - Search - linear algebra',

4566

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4567

'uploader': '3Blue1Brown',

4568

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4569

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4570

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4571

'tags': ['Mathematics'],

4572

'channel': '3Blue1Brown',

4573

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4574

'channel_follower_count': int

4575

},

4576

}, {

4577

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4578

'only_matching': True,

4579

}, {

4580

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4581

'only_matching': True,

4582

}, {

4583

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4584

'only_matching': True,

4585

}, {

4586

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4587

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4588

'info_dict': {

4589

'title': '29C3: Not my department',

4590

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4591

'uploader': 'Christiaan008',

4592

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4593

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4594

'tags': [],

4595

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4596

'view_count': int,

4597

'modified_date': '20150605',

4598

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4599

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4600

'channel': 'Christiaan008',

4601

},

4602

'playlist_count': 96,

4603

}, {

4604

'note': 'Large playlist',

4605

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4606

'info_dict': {

4607

'title': 'Uploads from Cauchemar',

4608

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4609

'uploader': 'Cauchemar',

4610

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4611

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4612

'tags': [],

4613

'modified_date': r're:\d{8}',

4614

'channel': 'Cauchemar',

4615

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4616

'view_count': int,

4617

'description': '',

4618

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4619

},

4620

'playlist_mincount': 1123,

4621

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4622

}, {

4623

'note': 'even larger playlist, 8832 videos',

4624

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4625

'only_matching': True,

4626

}, {

4627

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4628

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4629

'info_dict': {

4630

'title': 'Uploads from Interstellar Movie',

4631

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4632

'uploader': 'Interstellar Movie',

4633

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4634

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4635

'tags': [],

4636

'view_count': int,

4637

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4638

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4639

'channel': 'Interstellar Movie',

4640

'description': '',

4641

'modified_date': r're:\d{8}',

4642

},

4643

'playlist_mincount': 21,

4644

}, {

4645

'note': 'Playlist with "show unavailable videos" button',

4646

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4647

'info_dict': {

4648

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4649

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4650

'uploader': 'Phim Siêu Nhân Nhật Bản',

4651

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4652

'view_count': int,

4653

'channel': 'Phim Siêu Nhân Nhật Bản',

4654

'tags': [],

4655

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4656

'description': '',

4657

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4658

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4659

'modified_date': r're:\d{8}',

4660

},

4661

'playlist_mincount': 200,

4662

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4663

}, {

4664

'note': 'Playlist with unavailable videos in page 7',

4665

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4666

'info_dict': {

4667

'title': 'Uploads from BlankTV',

4668

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4669

'uploader': 'BlankTV',

4670

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4671

'channel': 'BlankTV',

4672

'channel_url': 'https://www.youtube.com/c/blanktv',

4673

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4674

'view_count': int,

4675

'tags': [],

4676

'uploader_url': 'https://www.youtube.com/c/blanktv',

4677

'modified_date': r're:\d{8}',

4678

'description': '',

4679

},

4680

'playlist_mincount': 1000,

4681

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4682

}, {

4683

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4684

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4685

'info_dict': {

4686

'title': 'Data Analysis with Dr Mike Pound',

4687

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4688

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4689

'uploader': 'Computerphile',

4690

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4691

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4692

'tags': [],

4693

'view_count': int,

4694

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4695

'channel_url': 'https://www.youtube.com/user/Computerphile',

4696

'channel': 'Computerphile',

4697

},

4698

'playlist_mincount': 11,

4699

}, {

4700

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4701

'only_matching': True,

4702

}, {

4703

'note': 'Playlist URL that does not actually serve a playlist',

4704

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4709

'uploader': 'STREEM',

4710

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4711

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4712

'upload_date': '20150526',

4713

'license': 'Standard YouTube License',

4714

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4715

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4722

},

4723

'skip': 'This video is not available.',

4724

'add_ie': [YoutubeIE.ie_key()],

4725

}, {

4726

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4727

'only_matching': True,

4728

}, {

4729

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4730

'only_matching': True,

4731

}, {

4732

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4733

'info_dict': {

4734

'id': 'GgL890LIznQ', # This will keep changing

4735

'ext': 'mp4',

4736

'title': str,

4737

'uploader': 'Sky News',

4738

'uploader_id': 'skynews',

4739

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4740

'upload_date': r're:\d{8}',

4741

'description': str,

4742

'categories': ['News & Politics'],

4743

'tags': list,

4744

'like_count': int,

4745

'release_timestamp': 1642502819,

4746

'channel': 'Sky News',

4747

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4748

'age_limit': 0,

4749

'view_count': int,

4750

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4751

'playable_in_embed': True,

4752

'release_date': '20220118',

4753

'availability': 'public',

4754

'live_status': 'is_live',

4755

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4756

'channel_follower_count': int

4757

},

4758

'params': {

4759

'skip_download': True,

4760

},

4761

'expected_warnings': ['Ignoring subtitle tracks found in '],

4762

}, {

4763

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4768

'uploader': 'The Young Turks',

4769

'uploader_id': 'TheYoungTurks',

4770

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4771

'upload_date': '20150715',

4772

'license': 'Standard YouTube License',

4773

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4774

'categories': ['News & Politics'],

4775

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4780

},

4781

'only_matching': True,

4782

}, {

4783

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4784

'only_matching': True,

4785

}, {

4786

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4787

'only_matching': True,

4788

}, {

4789

'note': 'A channel that is not live. Should raise error',

4790

'url': 'https://www.youtube.com/user/numberphile/live',

4791

'only_matching': True,

4792

}, {

4793

'url': 'https://www.youtube.com/feed/trending',

4794

'only_matching': True,

4795

}, {

4796

'url': 'https://www.youtube.com/feed/library',

4797

'only_matching': True,

4798

}, {

4799

'url': 'https://www.youtube.com/feed/history',

4800

'only_matching': True,

4801

}, {

4802

'url': 'https://www.youtube.com/feed/subscriptions',

4803

'only_matching': True,

4804

}, {

4805

'url': 'https://www.youtube.com/feed/watch_later',

4806

'only_matching': True,

4807

}, {

4808

'note': 'Recommended - redirects to home page.',

4809

'url': 'https://www.youtube.com/feed/recommended',

4810

'only_matching': True,

4811

}, {

4812

'note': 'inline playlist with not always working continuations',

4813

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4814

'only_matching': True,

4815

}, {

4816

'url': 'https://www.youtube.com/course',

4817

'only_matching': True,

4818

}, {

4819

'url': 'https://www.youtube.com/zsecurity',

4820

'only_matching': True,

4821

}, {

4822

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4823

'only_matching': True,

4824

}, {

4825

'url': 'https://www.youtube.com/TheYoungTurks/live',

4826

'only_matching': True,

4827

}, {

4828

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4835

}, {

4836

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4837

'only_matching': True,

4838

}, {

4839

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4840

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4841

'only_matching': True

4842

}, {

4843

'note': '/browse/ should redirect to /channel/',

4844

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4845

'only_matching': True

4846

}, {

4847

'note': 'VLPL, should redirect to playlist?list=PL...',

4848

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4849

'info_dict': {

4850

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4851

'uploader': 'NoCopyrightSounds',

4852

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4853

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4854

'title': 'NCS Releases',

4855

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4856

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4857

'modified_date': r're:\d{8}',

4858

'view_count': int,

4859

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4860

'tags': [],

4861

'channel': 'NoCopyrightSounds',

4862

},

4863

'playlist_mincount': 166,

4864

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4865

}, {

4866

'note': 'Topic, should redirect to playlist?list=UU...',

4867

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4868

'info_dict': {

4869

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4870

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4871

'title': 'Uploads from Royalty Free Music - Topic',

4872

'uploader': 'Royalty Free Music - Topic',

4873

'tags': [],

4874

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4875

'channel': 'Royalty Free Music - Topic',

4876

'view_count': int,

4877

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4878

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4879

'modified_date': r're:\d{8}',

4880

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4881

'description': '',

4882

},

4883

'expected_warnings': [

4884

'The URL does not have a videos tab',

4885

r'[Uu]navailable videos (are|will be) hidden',

4886

],

4887

'playlist_mincount': 101,

4888

}, {

4889

'note': 'Topic without a UU playlist',

4890

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

4891

'info_dict': {

4892

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

4893

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

4894

'tags': [],

4895

},

4896

'expected_warnings': [

4897

'the playlist redirect gave error',

4898

],

4899

'playlist_mincount': 9,

4900

}, {

4901

'note': 'Youtube music Album',

4902

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

4903

'info_dict': {

4904

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

4905

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

4910

'modified_date': r're:\d{8}',

4911

},

4912

'playlist_count': 50,

4913

}, {

4914

'note': 'unlisted single video playlist',

4915

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4916

'info_dict': {

4917

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4918

'uploader': 'colethedj',

4919

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4920

'title': 'yt-dlp unlisted playlist test',

4921

'availability': 'unlisted',

4922

'tags': [],

4923

'modified_date': '20211208',

4924

'channel': 'colethedj',

4925

'view_count': int,

4926

'description': '',

4927

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

4928

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4929

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

4934

'url': 'https://www.youtube.com/feed/recommended',

4935

'info_dict': {

4936

'id': 'recommended',

4937

'title': 'recommended',

4938

'tags': [],

4939

},

4940

'playlist_mincount': 50,

4941

'params': {

4942

'skip_download': True,

4943

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4944

},

4945

}, {

4946

'note': 'API Fallback: /videos tab, sorted by oldest first',

4947

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

4948

'info_dict': {

4949

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4950

'title': 'Cody\'sLab - Videos',

4951

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

4952

'uploader': 'Cody\'sLab',

4953

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4954

'channel': 'Cody\'sLab',

4955

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4956

'tags': [],

4957

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4958

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4959

'channel_follower_count': int

4960

},

4961

'playlist_mincount': 650,

4962

'params': {

4963

'skip_download': True,

4964

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4965

},

4966

}, {

4967

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

4968

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4969

'info_dict': {

4970

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4971

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4972

'title': 'Uploads from Royalty Free Music - Topic',

4973

'uploader': 'Royalty Free Music - Topic',

4974

'modified_date': r're:\d{8}',

4975

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4976

'description': '',

4977

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4978

'tags': [],

4979

'channel': 'Royalty Free Music - Topic',

4980

'view_count': int,

4981

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4982

},

4983

'expected_warnings': [

4984

'does not have a videos tab',

4985

r'[Uu]navailable videos (are|will be) hidden',

4986

],

4987

'playlist_mincount': 101,

4988

'params': {

4989

'skip_download': True,

4990

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4991

},

4992

}, {

4993

'note': 'non-standard redirect to regional channel',

4994

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

4995

'only_matching': True

}]

@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle ``url``.

    URLs that YoutubeIE accepts are always rejected here; everything else is
    decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

5002

5003

# Splits a URL into three named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional '/<word>' path segment; the conditional group only tries to
#          match it when the `not_channel` group of _VALID_URL did not participate
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5004

5005

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or feed page, or hand the URL over to another extractor.

    The URL is first normalised (host forced to www.youtube.com, tab name lower-cased,
    YouTube Music IDs rewritten), then the page data is fetched and dispatched, in
    order of preference, to ``_extract_from_tabs``, ``_extract_from_playlist`` or a
    ``url_result`` pointing at a single video.

    @param url            URL matched by _VALID_URL
    @param smuggled_data  Mapping; only its 'is_music_url' entry is read here
    @raises ExtractorError  when an album cannot be resolved, a watch URL has neither
                            video nor playlist ID, a requested /live tab is not live,
                            an explicitly requested tab does not exist, or the page
                            is not recognised at all
    """
    def split_url(target):
        # Named groups of _URL_RE, with groups that did not match mapped to ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def redirects_allowed():
        # Evaluated lazily, at the same points where the option is consulted below
        return 'no-youtube-channel-redirect' not in compat_opts

    item_id = self._match_id(url)
    parsed = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj, redirect_warning = split_url(url), None
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and redirects_allowed():
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and redirects_allowed():
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
        if redirects_allowed():
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was requested explicitly in the URL, so its absence is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Fall back to a single video: prefer the ID reported by the page over the one from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5134

5135

5136

class YoutubePlaylistIE(InfoExtractor):
    """Match playlist IDs and ``list=`` URLs and hand them over to YoutubeTabIE.

    The URL prefix in ``_VALID_URL`` is optional, so a bare playlist ID is
    accepted as well (see the first entries of ``_TESTS``).
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Returns False when YoutubeTabIE already accepts the URL, or when the
        query string carries a truthy first ``v`` value; otherwise defers to
        the inherited ``suitable`` check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs comes from the module-level ``..utils`` import; no local
        # re-import is needed.
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a ``/playlist`` URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the incoming URL, before it is rewritten below.
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query parameters; when there are none
        # (e.g. a bare playlist ID), fall back to the matched ID.
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5245

5246

5247

class YoutubeYtBeIE(InfoExtractor):
    """Handle ``youtu.be/<video id>`` links that also carry a ``list=`` parameter.

    The link is rewritten to a ``youtube.com/watch`` URL and passed on to
    YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist ID, not the video ID.
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5294

5295

5296

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's ``/live`` page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's ``/live`` URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5309

5310

5311

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's ``/videos`` page."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5325

5326

5327

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself is not used."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5344

5345

5346

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` key; configuration only."""
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearchdate`` key; configuration only."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``youtube.com/results`` and ``/search`` URLs.

    The search text is read from ``search_query`` or ``q``; an optional ``sp``
    query value is forwarded to ``_search_results`` unchanged.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both ID and title."""
        query_args = parse_qs(url)
        # ``search_query`` takes precedence over ``q`` when both are present.
        search_terms = (query_args.get('search_query') or query_args.get('q'))[0]
        search_params = query_args.get('sp', (None,))[0]
        entries = self._search_results(search_terms, search_params)
        return self.playlist_result(entries, search_terms, search_terms)

5404

5405

5406

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``music.youtube.com/search`` URLs.

    A result section is chosen either by an explicit ``sp`` query value or by
    the URL fragment (for example ``#songs``), looked up in ``_SECTIONS``.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (lower case) -> value passed as the search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled ``<query>[ - <section>]``."""
        query_args = parse_qs(url)
        query = (query_args.get('search_query') or query_args.get('q'))[0]
        params = query_args.get('sp', (None,))[0]
        if params:
            # Explicit ``sp``: label it with the matching section name, or with
            # the raw value itself when it is not a known section.
            section = next(
                (name for name, value in self._SECTIONS.items() if value == params),
                params)
        else:
            # No ``sp``: treat the text after the first '#' as a section name.
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown or missing fragment: search without a section.
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5457

5458

5459

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    A subclass has to provide _FEED_NAME; it is used to build both IE_NAME
    and the ``/feed/<name>`` URL that is handed to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's feed name."""
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched URL itself is not used."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5474

5475

5476

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself is not used."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5488

5489

5490

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; also matches the bare youtube.com front page."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True.
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, reached via the ``:ytsubs`` keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, reached via ``:ythis`` / ``:ythistory``."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID.

    ``_VALID_URL`` is anchored with ``$`` and only allows a single known
    parameter (or none) after ``watch?``. Extraction always fails with a hint
    about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        # The example commands name this program (yt-dlp), so the hint can be
        # copied and run as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Match ``youtube.com/clip/`` URLs, warn, and pass them on to the 'Generic' extractor."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported, then delegate the unchanged URL."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')

5587

5588

5589

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short ID and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)