jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	NO_DEFAULT,
	49	orderedSet,
	50	parse_codecs,
	51	parse_count,
	52	parse_duration,
	53	parse_iso8601,
	54	parse_qs,
	55	qualities,
	56	remove_end,
	57	remove_start,
	58	smuggle_url,
	59	str_or_none,
	60	str_to_int,
	61	strftime_or_none,
	62	traverse_obj,
	63	try_get,
	64	unescapeHTML,
	65	unified_strdate,
	66	unified_timestamp,
	67	unsmuggle_url,
	68	update_url_query,
	69	url_or_none,
	70	urljoin,
	71	variadic,
	72	)
	73
	74
	75	def get_first(obj, keys, **kwargs):
	76	return traverse_obj(obj, (..., variadic(keys)), *kwargs, get_all=False)
	77
	78
	79	# any clients starting with _ cannot be explicitly requested by the user
	80	INNERTUBE_CLIENTS = {
	81	'web': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB',
	86	'clientVersion': '2.20211221.00.00',
	87	}
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	90	},
	91	'web_embedded': {
	92	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_EMBEDDED_PLAYER',
	96	'clientVersion': '1.20211215.00.01',
	97	},
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	100	},
	101	'web_music': {
	102	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	103	'INNERTUBE_HOST': 'music.youtube.com',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_REMIX',
	107	'clientVersion': '1.20211213.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	111	},
	112	'web_creator': {
	113	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'WEB_CREATOR',
	117	'clientVersion': '1.20211220.02.00',
	118	}
	119	},
	120	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	121	},
	122	'android': {
	123	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	124	'INNERTUBE_CONTEXT': {
	125	'client': {
	126	'clientName': 'ANDROID',
	127	'clientVersion': '16.49',
	128	}
	129	},
	130	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	131	'REQUIRE_JS_PLAYER': False
	132	},
	133	'android_embedded': {
	134	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	135	'INNERTUBE_CONTEXT': {
	136	'client': {
	137	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	138	'clientVersion': '16.49',
	139	},
	140	},
	141	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	142	'REQUIRE_JS_PLAYER': False
	143	},
	144	'android_music': {
	145	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	146	'INNERTUBE_CONTEXT': {
	147	'client': {
	148	'clientName': 'ANDROID_MUSIC',
	149	'clientVersion': '4.57',
	150	}
	151	},
	152	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	153	'REQUIRE_JS_PLAYER': False
	154	},
	155	'android_creator': {
	156	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	157	'INNERTUBE_CONTEXT': {
	158	'client': {
	159	'clientName': 'ANDROID_CREATOR',
	160	'clientVersion': '21.47',
	161	},
	162	},
	163	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	164	'REQUIRE_JS_PLAYER': False
	165	},
	166	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	167	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	168	'ios': {
	169	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	170	'INNERTUBE_CONTEXT': {
	171	'client': {
	172	'clientName': 'IOS',
	173	'clientVersion': '16.46',
	174	'deviceModel': 'iPhone14,3',
	175	}
	176	},
	177	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	178	'REQUIRE_JS_PLAYER': False
	179	},
	180	'ios_embedded': {
	181	'INNERTUBE_CONTEXT': {
	182	'client': {
	183	'clientName': 'IOS_MESSAGES_EXTENSION',
	184	'clientVersion': '16.46',
	185	'deviceModel': 'iPhone14,3',
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '4.57',
	197	},
	198	},
	199	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	200	'REQUIRE_JS_PLAYER': False
	201	},
	202	'ios_creator': {
	203	'INNERTUBE_CONTEXT': {
	204	'client': {
	205	'clientName': 'IOS_CREATOR',
	206	'clientVersion': '21.47',
	207	},
	208	},
	209	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	210	'REQUIRE_JS_PLAYER': False
	211	},
	212	# mweb has 'ultralow' formats
	213	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	214	'mweb': {
	215	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	216	'INNERTUBE_CONTEXT': {
	217	'client': {
	218	'clientName': 'MWEB',
	219	'clientVersion': '2.20211221.01.00',
	220	}
	221	},
	222	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	223	}
	224	}
	225
	226
	227	def build_innertube_clients():
	228	THIRD_PARTY = {
	229	'embedUrl': 'https://google.com', # Can be any valid URL
	230	}
	231	BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
	232	priority = qualities(BASE_CLIENTS[::-1])
	233
	234	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	235	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	236	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	237	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	238	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	239
	240	base_client, *variant = client.split('_')
	241	ytcfg['priority'] = 10 * priority(base_client)
	242
	243	if not variant:
	244	INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
	245	agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	246	agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	247	agegate_ytcfg['priority'] -= 1
	248	elif variant == ['embedded']:
	249	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	250	ytcfg['priority'] -= 2
	251	else:
	252	ytcfg['priority'] -= 3
	253
	254
	255	build_innertube_clients()
	256
	257
	258	class YoutubeBaseInfoExtractor(InfoExtractor):
	259	"""Provide base functions for Youtube extractors"""
	260
	261	_RESERVED_NAMES = (
	262	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	263	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	264	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	265	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	266
	267	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	268
	269	_NETRC_MACHINE = 'youtube'
	270
	271	# If True it will raise an error if no login info is provided
	272	_LOGIN_REQUIRED = False
	273
	274	_INVIDIOUS_SITES = (
	275	# invidious-redirect websites
	276	r'(?:www\.)?redirect\.invidious\.io',
	277	r'(?:(?:www\|dev)\.)?invidio\.us',
	278	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	279	r'(?:www\.)?invidious\.pussthecat\.org',
	280	r'(?:www\.)?invidious\.zee\.li',
	281	r'(?:www\.)?invidious\.ethibox\.fr',
	282	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	283	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	284	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	285	# youtube-dl invidious instances list
	286	r'(?:(?:www\|no)\.)?invidiou\.sh',
	287	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	288	r'(?:www\.)?invidious\.kabi\.tk',
	289	r'(?:www\.)?invidious\.mastodon\.host',
	290	r'(?:www\.)?invidious\.zapashcanon\.fr',
	291	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	292	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	293	r'(?:www\.)?invidious\.himiko\.cloud',
	294	r'(?:www\.)?invidious\.reallyancient\.tech',
	295	r'(?:www\.)?invidious\.tube',
	296	r'(?:www\.)?invidiou\.site',
	297	r'(?:www\.)?invidious\.site',
	298	r'(?:www\.)?invidious\.xyz',
	299	r'(?:www\.)?invidious\.nixnet\.xyz',
	300	r'(?:www\.)?invidious\.048596\.xyz',
	301	r'(?:www\.)?invidious\.drycat\.fr',
	302	r'(?:www\.)?inv\.skyn3t\.in',
	303	r'(?:www\.)?tube\.poal\.co',
	304	r'(?:www\.)?tube\.connect\.cafe',
	305	r'(?:www\.)?vid\.wxzm\.sx',
	306	r'(?:www\.)?vid\.mint\.lgbt',
	307	r'(?:www\.)?vid\.puffyan\.us',
	308	r'(?:www\.)?yewtu\.be',
	309	r'(?:www\.)?yt\.elukerio\.org',
	310	r'(?:www\.)?yt\.lelux\.fi',
	311	r'(?:www\.)?invidious\.ggc-project\.de',
	312	r'(?:www\.)?yt\.maisputain\.ovh',
	313	r'(?:www\.)?ytprivate\.com',
	314	r'(?:www\.)?invidious\.13ad\.de',
	315	r'(?:www\.)?invidious\.toot\.koeln',
	316	r'(?:www\.)?invidious\.fdn\.fr',
	317	r'(?:www\.)?watch\.nettohikari\.com',
	318	r'(?:www\.)?invidious\.namazso\.eu',
	319	r'(?:www\.)?invidious\.silkky\.cloud',
	320	r'(?:www\.)?invidious\.exonip\.de',
	321	r'(?:www\.)?invidious\.riverside\.rocks',
	322	r'(?:www\.)?invidious\.blamefran\.net',
	323	r'(?:www\.)?invidious\.moomoo\.de',
	324	r'(?:www\.)?ytb\.trom\.tf',
	325	r'(?:www\.)?yt\.cyberhost\.uk',
	326	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	327	r'(?:www\.)?qklhadlycap4cnod\.onion',
	328	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	329	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	330	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	331	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	332	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	333	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	334	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	335	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	336	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	337	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	338	)
	339
	340	def _login(self):
	341	"""
	342	Attempt to log in to YouTube.
	343	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	344	"""
	345
	346	if (self._LOGIN_REQUIRED
	347	and self.get_param('cookiefile') is None
	348	and self.get_param('cookiesfrombrowser') is None):
	349	self.raise_login_required(
	350	'Login details are needed to download this content', method='cookies')
	351	username, password = self._get_login_info()
	352	if username:
	353	self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
	354
	355	def _initialize_consent(self):
	356	cookies = self._get_cookies('https://www.youtube.com/')
	357	if cookies.get('__Secure-3PSID'):
	358	return
	359	consent_id = None
	360	consent = cookies.get('CONSENT')
	361	if consent:
	362	if 'YES' in consent.value:
	363	return
	364	consent_id = self._search_regex(
	365	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	366	if not consent_id:
	367	consent_id = random.randint(100, 999)
	368	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	369
	370	def _initialize_pref(self):
	371	cookies = self._get_cookies('https://www.youtube.com/')
	372	pref_cookie = cookies.get('PREF')
	373	pref = {}
	374	if pref_cookie:
	375	try:
	376	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	377	except ValueError:
	378	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	379	pref.update({'hl': 'en', 'tz': 'UTC'})
	380	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	381
	382	def _real_initialize(self):
	383	self._initialize_pref()
	384	self._initialize_consent()
	385	self._login()
	386
	387	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	388	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	389	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	390
	391	def _get_default_ytcfg(self, client='web'):
	392	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	393
	394	def _get_innertube_host(self, client='web'):
	395	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	396
	397	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	398	# try_get but with fallback to default ytcfg client values when present
	399	_func = lambda y: try_get(y, getter, expected_type)
	400	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	401
	402	def _extract_client_name(self, ytcfg, default_client='web'):
	403	return self._ytcfg_get_safe(
	404	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	405	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	406
	407	def _extract_client_version(self, ytcfg, default_client='web'):
	408	return self._ytcfg_get_safe(
	409	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	410	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	411
	412	def _extract_api_key(self, ytcfg=None, default_client='web'):
	413	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	414
	415	def _extract_context(self, ytcfg=None, default_client='web'):
	416	context = get_first(
	417	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	418	# Enforce language and tz for extraction
	419	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	420	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	421	return context
	422
	423	_SAPISID = None
	424
	425	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	426	time_now = round(time.time())
	427	if self._SAPISID is None:
	428	yt_cookies = self._get_cookies('https://www.youtube.com')
	429	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	430	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	431	sapisid_cookie = dict_get(
	432	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	433	if sapisid_cookie and sapisid_cookie.value:
	434	self._SAPISID = sapisid_cookie.value
	435	self.write_debug('Extracted SAPISID cookie')
	436	# SAPISID cookie is required if not already present
	437	if not yt_cookies.get('SAPISID'):
	438	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	439	self._set_cookie(
	440	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	441	else:
	442	self._SAPISID = False
	443	if not self._SAPISID:
	444	return None
	445	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	446	sapisidhash = hashlib.sha1(
	447	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	448	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	449
	450	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	451	note='Downloading API JSON', errnote='Unable to download API page',
	452	context=None, api_key=None, api_hostname=None, default_client='web'):
	453
	454	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	455	data.update(query)
	456	real_headers = self.generate_api_headers(default_client=default_client)
	457	real_headers.update({'content-type': 'application/json'})
	458	if headers:
	459	real_headers.update(headers)
	460	return self._download_json(
	461	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	462	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	463	data=json.dumps(data).encode('utf8'), headers=real_headers,
	464	query={'key': api_key or self._extract_api_key()})
	465
	466	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	467	data = self._search_regex(
	468	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	469	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	470	if data:
	471	return self._parse_json(data, item_id, fatal=fatal)
	472
	473	@staticmethod
	474	def _extract_session_index(*data):
	475	"""
	476	Index of current account in account list.
	477	See: https://github.com/yt-dlp/yt-dlp/pull/519
	478	"""
	479	for ytcfg in data:
	480	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	481	if session_index is not None:
	482	return session_index
	483
	484	# Deprecated?
	485	def _extract_identity_token(self, ytcfg=None, webpage=None):
	486	if ytcfg:
	487	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	488	if token:
	489	return token
	490	if webpage:
	491	return self._search_regex(
	492	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	493	'identity token', default=None, fatal=False)
	494
	495	@staticmethod
	496	def _extract_account_syncid(*args):
	497	"""
	498	Extract syncId required to download private playlists of secondary channels
	499	@params response and/or ytcfg
	500	"""

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

def get_first(obj, keys, **kwargs):
    """Look up `keys` in `obj` through `traverse_obj` and return a single result.

    The path is `...` followed by the elements of `variadic(keys)`;
    `get_all=False` is always passed and extra keyword arguments are forwarded.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)

77

78

79

# any clients starting with _ cannot be explicitly requested by the user

80

INNERTUBE_CLIENTS = {

81

'web': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB',

86

'clientVersion': '2.20211221.00.00',

87

}

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

90

},

91

'web_embedded': {

92

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_EMBEDDED_PLAYER',

96

'clientVersion': '1.20211215.00.01',

97

},

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

100

},

101

'web_music': {

102

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

103

'INNERTUBE_HOST': 'music.youtube.com',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_REMIX',

107

'clientVersion': '1.20211213.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

111

},

112

'web_creator': {

113

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'WEB_CREATOR',

117

'clientVersion': '1.20211220.02.00',

118

}

119

},

120

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

121

},

122

'android': {

123

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

124

'INNERTUBE_CONTEXT': {

125

'client': {

126

'clientName': 'ANDROID',

127

'clientVersion': '16.49',

128

}

129

},

130

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

131

'REQUIRE_JS_PLAYER': False

132

},

133

'android_embedded': {

134

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

135

'INNERTUBE_CONTEXT': {

136

'client': {

137

'clientName': 'ANDROID_EMBEDDED_PLAYER',

138

'clientVersion': '16.49',

139

},

140

},

141

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

142

'REQUIRE_JS_PLAYER': False

143

},

144

'android_music': {

145

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

146

'INNERTUBE_CONTEXT': {

147

'client': {

148

'clientName': 'ANDROID_MUSIC',

149

'clientVersion': '4.57',

150

}

151

},

152

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

153

'REQUIRE_JS_PLAYER': False

154

},

155

'android_creator': {

156

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

157

'INNERTUBE_CONTEXT': {

158

'client': {

159

'clientName': 'ANDROID_CREATOR',

160

'clientVersion': '21.47',

161

},

162

},

163

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

164

'REQUIRE_JS_PLAYER': False

165

},

166

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

167

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

168

'ios': {

169

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS',

173

'clientVersion': '16.46',

174

'deviceModel': 'iPhone14,3',

175

}

176

},

177

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

178

'REQUIRE_JS_PLAYER': False

179

},

180

'ios_embedded': {

181

'INNERTUBE_CONTEXT': {

182

'client': {

183

'clientName': 'IOS_MESSAGES_EXTENSION',

184

'clientVersion': '16.46',

185

'deviceModel': 'iPhone14,3',

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '4.57',

197

},

198

},

199

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

200

'REQUIRE_JS_PLAYER': False

201

},

202

'ios_creator': {

203

'INNERTUBE_CONTEXT': {

204

'client': {

205

'clientName': 'IOS_CREATOR',

206

'clientVersion': '21.47',

207

},

208

},

209

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

210

'REQUIRE_JS_PLAYER': False

211

},

212

# mweb has 'ultralow' formats

213

# See: https://github.com/yt-dlp/yt-dlp/pull/557

214

'mweb': {

215

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

216

'INNERTUBE_CONTEXT': {

217

'client': {

218

'clientName': 'MWEB',

219

'clientVersion': '2.20211221.01.00',

220

}

221

},

222

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

}

}

def build_innertube_clients():
    """Complete every entry of INNERTUBE_CLIENTS in place.

    For each client this fills in missing defaults (API key, host,
    REQUIRE_JS_PLAYER, language), stores a `priority` derived from the base
    client name, and - for clients without a `_variant` suffix - registers an
    additional `<client>_agegate` configuration.
    """
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "<client>_agegate" entries are added while looping
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        parts = name.split('_')
        base, variant = parts[0], parts[1:]
        cfg['priority'] = 10 * rank(base)

        if variant == ['embedded']:
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Plain client: derive an embedded-screen copy ranked just below it
            agegate = copy.deepcopy(cfg)
            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            agegate['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate

253

254

255

build_innertube_clients()

256

257

258

class YoutubeBaseInfoExtractor(InfoExtractor):

259

"""Provide base functions for Youtube extractors"""

260

261

_RESERVED_NAMES = (

262

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

268

269

_NETRC_MACHINE = 'youtube'

270

271

# If True it will raise an error if no login info is provided

272

_LOGIN_REQUIRED = False

273

274

_INVIDIOUS_SITES = (

275

# invidious-redirect websites

276

r'(?:www\.)?redirect\.invidious\.io',

277

r'(?:(?:www|dev)\.)?invidio\.us',

278

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

279

r'(?:www\.)?invidious\.pussthecat\.org',

280

r'(?:www\.)?invidious\.zee\.li',

281

r'(?:www\.)?invidious\.ethibox\.fr',

282

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

283

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

284

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

285

# youtube-dl invidious instances list

286

r'(?:(?:www|no)\.)?invidiou\.sh',

287

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

288

r'(?:www\.)?invidious\.kabi\.tk',

289

r'(?:www\.)?invidious\.mastodon\.host',

290

r'(?:www\.)?invidious\.zapashcanon\.fr',

291

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

292

r'(?:www\.)?invidious\.tinfoil-hat\.net',

293

r'(?:www\.)?invidious\.himiko\.cloud',

294

r'(?:www\.)?invidious\.reallyancient\.tech',

295

r'(?:www\.)?invidious\.tube',

296

r'(?:www\.)?invidiou\.site',

297

r'(?:www\.)?invidious\.site',

298

r'(?:www\.)?invidious\.xyz',

299

r'(?:www\.)?invidious\.nixnet\.xyz',

300

r'(?:www\.)?invidious\.048596\.xyz',

301

r'(?:www\.)?invidious\.drycat\.fr',

302

r'(?:www\.)?inv\.skyn3t\.in',

303

r'(?:www\.)?tube\.poal\.co',

304

r'(?:www\.)?tube\.connect\.cafe',

305

r'(?:www\.)?vid\.wxzm\.sx',

306

r'(?:www\.)?vid\.mint\.lgbt',

307

r'(?:www\.)?vid\.puffyan\.us',

308

r'(?:www\.)?yewtu\.be',

309

r'(?:www\.)?yt\.elukerio\.org',

310

r'(?:www\.)?yt\.lelux\.fi',

311

r'(?:www\.)?invidious\.ggc-project\.de',

312

r'(?:www\.)?yt\.maisputain\.ovh',

313

r'(?:www\.)?ytprivate\.com',

314

r'(?:www\.)?invidious\.13ad\.de',

315

r'(?:www\.)?invidious\.toot\.koeln',

316

r'(?:www\.)?invidious\.fdn\.fr',

317

r'(?:www\.)?watch\.nettohikari\.com',

318

r'(?:www\.)?invidious\.namazso\.eu',

319

r'(?:www\.)?invidious\.silkky\.cloud',

320

r'(?:www\.)?invidious\.exonip\.de',

321

r'(?:www\.)?invidious\.riverside\.rocks',

322

r'(?:www\.)?invidious\.blamefran\.net',

323

r'(?:www\.)?invidious\.moomoo\.de',

324

r'(?:www\.)?ytb\.trom\.tf',

325

r'(?:www\.)?yt\.cyberhost\.uk',

326

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

327

r'(?:www\.)?qklhadlycap4cnod\.onion',

328

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

329

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

330

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

331

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

332

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

333

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

334

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

335

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

336

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

337

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

)

def _login(self):
    """
    Check the login prerequisites for YouTube.
    When _LOGIN_REQUIRED is set and neither a cookie file nor browser cookies
    were configured, a login-required error is raised. Username/password
    credentials are not used; supplying them only produces a warning.
    """
    if self._LOGIN_REQUIRED:
        cookie_params = ('cookiefile', 'cookiesfrombrowser')
        if all(self.get_param(name) is None for name in cookie_params):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if username:
        self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

354

355

def _initialize_consent(self):
    """Set a 'YES' CONSENT cookie for .youtube.com unless one is unnecessary.

    Nothing is done when a `__Secure-3PSID` cookie exists or the existing
    CONSENT cookie already contains 'YES'. Otherwise the numeric id of a
    `PENDING+<id>` consent value is reused, falling back to a random
    three-digit number.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    consent_cookie = jar.get('CONSENT')
    pending_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    pending_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % pending_id)

369

370

def _initialize_pref(self):
    """Rewrite the PREF cookie so that `hl` is 'en' and `tz` is 'UTC'.

    Values already present in the user's PREF cookie are kept when the cookie
    can be parsed; a parse failure is reported as a warning and the cookie is
    rebuilt from the two enforced values only.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(compat_urlparse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))

381

382

def _real_initialize(self):
    """Run the initialisation steps in order: PREF cookie, CONSENT cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._login):
        step()

386

387

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

388

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

389

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

390

391

def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in configuration for `client`.

    Raises KeyError if `client` is not a key of INNERTUBE_CLIENTS.
    """
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)

393

394

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the built-in configuration for `client`."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return builtin_cfg['INNERTUBE_HOST']

396

397

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the default config of `default_client`.

    The fallback is used whenever the value obtained from `ytcfg` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

401

402

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default config.

    'INNERTUBE_CLIENT_NAME' is tried first, then the `clientName` inside
    INNERTUBE_CONTEXT; only string values are accepted.
    """
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)

406

407

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default config.

    'INNERTUBE_CLIENT_VERSION' is tried first, then the `clientVersion`
    inside INNERTUBE_CONTEXT; only string values are accepted.
    """
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)

411

412

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_API_KEY' string from `ytcfg`, or from the default config."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)

414

415

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT dict from `ytcfg` or the default config.

    The context's `client` dict (when present) is updated in place so that
    language and time zone are fixed to English / UTC.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

426

time_now = round(time.time())

427

if self._SAPISID is None:

428

yt_cookies = self._get_cookies('https://www.youtube.com')

429

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

430

# See: https://github.com/yt-dlp/yt-dlp/issues/393

431

sapisid_cookie = dict_get(

432

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

433

if sapisid_cookie and sapisid_cookie.value:

434

self._SAPISID = sapisid_cookie.value

435

self.write_debug('Extracted SAPISID cookie')

436

# SAPISID cookie is required if not already present

437

if not yt_cookies.get('SAPISID'):

438

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

439

self._set_cookie(

440

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

441

else:

442

self._SAPISID = False

443

if not self._SAPISID:

444

return None

445

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

446

sapisidhash = hashlib.sha1(

447

f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()

448

return f'SAPISIDHASH {time_now}_{sapisidhash}'

449

450

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` as JSON to the /youtubei/v1/<ep> endpoint and return the parsed reply.

    The request body is {'context': ...} merged with `query`; when `context`
    is falsy it is derived from `default_client`. Headers come from
    generate_api_headers() plus a JSON content-type, with `headers` applied
    last. The host defaults to the innertube host of `default_client`.
    """
    if not context:
        context = self._extract_context(default_client=default_client)
    payload = {'context': context}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    host = api_hostname or self._get_innertube_host(default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (host, ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key or self._extract_api_key()})

465

466

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData object in `webpage` and return it parsed.

    The boundary-anchored pattern is tried before the bare one. Returns None
    when nothing was found (only possible when `fatal` is false).
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_data:
        return None
    return self._parse_json(raw_data, item_id, fatal=fatal)

472

473

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int,
    or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg['ID_TOKEN']`, else scraped from `webpage`.

    Returns None when neither source provides a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

494

495

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) > 1 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, in lookup order
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

527

528

def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to `ytcfg.set(...)` in `webpage` as a dict.

    Returns an empty dict when `webpage` is empty, when no `ytcfg.set` call
    is found, or when its argument cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped to match literally; an
    # unescaped `$` here would anchor at end-of-string and never match.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

535

536

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values derived from
    `ytcfg`. Headers whose value ends up None are left out. Authorization
    and X-Origin are only added when a SAPISIDHASH can be generated.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to account 0 when only a sync id is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

560

561

@staticmethod

562

def _build_api_continuation_query(continuation, ctp=None):

563

query = {

564

'continuation': continuation

565

}

566

# TODO: Inconsistency with clickTrackingParams.

567

# Currently we have a fixed ctp contained within context (from ytcfg)

568

# and a ctp in root query for continuation.

569

if ctp:

570

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from `renderer`'s next/reload continuation data.

    Returns None when the renderer has no such data or it lacks a
    'continuation' token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))

585

586

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

595

596

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None.

    Renderer-level next/reload continuation data is preferred; otherwise the
    first usable continuationItemRenderer among the renderer's 'contents'
    and 'items' entries is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x, key=key: x[key], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) pairs for the entries of `data['alerts']`.

    Entries without a 'type' or without extractable text are skipped, as is
    anything that is not shaped like a dict of alert renderers.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The values come from remote JSON; calling .get() on a
            # non-dict value would raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

629

630

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

631

errors = []

632

warnings = []

633

for alert_type, alert_message in alerts:

634

if alert_type.lower() == 'error' and fatal:

635

errors.append([alert_type, alert_message])

636

else:

637

warnings.append([alert_type, alert_message])

638

639

for alert_type, alert_message in (warnings + errors[:-1]):

640

self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)

641

if errors:

642

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

643

644

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them to _report_alerts().

    Extra arguments are forwarded to _report_alerts() unchanged.
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

646

647

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in `renderer['badges']`."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of `path_list` in `data`.

    Each candidate object contributes either its 'simpleText' or the
    concatenated 'text' of its 'runs' (at most `max_runs` of them when
    given). With no paths, `data` itself is the candidate. Returns None
    when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without `...` or alternative keys is wrapped in a list
            # so a single result can be iterated like a branching one
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                # The item may itself be the list of runs
                runs = item

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """Return the count parsed from the text at `path_list` in `data`.

    parse_count() is tried first; if it gives None, the leading run of
    digits/commas (after stripping whitespace) is converted with str_to_int().
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries without a valid url are dropped
    """
    thumbnails = []
    for path in path_list or [()]:
        thumbnails_path = (*variadic(path), 'thumbnails', ...)
        for thumbnail in traverse_obj(data, thumbnails_path, default=[]):
            url = url_or_none(thumbnail.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                thumbnails.append({
                    'url': url,
                    'height': int_or_none(thumbnail.get('height')),
                    'width': int_or_none(thumbnail.get('width')),
                })
    return thumbnails

@staticmethod

def extract_relative_time(relative_time_text):

711

"""

712

Extracts a relative time from string and converts to dt object

713

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

718

if start:

719

return datetime_from_str(start)

720

try:

721

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """Return (timestamp, text) for the time text at `path_list` in `renderer`.

    The text is first read as a relative time, then as an absolute date
    (directly, or after cutting out the date-looking part). A warning is
    issued once when non-empty text cannot be parsed; timestamp is then None.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative_dt = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative_dt.timetuple())
        if isinstance(relative_dt, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_patterns = (
                r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)',
                r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}',
            )
            date_text = self._search_regex(
                date_patterns, text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text

742

743

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the innertube API with retries and return the JSON response.

    A request is retried (up to the 'extractor_retries' param, default 3)
    on network errors other than HTTP 403/429, on "unknown error" alerts
    in an otherwise successful response, and when none of
    `check_get_keys` is present in the response.

    Once retries are exhausted, or on a non-retryable error, the error is
    raised when `fatal` is true; otherwise a warning is reported and None
    is returned.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    # Surface the error message of a JSON error body, if any
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; otherwise the loop would end
                # and the erroneous response would be returned silently
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod

def is_music_url(url):

817

return re.match(r'https?://music\.youtube\.com/', url) is not None

818

819

def _extract_video(self, renderer):
    """Build a 'url'-type result dict (handled by YoutubeIE) from a video renderer.

    'upload_date' is only filled in when the youtubetab 'approximate_date'
    extractor argument is set. 'live_status' is derived from the scheduled
    start time, the published-time text, the overlay style and the badges.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges),
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

860

IE_DESC = 'YouTube'

861

_VALID_URL = r"""(?x)^

862

(

863

(?:https?://|//) # http(s):// or protocol-independent URL

864

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

865

(?:www\.)?deturl\.com/www\.youtube\.com|

866

(?:www\.)?pwnyoutube\.com|

867

(?:www\.)?hooktube\.com|

868

(?:www\.)?yourepeat\.com|

869

tube\.majestyc\.net|

870

%(invidious)s|

871

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

872

(?:.*?\#/)? # handle anchor (#/) redirect urls

873

(?: # the various things that can precede the ID:

874

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

875

|(?: # or the v= param in all its forms

876

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

877

(?:\?|\#!?) # the params delimiter ? or # or #!

878

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

884

vid\.plus| # or vid.plus/xxxx

885

zwearz\.com/watch| # or zwearz.com/watch/xxxx

886

%(invidious)s

887

)/

888

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

889

)

890

)? # all until now is optional -> you can pass the naked ID

891

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

892

(?(1).+)? # if we found the ID, everything can follow

893

(?:\#|$)""" % {

894

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

895

}

896

_PLAYER_INFO_RE = (

897

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

898

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

899

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

900

)

901

_formats = {

902

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

903

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

904

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

905

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

906

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

907

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

908

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

909

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

910

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

911

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

912

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

913

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

914

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

915

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

916

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

917

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

918

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

919

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

924

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

925

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

926

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

927

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

928

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

929

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

930

931

# Apple HTTP Live Streaming

932

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

933

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

934

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

935

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

936

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

937

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

938

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

939

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

940

941

# DASH mp4 video

942

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

943

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

944

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

945

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

946

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

947

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

948

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

949

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

950

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

951

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

952

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

953

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

954

955

# Dash mp4 audio

956

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

957

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

958

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

959

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

960

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

961

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

962

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

963

964

# Dash webm

965

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

966

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

967

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

968

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

969

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

970

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

971

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

972

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

974

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

975

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

976

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

977

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

978

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

979

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

980

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

981

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

982

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

983

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

984

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

985

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

986

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

987

988

# Dash webm audio

989

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

990

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

991

992

# Dash webm audio with opus inside

993

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

994

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

995

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

996

997

# RTMP (unnamed)

998

'_rtmp': {'protocol': 'rtmp'},

999

1000

# av01 video only formats sometimes served with "unknown" codecs

1001

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1002

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1003

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1004

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1005

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1006

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1007

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1008

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1009

}

1010

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1022

'uploader': 'Philipp Hagemeister',

1023

'uploader_id': 'phihag',

1024

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1025

'channel': 'Philipp Hagemeister',

1026

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1027

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1028

'upload_date': '20121002',

1029

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1030

'categories': ['Science & Technology'],

1031

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1036

'playable_in_embed': True,

1037

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1038

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1047

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1052

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1053

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1054

'uploader': 'SET India',

1055

'uploader_id': 'setindia',

1056

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1057

'age_limit': 18,

1058

},

1059

'skip': 'Private video',

1060

},

1061

{

1062

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1063

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1068

'uploader': 'Philipp Hagemeister',

1069

'uploader_id': 'phihag',

1070

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1071

'channel': 'Philipp Hagemeister',

1072

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1073

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1074

'upload_date': '20121002',

1075

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1076

'categories': ['Science & Technology'],

1077

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1082

'playable_in_embed': True,

1083

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1084

'live_status': 'not_live',

1085

'age_limit': 0,

1086

'channel_follower_count': int

1087

},

1088

'params': {

1089

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1094

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1099

'uploader_id': '8KVIDEO',

1100

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1101

'description': '',

1102

'uploader': '8KVIDEO',

1103

'title': 'UHDTV TEST 8K VIDEO.mp4'

1104

},

1105

'params': {

1106

'youtube_include_dash_manifest': True,

1107

'format': '141',

1108

},

1109

'skip': 'format 141 not served anymore',

1110

},

1111

# DASH manifest with encrypted signature

1112

{

1113

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1118

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1119

'duration': 244,

1120

'uploader': 'AfrojackVEVO',

1121

'uploader_id': 'AfrojackVEVO',

1122

'upload_date': '20131011',

1123

'abr': 129.495,

1124

'like_count': int,

1125

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1126

'playable_in_embed': True,

1127

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1128

'view_count': int,

1129

'track': 'The Spark',

1130

'live_status': 'not_live',

1131

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1132

'channel': 'Afrojack',

1133

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1134

'tags': 'count:19',

1135

'availability': 'public',

1136

'categories': ['Music'],

1137

'age_limit': 0,

1138

'alt_title': 'The Spark',

1139

'channel_follower_count': int

1140

},

1141

'params': {

1142

'youtube_include_dash_manifest': True,

1143

'format': '141/bestaudio[ext=m4a]',

1144

},

1145

},

1146

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1147

{

1148

'note': 'Embed allowed age-gate video',

1149

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1154

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1155

'duration': 142,

1156

'uploader': 'The Witcher',

1157

'uploader_id': 'WitcherGame',

1158

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1159

'upload_date': '20140605',

1160

'age_limit': 18,

1161

'categories': ['Gaming'],

1162

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1163

'availability': 'needs_auth',

1164

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1165

'like_count': int,

1166

'channel': 'The Witcher',

1167

'live_status': 'not_live',

1168

'tags': 'count:17',

1169

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1170

'playable_in_embed': True,

1171

'view_count': int,

1172

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1177

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1182

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1183

'upload_date': '20200408',

1184

'uploader_id': 'FlyingKitty900',

1185

'uploader': 'FlyingKitty',

1186

'age_limit': 18,

1187

'availability': 'needs_auth',

1188

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1189

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1190

'channel': 'FlyingKitty',

1191

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1192

'view_count': int,

1193

'categories': ['Entertainment'],

1194

'live_status': 'not_live',

1195

'tags': ['Flyingkitty', 'godzilla 2'],

1196

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1197

'like_count': int,

1198

'duration': 177,

1199

'playable_in_embed': True,

1200

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1205

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1206

'info_dict': {

1207

'id': 'Tq92D6wQ1mg',

1208

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1209

'ext': 'mp4',

1210

'upload_date': '20191227',

1211

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1212

'uploader': 'Projekt Melody',

1213

'description': 'md5:17eccca93a786d51bc67646756894066',

1214

'age_limit': 18,

1215

'like_count': int,

1216

'availability': 'needs_auth',

1217

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1218

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1219

'view_count': int,

1220

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1221

'channel': 'Projekt Melody',

1222

'live_status': 'not_live',

1223

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1224

'playable_in_embed': True,

1225

'categories': ['Entertainment'],

1226

'duration': 106,

1227

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1228

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1233

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1238

'uploader': 'Herr Lurik',

1239

'uploader_id': 'st3in234',

1240

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1241

'upload_date': '20130730',

1242

'track': 'Such mich find mich',

1243

'age_limit': 0,

1244

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1245

'like_count': int,

1246

'playable_in_embed': False,

1247

'creator': 'OOMPH!',

1248

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1249

'view_count': int,

1250

'alt_title': 'Such mich find mich',

1251

'duration': 210,

1252

'channel': 'Herr Lurik',

1253

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1254

'categories': ['Music'],

1255

'availability': 'public',

1256

'uploader_url': 'http://www.youtube.com/user/st3in234',

1257

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1258

'live_status': 'not_live',

1259

'artist': 'OOMPH!',

1260

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1265

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1266

'only_matching': True,

1267

},

1268

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1269

# YouTube Red ad is not captured for creator

1270

{

1271

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1277

'uploader_id': 'deadmau5',

1278

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1279

'creator': 'deadmau5',

1280

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1281

'uploader': 'deadmau5',

1282

'title': 'Deadmau5 - Some Chords (HD)',

1283

'alt_title': 'Some Chords',

1284

'availability': 'public',

1285

'tags': 'count:14',

1286

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1287

'view_count': int,

1288

'live_status': 'not_live',

1289

'channel': 'deadmau5',

1290

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1291

'like_count': int,

1292

'track': 'Some Chords',

1293

'artist': 'deadmau5',

1294

'playable_in_embed': True,

1295

'age_limit': 0,

1296

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1297

'categories': ['Music'],

1298

'album': 'Some Chords',

1299

'channel_follower_count': int

1300

},

1301

'expected_warnings': [

1302

'DASH manifest missing',

1303

]

1304

},

1305

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1306

{

1307

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1313

'uploader_id': 'olympic',

1314

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1315

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1316

'uploader': 'Olympics',

1317

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1318

'like_count': int,

1319

'release_timestamp': 1343767800,

1320

'playable_in_embed': True,

1321

'categories': ['Sports'],

1322

'release_date': '20120731',

1323

'channel': 'Olympics',

1324

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1325

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1326

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1327

'age_limit': 0,

1328

'availability': 'public',

1329

'live_status': 'was_live',

1330

'view_count': int,

1331

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1332

'channel_follower_count': int

1333

},

1334

'params': {

1335

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1345

'duration': 85,

1346

'upload_date': '20110310',

1347

'uploader_id': 'AllenMeow',

1348

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1349

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1350

'uploader': '孫ᄋᄅ',

1351

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1352

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1357

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1358

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1359

'view_count': int,

1360

'categories': ['People & Blogs'],

1361

'like_count': int,

1362

'live_status': 'not_live',

1363

'availability': 'unlisted',

1364

'channel_follower_count': int

1365

},

1366

},

1367

# url_encoded_fmt_stream_map is empty string

1368

{

1369

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1374

'description': '',

1375

'upload_date': '20150404',

1376

'uploader_id': 'spbelect',

1377

'uploader': 'Наблюдатели Петербурга',

1378

},

1379

'params': {

1380

'skip_download': 'requires avconv',

1381

},

1382

'skip': 'This live event has ended.',

1383

},

1384

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1385

{

1386

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1391

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1392

'duration': 220,

1393

'upload_date': '20150625',

1394

'uploader_id': 'dorappi2000',

1395

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1396

'uploader': 'dorappi2000',

1397

'formats': 'mincount:31',

1398

},

1399

'skip': 'not actual anymore',

1400

},

1401

# DASH manifest with segment_list

1402

{

1403

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1404

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1409

'uploader': 'Airtek',

1410

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1411

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1412

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1413

},

1414

'params': {

1415

'youtube_include_dash_manifest': True,

1416

'format': '135', # bestvideo

1417

},

1418

'skip': 'This live event has ended.',

1419

},

1420

{

1421

# Multifeed videos (multiple cameras), URL is for Main Camera

1422

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1423

'info_dict': {

1424

'id': 'jvGDaLqkpTg',

1425

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1426

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1433

'description': 'md5:e03b909557865076822aa169218d6a5d',

1434

'duration': 10643,

1435

'upload_date': '20161111',

1436

'uploader': 'Team PGP',

1437

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1438

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1445

'description': 'md5:e03b909557865076822aa169218d6a5d',

1446

'duration': 10991,

1447

'upload_date': '20161111',

1448

'uploader': 'Team PGP',

1449

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1450

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1457

'description': 'md5:e03b909557865076822aa169218d6a5d',

1458

'duration': 10995,

1459

'upload_date': '20161111',

1460

'uploader': 'Team PGP',

1461

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1462

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1469

'description': 'md5:e03b909557865076822aa169218d6a5d',

1470

'duration': 10990,

1471

'upload_date': '20161111',

1472

'uploader': 'Team PGP',

1473

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1474

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1479

},

1480

'skip': 'Not multifeed anymore',

1481

},

1482

{

1483

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1484

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1485

'info_dict': {

1486

'id': 'gVfLd0zydlo',

1487

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1488

},

1489

'playlist_count': 2,

1490

'skip': 'Not multifeed anymore',

1491

},

1492

{

1493

'url': 'https://vid.plus/FlRa-iH7PGw',

1494

'only_matching': True,

1495

},

1496

{

1497

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1498

'only_matching': True,

1499

},

1500

{

1501

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1502

# Also tests cut-off URL expansion in video description (see

1503

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1504

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1505

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1510

'alt_title': 'Dark Walk',

1511

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1512

'duration': 133,

1513

'upload_date': '20151119',

1514

'uploader_id': 'IronSoulElf',

1515

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1516

'uploader': 'IronSoulElf',

1517

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1518

'track': 'Dark Walk',

1519

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1520

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1521

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1522

'categories': ['Film & Animation'],

1523

'view_count': int,

1524

'live_status': 'not_live',

1525

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1526

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1527

'tags': 'count:13',

1528

'availability': 'public',

1529

'channel': 'IronSoulElf',

1530

'playable_in_embed': True,

1531

'like_count': int,

1532

'age_limit': 0,

1533

'channel_follower_count': int

1534

},

1535

'params': {

1536

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1541

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1542

'only_matching': True,

1543

},

1544

{

1545

# Video with yt:stretch=17:0

1546

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1551

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1552

'upload_date': '20151107',

1553

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1554

'uploader': 'CH GAMER DROID',

1555

},

1556

'params': {

1557

'skip_download': True,

1558

},

1559

'skip': 'This video does not exist.',

1560

},

1561

{

1562

# Video with incomplete 'yt:stretch=16:'

1563

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1564

'only_matching': True,

1565

},

1566

{

1567

# Video licensed under Creative Commons

1568

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1573

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1574

'duration': 721,

1575

'upload_date': '20150127',

1576

'uploader_id': 'BerkmanCenter',

1577

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1578

'uploader': 'The Berkman Klein Center for Internet & Society',

1579

'license': 'Creative Commons Attribution license (reuse allowed)',

1580

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1581

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1582

'like_count': int,

1583

'age_limit': 0,

1584

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1585

'channel': 'The Berkman Klein Center for Internet & Society',

1586

'availability': 'public',

1587

'view_count': int,

1588

'categories': ['Education'],

1589

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1590

'live_status': 'not_live',

1591

'playable_in_embed': True,

1592

'channel_follower_count': int

1593

},

1594

'params': {

1595

'skip_download': True,

},

},

{

# Channel-like uploader_url

1600

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1605

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1606

'duration': 4060,

1607

'upload_date': '20151119',

1608

'uploader': 'Bernie Sanders',

1609

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1610

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1611

'license': 'Creative Commons Attribution license (reuse allowed)',

1612

'playable_in_embed': True,

1613

'tags': 'count:12',

1614

'like_count': int,

1615

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1616

'age_limit': 0,

1617

'availability': 'public',

1618

'categories': ['News & Politics'],

1619

'channel': 'Bernie Sanders',

1620

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1621

'view_count': int,

1622

'live_status': 'not_live',

1623

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1624

'channel_follower_count': int

1625

},

1626

'params': {

1627

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1632

'only_matching': True,

1633

},

1634

{

1635

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1636

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1637

'only_matching': True,

1638

},

1639

{

1640

# Rental video preview

1641

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1646

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1647

'upload_date': '20150811',

1648

'uploader': 'FlixMatrix',

1649

'uploader_id': 'FlixMatrixKaravan',

1650

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1651

'license': 'Standard YouTube License',

1652

},

1653

'params': {

1654

'skip_download': True,

1655

},

1656

'skip': 'This video is not available.',

1657

},

1658

{

1659

# YouTube Red video with episode data

1660

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1665

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1666

'duration': 2085,

1667

'upload_date': '20170118',

1668

'uploader': 'Vsauce',

1669

'uploader_id': 'Vsauce',

1670

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1671

'series': 'Mind Field',

1672

'season_number': 1,

1673

'episode_number': 1,

1674

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1675

'tags': 'count:12',

1676

'view_count': int,

1677

'availability': 'public',

1678

'age_limit': 0,

1679

'channel': 'Vsauce',

1680

'episode': 'Episode 1',

1681

'categories': ['Entertainment'],

1682

'season': 'Season 1',

1683

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1684

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1685

'like_count': int,

1686

'playable_in_embed': True,

1687

'live_status': 'not_live',

1688

'channel_follower_count': int

1689

},

1690

'params': {

1691

'skip_download': True,

1692

},

1693

'expected_warnings': [

1694

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1699

# as inappropriate or offensive to some audiences.

1700

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1705

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1706

'duration': 965,

1707

'upload_date': '20140124',

1708

'uploader': 'New Century Foundation',

1709

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1710

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1711

},

1712

'params': {

1713

'skip_download': True,

1714

},

1715

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1720

'only_matching': True,

1721

},

1722

{

1723

# geo restricted to JP

1724

'url': 'sJL6WA-aGkQ',

1725

'only_matching': True,

1726

},

1727

{

1728

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1729

'only_matching': True,

1730

},

1731

{

1732

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1733

'only_matching': True,

1734

},

1735

{

1736

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1737

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1738

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1743

'only_matching': True,

1744

},

1745

{

1746

# Video with unsupported adaptive stream type formats

1747

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1752

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1753

'duration': 433,

1754

'upload_date': '20130923',

1755

'uploader': 'Amelia Putri Harwita',

1756

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1757

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1758

'formats': 'maxcount:10',

1759

},

1760

'params': {

1761

'skip_download': True,

1762

'youtube_include_dash_manifest': False,

1763

},

1764

'skip': 'not actual anymore',

1765

},

1766

{

1767

# Youtube Music Auto-generated description

1768

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1773

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1774

'upload_date': '20190312',

1775

'uploader': 'Stephen - Topic',

1776

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1777

'artist': 'Stephen',

1778

'track': 'Voyeur Girl',

1779

'album': 'it\'s too much love to know my dear',

1780

'release_date': '20190313',

1781

'release_year': 2019,

1782

'alt_title': 'Voyeur Girl',

1783

'view_count': int,

1784

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1785

'playable_in_embed': True,

1786

'like_count': int,

1787

'categories': ['Music'],

1788

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1789

'channel': 'Stephen',

1790

'availability': 'public',

1791

'creator': 'Stephen',

1792

'duration': 169,

1793

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1794

'age_limit': 0,

1795

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1796

'tags': 'count:11',

1797

'live_status': 'not_live',

1798

'channel_follower_count': int

1799

},

1800

'params': {

1801

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1806

'only_matching': True,

1807

},

1808

{

1809

# invalid -> valid video id redirection

1810

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1815

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1816

'upload_date': '20090125',

1817

'uploader': 'Prochorowka',

1818

'uploader_id': 'Prochorowka',

1819

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1820

'artist': 'Panjabi MC',

1821

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1822

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1823

},

1824

'params': {

1825

'skip_download': True,

1826

},

1827

'skip': 'Video unavailable',

1828

},

1829

{

1830

# empty description results in an empty string

1831

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1838

'uploader_id': 'ElevageOrVert',

1839

'uploader': 'ElevageOrVert',

1840

'view_count': int,

1841

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1842

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1843

'like_count': int,

1844

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1845

'tags': [],

1846

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1847

'availability': 'public',

1848

'age_limit': 0,

1849

'categories': ['Pets & Animals'],

1850

'duration': 7,

1851

'playable_in_embed': True,

1852

'live_status': 'not_live',

1853

'channel': 'ElevageOrVert',

1854

'channel_follower_count': int

1855

},

1856

'params': {

1857

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1862

# see [2] for an example with '};' inside ytInitialPlayerResponse

1863

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1864

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1865

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1870

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1871

'upload_date': '20130831',

1872

'uploader_id': 'kudvenkat',

1873

'uploader': 'kudvenkat',

1874

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1875

'like_count': int,

1876

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1877

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1878

'live_status': 'not_live',

1879

'categories': ['Education'],

1880

'availability': 'public',

1881

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1882

'tags': 'count:12',

1883

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1888

'channel_follower_count': int

1889

},

1890

'params': {

1891

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1896

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1897

'only_matching': True,

1898

},

1899

{

1900

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1901

'only_matching': True,

1902

},

1903

{

1904

# https://github.com/ytdl-org/youtube-dl/pull/28094

1905

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1911

'upload_date': '20141120',

1912

'uploader': 'The Cinematic Orchestra - Topic',

1913

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1914

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1915

'artist': 'The Cinematic Orchestra',

1916

'track': 'Burn Out',

1917

'album': 'Every Day',

1918

'like_count': int,

1919

'live_status': 'not_live',

1920

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1925

'creator': 'The Cinematic Orchestra',

1926

'channel': 'The Cinematic Orchestra',

1927

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1928

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1929

'availability': 'public',

1930

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1931

'categories': ['Music'],

1932

'playable_in_embed': True,

1933

'channel_follower_count': int

1934

},

1935

'params': {

1936

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1941

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1942

'only_matching': True,

1943

},

1944

{

1945

# controversial video, requires bpctr/contentCheckOk

1946

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1951

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1952

'uploader': 'CBS Mornings',

1953

'uploader_id': 'CBSThisMorning',

1954

'upload_date': '20140716',

1955

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1956

'duration': 170,

1957

'categories': ['News & Politics'],

1958

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1959

'view_count': int,

1960

'channel': 'CBS Mornings',

1961

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1962

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1963

'age_limit': 18,

1964

'availability': 'needs_auth',

1965

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1966

'like_count': int,

1967

'live_status': 'not_live',

1968

'playable_in_embed': True,

1969

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1974

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1979

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1980

'upload_date': '20201120',

1981

'uploader': 'Walk around Japan',

1982

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1983

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1984

'duration': 1456,

1985

'categories': ['Travel & Events'],

1986

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1987

'view_count': int,

1988

'channel': 'Walk around Japan',

1989

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1990

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1991

'age_limit': 0,

1992

'availability': 'public',

1993

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1994

'live_status': 'not_live',

1995

'playable_in_embed': True,

1996

'channel_follower_count': int

1997

},

1998

'params': {

1999

'skip_download': True,

2000

},

2001

}, {

2002

# Has multiple audio streams

2003

'url': 'WaOKSUlf4TM',

2004

'only_matching': True

2005

}, {

2006

# Requires Premium: has format 141 when requested using YTM url

2007

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2008

'only_matching': True

2009

}, {

2010

# multiple subtitles with same lang_code

2011

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2012

'only_matching': True,

2013

}, {

2014

# Force use android client fallback

2015

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2016

'info_dict': {

2017

'id': 'YOelRv7fMxY',

2018

'title': 'DIGGING A SECRET TUNNEL Part 1',

2019

'ext': '3gp',

2020

'upload_date': '20210624',

2021

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2022

'uploader': 'colinfurze',

2023

'uploader_id': 'colinfurze',

2024

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2025

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2026

'duration': 596,

2027

'categories': ['Entertainment'],

2028

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2029

'view_count': int,

2030

'channel': 'colinfurze',

2031

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2032

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2033

'age_limit': 0,

2034

'availability': 'public',

2035

'like_count': int,

2036

'live_status': 'not_live',

2037

'playable_in_embed': True,

2038

'channel_follower_count': int

2039

},

2040

'params': {

2041

'format': '17', # 3gp format available on android

2042

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2047

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2048

'only_matching': True,

2049

'params': {

2050

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2055

'only_matching': True,

2056

}, {

2057

'note': 'Storyboards',

2058

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2064

'uploader_id': 'scishow',

2065

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2066

'upload_date': '20140324',

2067

'uploader': 'SciShow',

2068

'like_count': int,

2069

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2070

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2071

'view_count': int,

2072

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2073

'playable_in_embed': True,

2074

'tags': 'count:12',

2075

'uploader_url': 'http://www.youtube.com/user/scishow',

2076

'availability': 'public',

2077

'channel': 'SciShow',

2078

'live_status': 'not_live',

2079

'duration': 248,

2080

'categories': ['Education'],

2081

'age_limit': 0,

2082

'channel_follower_count': int

2083

}, 'params': {'format': 'mhtml', 'skip_download': True}

}

]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level. The local
    # import is kept on purpose - presumably so this method stays
    # self-contained if its source is reused elsewhere. Confirm before removing.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)

2095

2096

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: signature/nsig functions and decrypted values (see the _decrypt_* methods)
    # _code_cache: player id -> downloaded player JS (see _load_player)
    self._player_cache = {}
    self._code_cache = {}

2100

2101

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2102

lock = threading.Lock()

2103

2104

is_live = True

2105

start_time = time.time()

2106

formats = [f for f in formats if f.get('is_from_start')]

2107

2108

def refetch_manifest(format_id, delay):

2109

nonlocal formats, start_time, is_live

2110

if time.time() <= start_time + delay:

2111

return

2112

2113

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2114

video_details = traverse_obj(

2115

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2116

microformats = traverse_obj(

2117

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2118

expected_type=dict, default=[])

2119

_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)

2120

start_time = time.time()

2121

2122

def mpd_feed(format_id, delay):

2123

"""

2124

@returns (manifest_url, manifest_stream_number, is_live) or None

2125

"""

2126

with lock:

2127

refetch_manifest(format_id, delay)

2128

2129

f = next((f for f in formats if f['format_id'] == format_id), None)

2130

if not f:

2131

if not is_live:

2132

self.to_screen(f'{video_id}: Video is no longer live')

2133

else:

2134

self.report_warning(

2135

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2136

return None

2137

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2142

f['fragments'] = functools.partial(

2143

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2144

2145

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate ``{'url': ...}`` fragment dicts for a live DASH format.

    @param format_id        format whose manifest is polled through mpd_feed
    @param live_start_time  start of the live stream (falsy if unknown); used
                            to detect streams longer than MAX_DURATION
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict; 'start' (download start time) is read and
                            'last_error' is popped on every manifest refresh

    The generator ends when the stream is reported as no longer live or when
    no_fragment_score exceeds 30.
    """
    # FETCH_SPAN: minimum seconds per polling round; MAX_DURATION: 120 hours in seconds
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The helper below reads last_seq from this scope; bind it up front so a
    # failed first manifest fetch cannot raise NameError
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest state; returns (should_continue, last sequence number)."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        # A default is required: a bare next() would leak StopIteration into
        # the enclosing generator, which Python turns into RuntimeError
        fmt_info = next(
            (x for x in fmts or [] if x['manifest_stream_number'] == stream_number), None)
        if fmt_info is None:
            no_fragment_score += 2
            return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update the sequence here, or part of it would be skipped
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # ExtractorError is used purely as control flow to reach the outer `continue`
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2245

2246

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    Looks up 'PLAYER_JS_URL' first and the 'jsUrl' of
    'WEB_PLAYER_CONTEXT_CONFIGS' second. ``webpage`` is accepted but not used.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', found) if found else None

2253

2254

def _download_player_url(self, video_id, fatal=False):
    """Derive the player JS URL from the iframe API script.

    Downloads https://www.youtube.com/iframe_api, pulls the 8-hex-digit
    player version out of it and returns the matching base.js URL. Returns
    None when the download or the version search yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2263

2264

def _signature_cache_id(self, example_sig):

2265

""" Return a string representation of a signature """

2266

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2267

2268

@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first _PLAYER_INFO_RE pattern found in player_url.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2277

2278

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for player_url, downloading it at most once per player id.

    Successful downloads are kept in self._code_cache. Returns None when the
    download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if code:
        # Only cache real content so that a failed download is retried next time
        self._code_cache[player_id] = code
    return self._code_cache.get(player_id)

2288

2289

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like example_sig.

    The permutation is first looked up in the 'youtube-sigfuncs' cache. On a
    miss the player JS is loaded, the signature function is parsed from it,
    and the permutation it applies to a probe string is stored in the cache.
    Returns None if no player code could be loaded.

    Raises ExtractorError if the computed cache id is not a plain file name.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # Explicit check rather than `assert`: asserts are stripped under `python -O`,
    # and func_id (derived from the player URL) is used as a cache key
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature function cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    res = self._parse_sig_js(code)

    # Feed a string of distinct code points through the function; the code
    # points of the result then spell out the index permutation it applies
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res

2311

2312

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function ``func``.

    Does nothing unless the 'youtube_print_sig_code' param is set. ``func`` is
    probed with a string of distinct code points as long as example_sig, and
    the resulting permutation is rendered as index and slice expressions on ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def _slice_expr(first, last, stride):
        """Render the run first..last (inclusive, stepping by stride) as a slice of s."""
        head = str(first) if first != 0 else ''
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (head, tail, stride_part)

    def gen_sig_code(idxs):
        stride = None
        # Placeholder only: run_start is always assigned before stride is set
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: either it continues, or it ends at prev
                if delta != stride:
                    yield _slice_expr(run_start, prev, stride)
                    stride = None
                continue
            if delta in (-1, 1):
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2353

2354

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a Python callable.

    The function name is taken from the 'sig' group of the first matching
    pattern; the function itself is extracted with JSInterpreter. The returned
    callable takes the scrambled signature string.
    """
    # Escaped parentheses are written out as \( and \): a bare `$` outside a
    # character class would be an end-of-string anchor and never match here
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with its arguments as a list
    return lambda s: initial_function([s])

2377

2378

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One function per (player, signature shape); see _signature_cache_id
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decrypt = self._player_cache[cache_key]
        else:
            decrypt = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[cache_key] = decrypt
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the full traceback text
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2396

2397

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Both the n function (per player URL) and each decrypted value are
    memoised in self._player_cache. Raises ExtractorError when player_url is
    None or when extraction/decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # The result depends on the player's n function, so the player URL is part
    # of the key; keying on the value alone would reuse another player's result
    sig_id = ('nsig_value', player_url, s)
    if sig_id in self._player_cache:
        return self._player_cache[sig_id]

    try:
        player_id = ('nsig', player_url)
        if player_id not in self._player_cache:
            self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
        func = self._player_cache[player_id]
        decrypted = func(s)
        self._player_cache[sig_id] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2417

2418

def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function referenced in the player JS.

    The call site is either a direct call, in which case the captured name is
    returned, or a lookup into an array (``name[idx](...)``), in which case the
    array literal is parsed and its idx-th entry returned.
    """
    # Escaped parentheses are written out as \( and \): a bare `$` outside a
    # character class would be an end-of-string anchor and never match here
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2427

2428

def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n function on a string.

    The function code is loaded from the 'youtube-nsig' cache when present;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The JS function is rebuilt from its code on every call, then invoked with [s]
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function

2445

2446

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; otherwise the value is searched
    for in the player JS. Without a player_url this raises ExtractorError
    when fatal, and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2469

2470

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL from the player responses (best effort).

    Warns and returns when no 'videostatsPlaybackUrl' is available. The
    request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.choice draws each of the 64 characters uniformly; the previous
    # randint(0, 256) & 63 had an inclusive upper bound that favoured index 0
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2495

2496

@staticmethod

2497

def _extract_urls(webpage):

2498

# Embedded YouTube player

2499

entries = [

2500

unescapeHTML(mobj.group('url'))

2501

for mobj in re.finditer(r'''(?x)

(?:

<iframe[^>]+?src=|

data-video-url=|

<embed[^>]+?src=|

embedSWF\(?:\s*|

<object[^>]+data=|

new\s+SWFObject\(

)

(["\'])

(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/

2512

(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)

2513

\1''', webpage)]

2514

2515

# lazyYT YouTube embed

2516

entries.extend(list(map(

2517

unescapeHTML,

2518

re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

2519

2520

# Wordpress "YouTube Video Importer" plugin

2521

matches = re.findall(r'''(?x)<div[^>]+

2522

class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+

2523

data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)

2524

entries.extend(m[-1] for m in matches)

return entries

@staticmethod
def _extract_url(webpage):
    """Return the first embed found by _extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2532

2533

@classmethod
def extract_id(cls, url):
    """Return the 'id' group of _VALID_URL matched against url.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2539

2540

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in ``data``.

    Start times come from 'timeRangeStartMillis' (scaled down by 1000) and
    titles from the renderer's 'simpleText'.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_text, duration=duration)

2554

2555

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-markers engagement panels in ``data``.

    Panels are tried in order; the first one yielding a non-empty chapter
    list wins. Returns [] when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2570

2571

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2572

chapters = []

2573

last_chapter = {'start_time': 0}

2574

for idx, chapter in enumerate(chapter_list or []):

2575

title = chapter_title(chapter)

2576

start_time = chapter_time(chapter)

2577

if start_time is None:

2578

continue

2579

last_chapter['end_time'] = start_time

2580

if start_time < last_chapter['start_time']:

2581

if idx == 1:

2582

chapters.pop()

2583

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2584

else:

2585

self.report_warning(f'Invalid start time for chapter "{title}"')

2586

continue

2587

last_chapter = {'start_time': start_time, 'title': title}

2588

chapters.append(last_chapter)

2589

last_chapter['end_time'] = duration

2590

return chapters

2591

2592

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search webpage for a JSON blob matching ``regex`` and parse it (non-fatal).

    The regex followed by _YT_INITIAL_BOUNDARY_RE is tried first, then the
    bare regex; '{}' is parsed when neither matches.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex((bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2596

2597

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. ``parent`` is the id
    of the parent comment; top-level comments get 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    body = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author_name = self._get_text(comment_renderer, 'authorText')
    channel_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    avatar_url = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    by_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    hearted = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': body,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': hearted,
        'author': author_name,
        'author_id': channel_id,
        'author_thumbnail': avatar_url,
        'author_is_uploader': by_uploader,
        'parent': parent or 'root'
    }

2631

2632

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    Called with parent=None for the top-level comment section, and
    recursively (with the parent comment id and the shared tracker) for the
    replies of each thread.

    @param root_continuation_data  renderer dict the first continuation is taken from
    @param ytcfg                   config passed on to the API calls
    @param video_id                used for warnings and passed on to recursive calls
    @param parent                  id of the parent comment, None at the top level
    @param tracker                 dict of running counters shared across the recursion
    """

    # First value of an extractor argument, '' when it is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Report the expected comment count and return the continuation for the chosen sort order."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield each comment of a page, followed by its replies (fetched recursively)."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # Bare yield produces None: the page loop below treats a falsy
                # entry as the signal to stop the whole extraction
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Replies are capped per thread and by the remaining overall reply budget
                for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                    yield reply_comment

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # The argument list is padded with four '' entries so that the unpacking
    # always has four values; presumably int_or_none maps a non-numeric entry
    # to the sys.maxsize default, i.e. "no limit" - confirm against int_or_none.
    # max_comments is unpacked here but not used in this method.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    # The first top-level response carries the header, which supplies the
    # continuation for the selected sort order
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Reset: the loop ends unless this page provides a new continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # Sentinel from extract_thread: the parent-comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Pick the first item section identified as the comment section (None if absent)
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    # Only the first 'max_comments' value is applied here, as the overall cap
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

2779

2780

@staticmethod

2781

def _get_checkok_params():

2782

return {'contentCheckOk': True, 'racyCheckOk': True}

2783

2784

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player API query.

    'signatureTimestamp' is included only when ``sts`` is not None; the
    check-ok flags from _get_checkok_params are merged in at the top level.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts
    payload = {'playbackContext': {'contentPlaybackContext': playback_context}}
    payload.update(cls._get_checkok_params())
    return payload

@staticmethod
def _is_agegated(player_response):
    """Tell whether the player response indicates an age gate.

    True when 'desktopLegacyAgeGateReason' is set, or when the playability
    status or reason contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Compare case-insensitively: the markers are lowercase, while status
    # values are uppercase (e.g. 'UNPLAYABLE'), so a case-sensitive test
    # could never match the status markers
    lowered = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered)

2809

2810

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2813

2814

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Query the 'player' API endpoint for ``video_id`` as the given client.

    The signature timestamp is only looked up when a player_url is given.
    Returns the response, or None when it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

2830

2831

def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the 'player_client' extractor argument: names of
    non-underscore INNERTUBE_CLIENTS entries, plus the aliases 'default' and
    'all'. Unknown names are skipped with a warning, and the default clients
    are used when nothing valid was requested. For music URLs the '_music'
    variant of each requested client is added where one exists.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list from a generator over
        # the same list would also iterate over the entries being appended
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2854

2855

def _extract_player_ytcfg(self, client, video_id):

2856

url = {

2857

'web_music': 'https://music.youtube.com',

2858

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())

2863

return self.extract_ytcfg(video_id, webpage) or {}

2864

2865

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Fetch a player response for each requested client.

    Returns a ``(player_responses, player_url)`` tuple. Errors from individual
    clients are downgraded to warnings as long as at least one response was
    obtained; otherwise the last error is re-raised.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    requested = clients
    # Reversed copy used as a stack: pop() hands clients out in requested order
    pending = clients[::-1]
    responses = []

    def queue_client(name):
        # Only follow-up clients that exist and were not explicitly requested
        if name not in INNERTUBE_CLIENTS:
            return
        if name in requested:
            return
        pending.append(name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    pending_error = None
    iframe_fallback_done = False
    player_url = None
    while pending:
        client = pending.pop()
        if client == 'web':
            player_ytcfg = master_ytcfg
        else:
            player_ytcfg = {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        needs_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            needs_js_player = False
            player_url = None

        if needs_js_player and not (player_url or iframe_fallback_done):
            player_url = self._download_player_url(video_id)
            iframe_fallback_done = True

        try:
            if client == 'web' and initial_pr:
                response = initial_pr
            else:
                response = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if needs_js_player else None, initial_pr)
        except ExtractorError as err:
            # Keep only the most recent error; older ones become warnings
            if pending_error:
                self.report_warning(pending_error)
            pending_error = err
            continue

        if response:
            responses.append(response)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(response) and self.is_authenticated:
            queue_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(response):
            queue_client(f'{client}_agegate')

    if pending_error:
        if not responses:
            raise pending_error
        self.report_warning(pending_error)
    return responses, player_url

2931

2932

def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts for the direct (https) streams and the manifests.

    First every entry of ``formats``/``adaptiveFormats`` in ``streaming_data``
    is converted into a format dict (decrypting the signature and the ``n``
    parameter when needed); afterwards HLS and DASH manifests are expanded,
    skipping itags that were already yielded for the same protocol.

    @param streaming_data  list of ``streamingData`` dicts from player responses
    @param video_id        video id, used for messages and decryption caches
    @param player_url      URL of the JS player, or None if unavailable
    @param is_live         whether the video is currently live
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
    approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                # The signature cannot be decrypted without the JS player
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may be None when the format carries no quality hints
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            'preference': -10 if is_damaged else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copy before appending so the configured argument list is never mutated
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; False if it is a duplicate."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one ``mhtml`` storyboard format per level found in the spec.

    The spec is a ``|``-separated string: the first field is the base URL
    (with ``$L``/``$N``/``$M`` placeholders) and each following field is a
    ``#``-separated description of one storyboard level.

    Nothing is yielded when the spec or the video duration is unavailable,
    since fragment durations cannot be computed without the latter.

    @param player_responses  list of player response dicts
    @param duration          video duration in seconds, or None
    """
    # `or ''` guards against the lookup returning None instead of the default
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='') or ''
    spec = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # duration / fragment_count below would raise TypeError on None
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a sheet of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last sheet may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3128

3129

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):

3130

webpage = None

3131

if 'webpage' not in self._configuration_arg('player_skip'):

3132

webpage = self._download_webpage(

3133

webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

3134

3135

master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

3136

3137

player_responses, player_url = self._extract_player_responses(

3138

self._get_requested_clients(url, smuggled_data),

3139

video_id, webpage, master_ytcfg)

3140

3141

return webpage, master_ytcfg, player_responses, player_url

3142

3143

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine liveness and build the list of formats.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats)``.
    ``isLive`` from the video details wins; ``isLiveNow`` from the broadcast
    details is consulted only when the former is absent.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    stream_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # _extract_formats is a generator; materialise it for the caller
    format_list = [*self._extract_formats(stream_data, video_id, player_url, live_now)]

    return broadcast_details, live_now, stream_data, format_list

3153

3154

def _real_extract(self, url):
    """Extract the info dict for a single YouTube video.

    Combines the player responses of all requested clients with metadata
    from the watch page and the "next" (initial data) endpoint. May instead
    return a ``url_result`` for a trailer or a playlist for multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Meta tags can only be searched when the watch page was downloaded
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be missing; never concatenate onto None
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        # default=None: profile URLs that are neither /channel/ nor /user/
        # must not abort the whole extraction
        'uploader_id': self._search_regex(
            r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id',
            default=None) if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
                if lang_code == f'a-{trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times may be given in the URL fragment or query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the matched album text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Resolved unconditionally: it is needed for comment extraction below
    # even when no initial data could be obtained
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
                info.update({
                    'channel': self._get_text(vor, 'title'),
                    'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line separates the metadata of several songs;
                # album/artist/track are then ambiguous and are not set
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3610

3611

@staticmethod

3612

def passthrough_smuggled_data(func):
    """Decorator: unsmuggle the URL, call ``func(self, url, smuggled_data)``
    and re-smuggle the data into the URL of every returned entry.

    ``is_music_url`` is added to the smuggled data for music URLs. Entries
    are rewritten lazily, i.e. only while the result is being iterated.
    """
    def _resmuggle(entries, data):
        for item in entries:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            data['is_music_url'] = True
        result = func(self, url, data)
        # Nothing to pass through without smuggled data or entries
        if not data:
            return result
        if not result.get('entries'):
            return result
        result['entries'] = _resmuggle(result['entries'], data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3632

channel_id = self._html_search_meta(

3633

'channelId', webpage, 'channel id', default=None)

3634

if channel_id:

3635

return channel_id

3636

channel_url = self._html_search_meta(

3637

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3638

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3639

'twitter:app:url:googleplay'), webpage, 'channel url')

3640

return self._search_regex(

3641

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3642

channel_url, 'channel id')

3643

3644

@staticmethod

3645

def _extract_basic_item_renderer(item):

3646

# Modified from _extract_grid_item_renderer

3647

known_basic_renderers = (

3648

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3649

)

3650

for key, renderer in item.items():

3651

if not isinstance(renderer, dict):

3652

continue

3653

elif key in known_basic_renderers:

3654

return renderer

3655

elif key.startswith('grid') and key.endswith('Renderer'):

3656

return renderer

3657

3658

def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item in ``grid_renderer['items']``.

    For each item the basic renderer is looked up and handled by the first
    matching rule: playlist id, video id, channel id, and finally the
    renderer's navigation endpoint URL if one of the YouTube extractors
    accepts it. Items matching none of these are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(ep_url), video_title=title)

3697

3698

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a music list item renderer.

    Checked in order: the item's own video id, the watch endpoint's
    playlist id (combined with its video id when present), and the browse
    endpoint's browse id. Returns ``None`` when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        video_id = item_video_id
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

3715

3716

def _shelf_entries_from_content(self, shelf_renderer):

3717

content = shelf_renderer.get('content')

3718

if not isinstance(content, dict):

3719

return

3720

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3721

if renderer:

3722

# TODO: add support for nested playlists so each shelf is processed

3723

# as separate playlist

3724

# TODO: this includes only first N items

3725

for entry in self._grid_entries(renderer):

3726

yield entry

3727

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    If the shelf carries an endpoint URL, a URL result for it is yielded
    first; with ``skip_channels`` set, shelves whose URL contains
    ``/channels?`` produce nothing at all. Entries embedded in the shelf
    content are then yielded as well.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3748

3749

def _playlist_entries(self, video_list_renderer):

3750

for content in video_list_renderer['contents']:

3751

if not isinstance(content, dict):

3752

continue

3753

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3754

if not isinstance(renderer, dict):

3755

continue

3756

video_id = renderer.get('videoId')

3757

if not video_id:

3758

continue

3759

yield self._extract_video(renderer)

3760

3761

def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if it has an id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3768

3769

def _video_entry(self, video_renderer):

3770

video_id = video_renderer.get('videoId')

3771

if video_id:

3772

return self._extract_video(video_renderer)

3773

3774

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    and every link in the post text that ``YoutubeIE`` accepts, except a
    link to the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

3809

3810

def _post_thread_continuation_entries(self, post_thread_continuation):

3811

contents = post_thread_continuation.get('contents')

3812

if not isinstance(contents, list):

3813

return

3814

for content in contents:

3815

renderer = content.get('backstagePostThreadRenderer')

3816

if not isinstance(renderer, dict):

3817

continue

3818

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3823

for content in contents:

3824

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3825

if video_renderer:

3826

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield all entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset in place to
    ``[None]`` and its single slot is filled with the continuation found
    while walking the renderers (the innermost renderer that provides one,
    falling back to the section renderer and finally the parent renderer).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Handlers for the renderers that may appear inside a section renderer;
    # each returns an iterable of entries (falsy entries are dropped below)
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: only rich items are handled at this level
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                handler = known_renderers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3876

3877

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of a tab, following continuations page by page.

    Entries of the tab's initial section-list/rich-grid renderer come first.
    While a continuation is available, the next page is requested and its
    entries are taken either from ``continuationContents`` or from the
    ``appendContinuationItemsAction`` items. Iteration stops when no
    continuation remains, a request returns nothing, or a response holds
    no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up ``continuation_list`` at call time, so it always fills
        # the list most recently bound in this method
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # handler, plus the key under which the handler expects the item list
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First form: entries under 'continuationContents'
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second form: items appended through a response action; the type of
        # the first item decides how the whole item list is processed
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            handler, list_key = known_renderers[key]
            video_items_renderer = {list_key: continuation_items}
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is ``True``.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered. If no
    tab is selected, ``ExtractorError`` is raised when ``fatal`` is set;
    otherwise ``None`` is returned.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None

3963

3964

@classmethod
def _extract_uploader(cls, data):
    """Return uploader fields taken from the playlist's secondary sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields that could not be determined are omitted, and
    an empty dict is returned when no owner information is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

3978

3979

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed (channel/playlist) page.

    Metadata comes from the channel metadata renderer when present, else
    from the playlist metadata renderer, plus the primary sidebar and the
    page header. The entries are produced lazily from the selected tab.

    @param item_id: fallback playlist id when no channel id is available
    @param ytcfg:   ytcfg passed on for the continuation requests
    @param data:    initial data / API response of the page
    @param tabs:    list of tab renderers; one of them must be selected
    """
    title = description = channel_url = channel_name = channel_id = None

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = None
    tags = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Stays None for playlist metadata; replaced by item_id below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        base_url = (url or '').split('=')[0]
        return url_or_none(base_url + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the selected tab's name(s) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4070

4071

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is the ``playlist`` renderer of a ``next`` response. Videos
    up to and including the last one already yielded are skipped on every
    new page. Iteration ends when a page has no (new) videos, when the
    playlist is missing from a response, or when the very first video shows
    up again (Mixes loop around).

    @param playlist:    playlist renderer of the first page
    @param playlist_id: id of the Mix
    @param data:        initial data, used for account/visitor information
    @param ytcfg:       ytcfg used for the API requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        # A response may lack the playlist (or its contents) altogether;
        # treat that as the end instead of failing on a None/absent key
        if not try_get(playlist, lambda x: x['contents']):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not necessarily a panel video renderer with a
        # watch endpoint; fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4105

4106

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist renderer found on a watch page.

    If the playlist's endpoint resolves to a URL different from ``url``, a
    URL result pointing at it is returned; otherwise the playlist is
    extracted here as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4123

4124

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                # The first selected entry with a label is treated like a badge
                badge_labels.add(label.lower())
                break

    is_private = None
    is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)

4155

4156

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` entry among the playlist sidebar items.

    Only values of ``expected_type`` are considered; ``None`` is returned
    when the sidebar is missing or holds no such entry.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
    return None

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns ``None`` when the page has no primary sidebar renderer. If the
    menu item cannot be found, default browse parameters are used.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4200

4201

def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying when needed.

    A retry happens after a network error (other than HTTP 403/429) and
    when the initial data lacks all of the expected top-level keys, up to
    the ``extractor_retries`` parameter. With ``fatal`` unset, failures are
    reported as warnings instead of being raised.

    @returns: tuple ``(webpage, data)`` holding whatever was obtained last
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    count = -1
    # A plain counter loop is kept: the retry limit need not be an integer
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            retry_suffix = ' (retry #%d)' % count if count else ''
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried, except HTTP 403/429
            retryable = isinstance(e.cause, network_exceptions) and (
                not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(e.cause or e.msg)
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Obtain the page data for ``url`` from the webpage or, failing that, the API.

    Unless ``webpage`` is listed in the ``skip`` extractor argument, the
    webpage is downloaded first. A redirect to the home page is reported
    (raised when ``fatal``). If no data was obtained, the tab endpoint is
    resolved through the API.

    @returns: tuple ``(data, ytcfg)``
    """
    data = None
    skip_args = self._configuration_arg('skip')
    if 'webpage' not in skip_args:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4273

4274

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and fetch the endpoint it maps to.

    The URL is resolved with ``navigation/resolve_url``. A browse endpoint
    is then fetched from ``browse``, a watch endpoint from ``next``. If
    neither is present, ``ExtractorError`` is raised when ``fatal`` is
    set; otherwise a warning is reported and ``None`` is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Resolved endpoint key -> API endpoint to query, in order of preference
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
    return None

4291

4292

_SEARCH_PARAMS = None

4293

4294

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search, following continuations until none is left.

    @param query:          search terms
    @param params:         search ``params`` value; defaults to the class's
                           ``_SEARCH_PARAMS`` and is omitted when falsy
    @param default_client: client used for the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths under which the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # A response is acceptable if it has any of the paths' top-level keys
    check_get_keys = tuple({path[0] for path in content_keys})

    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        # Merge the continuation of the previous page (if any) into the request
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4323

IE_DESC = 'YouTube Tabs'

4324

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4333

(?P<not_channel>

4334

feed/|hashtag/|

4335

(?:playlist|watch)\?.*?\blist=

4336

)|

4337

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4342

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4343

}

4344

IE_NAME = 'youtube:tab'

4345

4346

_TESTS = [{

4347

'note': 'playlists, multipage',

4348

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4349

'playlist_mincount': 94,

4350

'info_dict': {

4351

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4352

'title': 'Igor Kleiner - Playlists',

4353

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4354

'uploader': 'Igor Kleiner',

4355

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4356

'channel': 'Igor Kleiner',

4357

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4358

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4359

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4360

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4361

'channel_follower_count': int

4362

},

4363

}, {

4364

'note': 'playlists, multipage, different order',

4365

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4366

'playlist_mincount': 94,

4367

'info_dict': {

4368

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4369

'title': 'Igor Kleiner - Playlists',

4370

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4371

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4372

'uploader': 'Igor Kleiner',

4373

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4374

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4375

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4376

'channel': 'Igor Kleiner',

4377

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4378

'channel_follower_count': int

4379

},

4380

}, {

4381

'note': 'playlists, series',

4382

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4383

'playlist_mincount': 5,

4384

'info_dict': {

4385

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4386

'title': '3Blue1Brown - Playlists',

4387

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4388

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4389

'uploader': '3Blue1Brown',

4390

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4391

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4392

'channel': '3Blue1Brown',

4393

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4394

'tags': ['Mathematics'],

4395

'channel_follower_count': int

4396

},

4397

}, {

4398

'note': 'playlists, singlepage',

4399

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4400

'playlist_mincount': 4,

4401

'info_dict': {

4402

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4403

'title': 'ThirstForScience - Playlists',

4404

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4405

'uploader': 'ThirstForScience',

4406

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4407

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4408

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4409

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4410

'tags': 'count:13',

4411

'channel': 'ThirstForScience',

4412

'channel_follower_count': int

4413

}

4414

}, {

4415

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4416

'only_matching': True,

4417

}, {

4418

'note': 'basic, single video playlist',

4419

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4420

'info_dict': {

4421

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4422

'uploader': 'Sergey M.',

4423

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4424

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4429

'channel': 'Sergey M.',

4430

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4431

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4432

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4437

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4438

'info_dict': {

4439

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4440

'uploader': 'Sergey M.',

4441

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4442

'title': 'youtube-dl empty playlist',

4443

'tags': [],

4444

'channel': 'Sergey M.',

4445

'description': '',

4446

'modified_date': '20160902',

4447

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4448

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4449

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4455

'info_dict': {

4456

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4457

'title': 'lex will - Home',

4458

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4459

'uploader': 'lex will',

4460

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4461

'channel': 'lex will',

4462

'tags': ['bible', 'history', 'prophesy'],

4463

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4464

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4465

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4466

'channel_follower_count': int

4467

},

4468

'playlist_mincount': 2,

4469

}, {

4470

'note': 'Videos tab',

4471

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4472

'info_dict': {

4473

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4474

'title': 'lex will - Videos',

4475

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4476

'uploader': 'lex will',

4477

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4478

'tags': ['bible', 'history', 'prophesy'],

4479

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4480

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4481

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4482

'channel': 'lex will',

4483

'channel_follower_count': int

4484

},

4485

'playlist_mincount': 975,

4486

}, {

4487

'note': 'Videos tab, sorted by popular',

4488

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4489

'info_dict': {

4490

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4491

'title': 'lex will - Videos',

4492

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4493

'uploader': 'lex will',

4494

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4495

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4496

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4497

'channel': 'lex will',

4498

'tags': ['bible', 'history', 'prophesy'],

4499

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4500

'channel_follower_count': int

4501

},

4502

'playlist_mincount': 199,

4503

}, {

4504

'note': 'Playlists tab',

4505

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4506

'info_dict': {

4507

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4508

'title': 'lex will - Playlists',

4509

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4510

'uploader': 'lex will',

4511

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4512

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4513

'channel': 'lex will',

4514

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4515

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4516

'tags': ['bible', 'history', 'prophesy'],

4517

'channel_follower_count': int

4518

},

4519

'playlist_mincount': 17,

4520

}, {

4521

'note': 'Community tab',

4522

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4523

'info_dict': {

4524

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4525

'title': 'lex will - Community',

4526

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4527

'uploader': 'lex will',

4528

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4529

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4530

'channel': 'lex will',

4531

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4532

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4533

'tags': ['bible', 'history', 'prophesy'],

4534

'channel_follower_count': int

4535

},

4536

'playlist_mincount': 18,

4537

}, {

4538

'note': 'Channels tab',

4539

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4540

'info_dict': {

4541

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4542

'title': 'lex will - Channels',

4543

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4544

'uploader': 'lex will',

4545

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4546

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4547

'channel': 'lex will',

4548

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4549

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4550

'tags': ['bible', 'history', 'prophesy'],

4551

'channel_follower_count': int

4552

},

4553

'playlist_mincount': 12,

4554

}, {

4555

'note': 'Search tab',

4556

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4557

'playlist_mincount': 40,

4558

'info_dict': {

4559

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4560

'title': '3Blue1Brown - Search - linear algebra',

4561

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4562

'uploader': '3Blue1Brown',

4563

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4564

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4565

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4566

'tags': ['Mathematics'],

4567

'channel': '3Blue1Brown',

4568

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4569

'channel_follower_count': int

4570

},

4571

}, {

4572

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4573

'only_matching': True,

4574

}, {

4575

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4576

'only_matching': True,

4577

}, {

4578

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4579

'only_matching': True,

4580

}, {

4581

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4582

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4583

'info_dict': {

4584

'title': '29C3: Not my department',

4585

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4586

'uploader': 'Christiaan008',

4587

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4588

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4589

'tags': [],

4590

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4591

'view_count': int,

4592

'modified_date': '20150605',

4593

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4594

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4595

'channel': 'Christiaan008',

4596

},

4597

'playlist_count': 96,

4598

}, {

4599

'note': 'Large playlist',

4600

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4601

'info_dict': {

4602

'title': 'Uploads from Cauchemar',

4603

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4604

'uploader': 'Cauchemar',

4605

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4606

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4607

'tags': [],

4608

'modified_date': r're:\d{8}',

4609

'channel': 'Cauchemar',

4610

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4611

'view_count': int,

4612

'description': '',

4613

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4614

},

4615

'playlist_mincount': 1123,

4616

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4617

}, {

4618

'note': 'even larger playlist, 8832 videos',

4619

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4620

'only_matching': True,

4621

}, {

4622

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4623

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4624

'info_dict': {

4625

'title': 'Uploads from Interstellar Movie',

4626

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4627

'uploader': 'Interstellar Movie',

4628

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4629

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4630

'tags': [],

4631

'view_count': int,

4632

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4633

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4634

'channel': 'Interstellar Movie',

4635

'description': '',

4636

'modified_date': r're:\d{8}',

4637

},

4638

'playlist_mincount': 21,

4639

}, {

4640

'note': 'Playlist with "show unavailable videos" button',

4641

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4642

'info_dict': {

4643

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4644

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4645

'uploader': 'Phim Siêu Nhân Nhật Bản',

4646

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4647

'view_count': int,

4648

'channel': 'Phim Siêu Nhân Nhật Bản',

4649

'tags': [],

4650

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4651

'description': '',

4652

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4653

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4654

'modified_date': r're:\d{8}',

4655

},

4656

'playlist_mincount': 200,

4657

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4658

}, {

4659

'note': 'Playlist with unavailable videos in page 7',

4660

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4661

'info_dict': {

4662

'title': 'Uploads from BlankTV',

4663

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4664

'uploader': 'BlankTV',

4665

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4666

'channel': 'BlankTV',

4667

'channel_url': 'https://www.youtube.com/c/blanktv',

4668

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4669

'view_count': int,

4670

'tags': [],

4671

'uploader_url': 'https://www.youtube.com/c/blanktv',

4672

'modified_date': r're:\d{8}',

4673

'description': '',

4674

},

4675

'playlist_mincount': 1000,

4676

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4677

}, {

4678

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4679

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4680

'info_dict': {

4681

'title': 'Data Analysis with Dr Mike Pound',

4682

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4683

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4684

'uploader': 'Computerphile',

4685

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4686

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4687

'tags': [],

4688

'view_count': int,

4689

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4690

'channel_url': 'https://www.youtube.com/user/Computerphile',

4691

'channel': 'Computerphile',

4692

},

4693

'playlist_mincount': 11,

4694

}, {

4695

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4696

'only_matching': True,

4697

}, {

4698

'note': 'Playlist URL that does not actually serve a playlist',

4699

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4704

'uploader': 'STREEM',

4705

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4706

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4707

'upload_date': '20150526',

4708

'license': 'Standard YouTube License',

4709

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4710

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4717

},

4718

'skip': 'This video is not available.',

4719

'add_ie': [YoutubeIE.ie_key()],

4720

}, {

4721

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4722

'only_matching': True,

4723

}, {

4724

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4725

'only_matching': True,

4726

}, {

4727

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4728

'info_dict': {

4729

'id': 'GgL890LIznQ', # This will keep changing

4730

'ext': 'mp4',

4731

'title': str,

4732

'uploader': 'Sky News',

4733

'uploader_id': 'skynews',

4734

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4735

'upload_date': r're:\d{8}',

4736

'description': str,

4737

'categories': ['News & Politics'],

4738

'tags': list,

4739

'like_count': int,

4740

'release_timestamp': 1642502819,

4741

'channel': 'Sky News',

4742

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4743

'age_limit': 0,

4744

'view_count': int,

4745

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4746

'playable_in_embed': True,

4747

'release_date': '20220118',

4748

'availability': 'public',

4749

'live_status': 'is_live',

4750

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4751

'channel_follower_count': int

4752

},

4753

'params': {

4754

'skip_download': True,

4755

},

4756

'expected_warnings': ['Ignoring subtitle tracks found in '],

4757

}, {

4758

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4763

'uploader': 'The Young Turks',

4764

'uploader_id': 'TheYoungTurks',

4765

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4766

'upload_date': '20150715',

4767

'license': 'Standard YouTube License',

4768

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4769

'categories': ['News & Politics'],

4770

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4775

},

4776

'only_matching': True,

4777

}, {

4778

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4779

'only_matching': True,

4780

}, {

4781

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4782

'only_matching': True,

4783

}, {

4784

'note': 'A channel that is not live. Should raise error',

4785

'url': 'https://www.youtube.com/user/numberphile/live',

4786

'only_matching': True,

4787

}, {

4788

'url': 'https://www.youtube.com/feed/trending',

4789

'only_matching': True,

4790

}, {

4791

'url': 'https://www.youtube.com/feed/library',

4792

'only_matching': True,

4793

}, {

4794

'url': 'https://www.youtube.com/feed/history',

4795

'only_matching': True,

4796

}, {

4797

'url': 'https://www.youtube.com/feed/subscriptions',

4798

'only_matching': True,

4799

}, {

4800

'url': 'https://www.youtube.com/feed/watch_later',

4801

'only_matching': True,

4802

}, {

4803

'note': 'Recommended - redirects to home page.',

4804

'url': 'https://www.youtube.com/feed/recommended',

4805

'only_matching': True,

4806

}, {

4807

'note': 'inline playlist with not always working continuations',

4808

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4809

'only_matching': True,

4810

}, {

4811

'url': 'https://www.youtube.com/course',

4812

'only_matching': True,

4813

}, {

4814

'url': 'https://www.youtube.com/zsecurity',

4815

'only_matching': True,

4816

}, {

4817

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4818

'only_matching': True,

4819

}, {

4820

'url': 'https://www.youtube.com/TheYoungTurks/live',

4821

'only_matching': True,

4822

}, {

4823

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4830

}, {

4831

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4832

'only_matching': True,

4833

}, {

4834

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4835

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4836

'only_matching': True

4837

}, {

4838

'note': '/browse/ should redirect to /channel/',

4839

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4840

'only_matching': True

4841

}, {

4842

'note': 'VLPL, should redirect to playlist?list=PL...',

4843

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4844

'info_dict': {

4845

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4846

'uploader': 'NoCopyrightSounds',

4847

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4848

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4849

'title': 'NCS Releases',

4850

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4851

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4852

'modified_date': r're:\d{8}',

4853

'view_count': int,

4854

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4855

'tags': [],

4856

'channel': 'NoCopyrightSounds',

4857

},

4858

'playlist_mincount': 166,

4859

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4860

}, {

4861

'note': 'Topic, should redirect to playlist?list=UU...',

4862

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4863

'info_dict': {

4864

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4865

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4866

'title': 'Uploads from Royalty Free Music - Topic',

4867

'uploader': 'Royalty Free Music - Topic',

4868

'tags': [],

4869

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4870

'channel': 'Royalty Free Music - Topic',

4871

'view_count': int,

4872

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4873

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4874

'modified_date': r're:\d{8}',

4875

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4876

'description': '',

4877

},

4878

'expected_warnings': [

4879

'The URL does not have a videos tab',

4880

r'[Uu]navailable videos (are|will be) hidden',

4881

],

4882

'playlist_mincount': 101,

4883

}, {

4884

'note': 'Topic without a UU playlist',

4885

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

4886

'info_dict': {

4887

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

4888

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

4889

'tags': [],

4890

},

4891

'expected_warnings': [

4892

'the playlist redirect gave error',

4893

],

4894

'playlist_mincount': 9,

4895

}, {

4896

'note': 'Youtube music Album',

4897

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

4898

'info_dict': {

4899

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

4900

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

4905

'modified_date': r're:\d{8}',

4906

},

4907

'playlist_count': 50,

4908

}, {

4909

'note': 'unlisted single video playlist',

4910

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4911

'info_dict': {

4912

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4913

'uploader': 'colethedj',

4914

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4915

'title': 'yt-dlp unlisted playlist test',

4916

'availability': 'unlisted',

4917

'tags': [],

4918

'modified_date': '20211208',

4919

'channel': 'colethedj',

4920

'view_count': int,

4921

'description': '',

4922

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

4923

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4924

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

4929

'url': 'https://www.youtube.com/feed/recommended',

4930

'info_dict': {

4931

'id': 'recommended',

4932

'title': 'recommended',

4933

'tags': [],

4934

},

4935

'playlist_mincount': 50,

4936

'params': {

4937

'skip_download': True,

4938

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4939

},

4940

}, {

4941

'note': 'API Fallback: /videos tab, sorted by oldest first',

4942

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

4943

'info_dict': {

4944

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4945

'title': 'Cody\'sLab - Videos',

4946

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

4947

'uploader': 'Cody\'sLab',

4948

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4949

'channel': 'Cody\'sLab',

4950

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4951

'tags': [],

4952

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4953

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4954

'channel_follower_count': int

4955

},

4956

'playlist_mincount': 650,

4957

'params': {

4958

'skip_download': True,

4959

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4960

},

4961

}, {

4962

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

4963

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4964

'info_dict': {

4965

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4966

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4967

'title': 'Uploads from Royalty Free Music - Topic',

4968

'uploader': 'Royalty Free Music - Topic',

4969

'modified_date': r're:\d{8}',

4970

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4971

'description': '',

4972

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4973

'tags': [],

4974

'channel': 'Royalty Free Music - Topic',

4975

'view_count': int,

4976

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4977

},

4978

'expected_warnings': [

4979

'does not have a videos tab',

4980

r'[Uu]navailable videos (are|will be) hidden',

4981

],

4982

'playlist_mincount': 101,

4983

'params': {

4984

'skip_download': True,

4985

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4986

},

4987

}, {

4988

'note': 'non-standard redirect to regional channel',

4989

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

4990

'only_matching': True

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    Any URL that YoutubeIE declares suitable is rejected here; every other
    URL is judged by the inherited `suitable` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

4997

4998

# Splits a tab URL into three named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional '/<word>' path segment, attempted only when the
#          `not_channel` group of _VALID_URL did not take part in the match
#   post - whatever remains of the URL
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

4999

5000

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab, channel or playlist URL and hand over to the matching routine.

    The URL is first moved to the www.youtube.com host and may then be
    rewritten (music URLs, bare channel pages, watch URLs without a video id)
    before its data is fetched with self._extract_data. The result comes from
    self._extract_from_tabs or self._extract_from_playlist, or is a url_result
    pointing at a single video or at another tab URL.

    Raises ExtractorError when a music album cannot be resolved to a playlist,
    when a watch URL carries neither a video id nor a playlist id, when the
    live tab was requested but a tab listing came back, when an explicitly
    requested tab is missing, or when nothing recognisable is found.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def split_url(target):
        # Named groups of _URL_RE; groups that did not match become ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: ('' if value is None else value) for name, value in groups.items()}

    def redirects_enabled():
        return 'no-youtube-channel-redirect' not in compat_opts

    def single_video_result(vid):
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    mobj = split_url(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab = post = ''
            is_channel = False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            canonical_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not canonical_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(canonical_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and redirects_enabled():
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return single_video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and redirects_enabled():
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab_name = self._extract_selected_tab(tabs).get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if redirects_enabled():
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name and requested_tab_name != selected_tab_name:
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was explicitly requested, so its absence is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Prefer the video id reported by the response over the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return single_video_result(video_id)

5129

5130

5131

class YoutubePlaylistIE(InfoExtractor):
    """Recognise playlist IDs / playlist URLs and delegate them to YoutubeTabIE.

    The extractor does no network work of its own: ``_real_extract`` only
    rewrites the input into a ``https://www.youtube.com/playlist`` URL and
    returns a ``url_result`` pointing at ``YoutubeTabIE``.
    """
    IE_DESC = 'YouTube playlists'
    # Verbose regex: the whole scheme/host/"...?list=" prefix is optional, so a
    # bare playlist ID on its own is accepted as well.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Declines when ``YoutubeTabIE`` already accepts the URL, or when the
        query string carries a non-empty ``v`` parameter; otherwise defers to
        the inherited ``suitable`` check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is imported at module scope; no function-level import needed.
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a /playlist URL and hand it over to YoutubeTabIE.

        The original query string is carried over; when the input has none
        (e.g. a bare ID) ``list=<playlist_id>`` is used instead. If
        ``YoutubeBaseInfoExtractor.is_music_url`` reports true for the input,
        that fact is smuggled into the resulting URL.
        """
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below.
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5240

5241

5242

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist ID.

    The link is converted to the equivalent youtube.com/watch URL and passed
    on to ``YoutubeTabIE``.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the watch?v=...&list=... URL and delegate to YoutubeTabIE."""
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is keyed on the playlist ID, not on the video ID.
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5289

5290

5291

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's /live page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's ``/live`` URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5304

5305

5306

class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to that user's /videos page."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5320

5321

5322

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Ignore the matched keyword and delegate the LL playlist to YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

5339

5340

5341

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Keyword search extractor registered under the ``ytsearch`` search key.

    Purely declarative: all behaviour comes from the two base classes.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Keyword search extractor registered under the ``ytsearchdate`` search key.

    Purely declarative: all behaviour comes from the two base classes.
    """
    # Derived from the plain search extractor's name so the two stay in sync.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from youtube.com ``/results`` or ``/search`` URLs.

    The search text is read from ``search_query`` (or ``q``) and the optional
    ``sp`` parameter is forwarded unchanged to ``_search_results``.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result whose ID and title are both the search text."""
        query_args = parse_qs(url)
        # _VALID_URL requires a non-empty search_query= or q= to be present.
        terms = query_args.get('search_query') or query_args.get('q')
        query = terms[0]
        search_params = query_args.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5399

5400

5401

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from music.youtube.com search URLs.

    A result section can be chosen either with an explicit ``sp`` query
    parameter or with a URL fragment naming one of the ``_SECTIONS`` keys
    (for example ``#songs``).
    """
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower case) -> value used for the ``sp`` search parameter.
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist result titled ``<query>`` or ``<query> - <section>``.

        With an ``sp`` parameter, the section label is the matching
        ``_SECTIONS`` key, or the raw ``sp`` value when none matches. Without
        it, the URL fragment is looked up in ``_SECTIONS``; an unknown or
        missing fragment means no section and no search params.
        """
        query_args = parse_qs(url)
        terms = query_args.get('search_query') or query_args.get('q')
        query = terms[0]
        params = query_args.get('sp', (None,))[0]

        if params:
            # Reverse lookup: find the section name for this sp value,
            # falling back to the sp value itself.
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # Only the text between the first and second '#' is considered.
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5452

5453

5454

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    A subclass only has to provide ``_FEED_NAME``; it is used both for the
    extractor name (``youtube:<_FEED_NAME>``) and for the feed URL
    (``https://www.youtube.com/feed/<_FEED_NAME>``).
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Ignore the matched input and delegate the feed URL to YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5469

5470

5471

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Ignore the matched keyword and delegate the WL playlist to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5483

5484

5485

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``.

    Matches the bare youtube.com front page as well as the ``:ytrec`` /
    ``:ytrecommended`` keywords. Unlike the base class default, login is not
    required.
    """
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, triggered by the ``:ytsubs`` keyword family."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, triggered by ``:ythis`` / ``:ythistory``."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID is given.

    The pattern only matches when the URL ends right after ``watch?`` (with at
    most one of a few known non-video parameters) or after a single
    ``attribution_link?a=...`` parameter. Extraction always fails with an
    expected error that explains shell quoting of ``&``.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name yt-dlp, the program this file ships with,
        # so the user can copy them as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Placeholder for youtube.com/clip/ URLs: warn, then hand off to 'Generic'."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit a warning and return a url_result for the same URL via 'Generic'."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')

5582

5583

5584

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1-10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short ID and the URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)