jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	js_to_json,
	46	mimetype2ext,
	47	network_exceptions,
	48	NO_DEFAULT,
	49	orderedSet,
	50	parse_codecs,
	51	parse_count,
	52	parse_duration,
	53	parse_iso8601,
	54	parse_qs,
	55	qualities,
	56	remove_end,
	57	remove_start,
	58	smuggle_url,
	59	str_or_none,
	60	str_to_int,
	61	strftime_or_none,
	62	traverse_obj,
	63	try_get,
	64	unescapeHTML,
	65	unified_strdate,
	66	unified_timestamp,
	67	unsmuggle_url,
	68	update_url_query,
	69	url_or_none,
	70	urljoin,
	71	variadic,
	72	)
	73
	74
	75	def get_first(obj, keys, **kwargs):
	76	return traverse_obj(obj, (..., variadic(keys)), *kwargs, get_all=False)
	77
	78
	79	# any clients starting with _ cannot be explicity requested by the user
	80	INNERTUBE_CLIENTS = {
	81	'web': {
	82	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	83	'INNERTUBE_CONTEXT': {
	84	'client': {
	85	'clientName': 'WEB',
	86	'clientVersion': '2.20211221.00.00',
	87	}
	88	},
	89	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	90	},
	91	'web_embedded': {
	92	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	93	'INNERTUBE_CONTEXT': {
	94	'client': {
	95	'clientName': 'WEB_EMBEDDED_PLAYER',
	96	'clientVersion': '1.20211215.00.01',
	97	},
	98	},
	99	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	100	},
	101	'web_music': {
	102	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	103	'INNERTUBE_HOST': 'music.youtube.com',
	104	'INNERTUBE_CONTEXT': {
	105	'client': {
	106	'clientName': 'WEB_REMIX',
	107	'clientVersion': '1.20211213.00.00',
	108	}
	109	},
	110	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	111	},
	112	'web_creator': {
	113	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	114	'INNERTUBE_CONTEXT': {
	115	'client': {
	116	'clientName': 'WEB_CREATOR',
	117	'clientVersion': '1.20211220.02.00',
	118	}
	119	},
	120	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	121	},
	122	'android': {
	123	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	124	'INNERTUBE_CONTEXT': {
	125	'client': {
	126	'clientName': 'ANDROID',
	127	'clientVersion': '16.49',
	128	}
	129	},
	130	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	131	'REQUIRE_JS_PLAYER': False
	132	},
	133	'android_embedded': {
	134	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	135	'INNERTUBE_CONTEXT': {
	136	'client': {
	137	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	138	'clientVersion': '16.49',
	139	},
	140	},
	141	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	142	'REQUIRE_JS_PLAYER': False
	143	},
	144	'android_music': {
	145	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	146	'INNERTUBE_CONTEXT': {
	147	'client': {
	148	'clientName': 'ANDROID_MUSIC',
	149	'clientVersion': '4.57',
	150	}
	151	},
	152	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	153	'REQUIRE_JS_PLAYER': False
	154	},
	155	'android_creator': {
	156	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	157	'INNERTUBE_CONTEXT': {
	158	'client': {
	159	'clientName': 'ANDROID_CREATOR',
	160	'clientVersion': '21.47',
	161	},
	162	},
	163	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	164	'REQUIRE_JS_PLAYER': False
	165	},
	166	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	167	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	168	'ios': {
	169	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	170	'INNERTUBE_CONTEXT': {
	171	'client': {
	172	'clientName': 'IOS',
	173	'clientVersion': '16.46',
	174	'deviceModel': 'iPhone14,3',
	175	}
	176	},
	177	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	178	'REQUIRE_JS_PLAYER': False
	179	},
	180	'ios_embedded': {
	181	'INNERTUBE_CONTEXT': {
	182	'client': {
	183	'clientName': 'IOS_MESSAGES_EXTENSION',
	184	'clientVersion': '16.46',
	185	'deviceModel': 'iPhone14,3',
	186	},
	187	},
	188	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	189	'REQUIRE_JS_PLAYER': False
	190	},
	191	'ios_music': {
	192	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	193	'INNERTUBE_CONTEXT': {
	194	'client': {
	195	'clientName': 'IOS_MUSIC',
	196	'clientVersion': '4.57',
	197	},
	198	},
	199	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	200	'REQUIRE_JS_PLAYER': False
	201	},
	202	'ios_creator': {
	203	'INNERTUBE_CONTEXT': {
	204	'client': {
	205	'clientName': 'IOS_CREATOR',
	206	'clientVersion': '21.47',
	207	},
	208	},
	209	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	210	'REQUIRE_JS_PLAYER': False
	211	},
	212	# mweb has 'ultralow' formats
	213	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	214	'mweb': {
	215	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	216	'INNERTUBE_CONTEXT': {
	217	'client': {
	218	'clientName': 'MWEB',
	219	'clientVersion': '2.20211221.01.00',
	220	}
	221	},
	222	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	223	}
	224	}
	225
	226
	227	def build_innertube_clients():
	228	THIRD_PARTY = {
	229	'embedUrl': 'https://google.com', # Can be any valid URL
	230	}
	231	BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
	232	priority = qualities(BASE_CLIENTS[::-1])
	233
	234	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	235	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	236	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	237	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	238	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	239
	240	base_client, *variant = client.split('_')
	241	ytcfg['priority'] = 10 * priority(base_client)
	242
	243	if variant == ['embedded']:
	244	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	245	INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
	246	agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	247	agegate_ytcfg['priority'] -= 1
	248	ytcfg['priority'] -= 2
	249	elif variant:
	250	ytcfg['priority'] -= 3
	251
	252
	253	build_innertube_clients()
	254
	255
	256	class YoutubeBaseInfoExtractor(InfoExtractor):
	257	"""Provide base functions for Youtube extractors"""
	258
	259	_RESERVED_NAMES = (
	260	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	261	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	262	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	263	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	264
	265	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	266
	267	_NETRC_MACHINE = 'youtube'
	268
	269	# If True it will raise an error if no login info is provided
	270	_LOGIN_REQUIRED = False
	271
	272	_INVIDIOUS_SITES = (
	273	# invidious-redirect websites
	274	r'(?:www\.)?redirect\.invidious\.io',
	275	r'(?:(?:www\|dev)\.)?invidio\.us',
	276	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	277	r'(?:www\.)?invidious\.pussthecat\.org',
	278	r'(?:www\.)?invidious\.zee\.li',
	279	r'(?:www\.)?invidious\.ethibox\.fr',
	280	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	281	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	282	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	283	# youtube-dl invidious instances list
	284	r'(?:(?:www\|no)\.)?invidiou\.sh',
	285	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	286	r'(?:www\.)?invidious\.kabi\.tk',
	287	r'(?:www\.)?invidious\.mastodon\.host',
	288	r'(?:www\.)?invidious\.zapashcanon\.fr',
	289	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	290	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	291	r'(?:www\.)?invidious\.himiko\.cloud',
	292	r'(?:www\.)?invidious\.reallyancient\.tech',
	293	r'(?:www\.)?invidious\.tube',
	294	r'(?:www\.)?invidiou\.site',
	295	r'(?:www\.)?invidious\.site',
	296	r'(?:www\.)?invidious\.xyz',
	297	r'(?:www\.)?invidious\.nixnet\.xyz',
	298	r'(?:www\.)?invidious\.048596\.xyz',
	299	r'(?:www\.)?invidious\.drycat\.fr',
	300	r'(?:www\.)?inv\.skyn3t\.in',
	301	r'(?:www\.)?tube\.poal\.co',
	302	r'(?:www\.)?tube\.connect\.cafe',
	303	r'(?:www\.)?vid\.wxzm\.sx',
	304	r'(?:www\.)?vid\.mint\.lgbt',
	305	r'(?:www\.)?vid\.puffyan\.us',
	306	r'(?:www\.)?yewtu\.be',
	307	r'(?:www\.)?yt\.elukerio\.org',
	308	r'(?:www\.)?yt\.lelux\.fi',
	309	r'(?:www\.)?invidious\.ggc-project\.de',
	310	r'(?:www\.)?yt\.maisputain\.ovh',
	311	r'(?:www\.)?ytprivate\.com',
	312	r'(?:www\.)?invidious\.13ad\.de',
	313	r'(?:www\.)?invidious\.toot\.koeln',
	314	r'(?:www\.)?invidious\.fdn\.fr',
	315	r'(?:www\.)?watch\.nettohikari\.com',
	316	r'(?:www\.)?invidious\.namazso\.eu',
	317	r'(?:www\.)?invidious\.silkky\.cloud',
	318	r'(?:www\.)?invidious\.exonip\.de',
	319	r'(?:www\.)?invidious\.riverside\.rocks',
	320	r'(?:www\.)?invidious\.blamefran\.net',
	321	r'(?:www\.)?invidious\.moomoo\.de',
	322	r'(?:www\.)?ytb\.trom\.tf',
	323	r'(?:www\.)?yt\.cyberhost\.uk',
	324	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	325	r'(?:www\.)?qklhadlycap4cnod\.onion',
	326	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	327	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	328	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	329	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	330	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	331	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	332	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	333	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	334	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	335	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	336	)
	337
	338	def _login(self):
	339	"""
	340	Attempt to log in to YouTube.
	341	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	342	"""
	343
	344	if (self._LOGIN_REQUIRED
	345	and self.get_param('cookiefile') is None
	346	and self.get_param('cookiesfrombrowser') is None):
	347	self.raise_login_required(
	348	'Login details are needed to download this content', method='cookies')
	349	username, password = self._get_login_info()
	350	if username:
	351	self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
	352
	353	def _initialize_consent(self):
	354	cookies = self._get_cookies('https://www.youtube.com/')
	355	if cookies.get('__Secure-3PSID'):
	356	return
	357	consent_id = None
	358	consent = cookies.get('CONSENT')
	359	if consent:
	360	if 'YES' in consent.value:
	361	return
	362	consent_id = self._search_regex(
	363	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	364	if not consent_id:
	365	consent_id = random.randint(100, 999)
	366	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	367
	368	def _initialize_pref(self):
	369	cookies = self._get_cookies('https://www.youtube.com/')
	370	pref_cookie = cookies.get('PREF')
	371	pref = {}
	372	if pref_cookie:
	373	try:
	374	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	375	except ValueError:
	376	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	377	pref.update({'hl': 'en', 'tz': 'UTC'})
	378	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	379
	380	def _real_initialize(self):
	381	self._initialize_pref()
	382	self._initialize_consent()
	383	self._login()
	384
	385	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	386	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	387	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	388
	389	def _get_default_ytcfg(self, client='web'):
	390	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	391
	392	def _get_innertube_host(self, client='web'):
	393	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	394
	395	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	396	# try_get but with fallback to default ytcfg client values when present
	397	_func = lambda y: try_get(y, getter, expected_type)
	398	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	399
	400	def _extract_client_name(self, ytcfg, default_client='web'):
	401	return self._ytcfg_get_safe(
	402	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	403	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	404
	405	def _extract_client_version(self, ytcfg, default_client='web'):
	406	return self._ytcfg_get_safe(
	407	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	408	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	409
	410	def _extract_api_key(self, ytcfg=None, default_client='web'):
	411	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	412
	413	def _extract_context(self, ytcfg=None, default_client='web'):
	414	context = get_first(
	415	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	416	# Enforce language and tz for extraction
	417	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	418	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	419	return context
	420
	421	_SAPISID = None
	422
	423	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	424	time_now = round(time.time())
	425	if self._SAPISID is None:
	426	yt_cookies = self._get_cookies('https://www.youtube.com')
	427	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	428	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	429	sapisid_cookie = dict_get(
	430	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	431	if sapisid_cookie and sapisid_cookie.value:
	432	self._SAPISID = sapisid_cookie.value
	433	self.write_debug('Extracted SAPISID cookie')
	434	# SAPISID cookie is required if not already present
	435	if not yt_cookies.get('SAPISID'):
	436	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	437	self._set_cookie(
	438	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	439	else:
	440	self._SAPISID = False
	441	if not self._SAPISID:
	442	return None
	443	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	444	sapisidhash = hashlib.sha1(
	445	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	446	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	447
	448	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	449	note='Downloading API JSON', errnote='Unable to download API page',
	450	context=None, api_key=None, api_hostname=None, default_client='web'):
	451
	452	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	453	data.update(query)
	454	real_headers = self.generate_api_headers(default_client=default_client)
	455	real_headers.update({'content-type': 'application/json'})
	456	if headers:
	457	real_headers.update(headers)
	458	return self._download_json(
	459	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	460	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	461	data=json.dumps(data).encode('utf8'), headers=real_headers,
	462	query={'key': api_key or self._extract_api_key()})
	463
	464	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	465	data = self._search_regex(
	466	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	467	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	468	if data:
	469	return self._parse_json(data, item_id, fatal=fatal)
	470
	471	@staticmethod
	472	def _extract_session_index(*data):
	473	"""
	474	Index of current account in account list.
	475	See: https://github.com/yt-dlp/yt-dlp/pull/519
	476	"""
	477	for ytcfg in data:
	478	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	479	if session_index is not None:
	480	return session_index
	481
	482	# Deprecated?
	483	def _extract_identity_token(self, ytcfg=None, webpage=None):
	484	if ytcfg:
	485	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	486	if token:
	487	return token
	488	if webpage:
	489	return self._search_regex(
	490	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	491	'identity token', default=None, fatal=False)
	492
	493	@staticmethod
	494	def _extract_account_syncid(*args):
	495	"""
	496	Extract syncId required to download private playlists of secondary channels
	497	@params response and/or ytcfg
	498	"""
	499	for data in args:
	500	# ytcfg includes channel_syncid if on secondary channel

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

def get_first(obj, keys, **kwargs):

76

return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

77

78

79

# any clients starting with _ cannot be explicitly requested by the user

80

INNERTUBE_CLIENTS = {

81

'web': {

82

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

83

'INNERTUBE_CONTEXT': {

84

'client': {

85

'clientName': 'WEB',

86

'clientVersion': '2.20211221.00.00',

87

}

88

},

89

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

90

},

91

'web_embedded': {

92

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

93

'INNERTUBE_CONTEXT': {

94

'client': {

95

'clientName': 'WEB_EMBEDDED_PLAYER',

96

'clientVersion': '1.20211215.00.01',

97

},

98

},

99

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

100

},

101

'web_music': {

102

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

103

'INNERTUBE_HOST': 'music.youtube.com',

104

'INNERTUBE_CONTEXT': {

105

'client': {

106

'clientName': 'WEB_REMIX',

107

'clientVersion': '1.20211213.00.00',

108

}

109

},

110

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

111

},

112

'web_creator': {

113

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

114

'INNERTUBE_CONTEXT': {

115

'client': {

116

'clientName': 'WEB_CREATOR',

117

'clientVersion': '1.20211220.02.00',

118

}

119

},

120

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

121

},

122

'android': {

123

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

124

'INNERTUBE_CONTEXT': {

125

'client': {

126

'clientName': 'ANDROID',

127

'clientVersion': '16.49',

128

}

129

},

130

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

131

'REQUIRE_JS_PLAYER': False

132

},

133

'android_embedded': {

134

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

135

'INNERTUBE_CONTEXT': {

136

'client': {

137

'clientName': 'ANDROID_EMBEDDED_PLAYER',

138

'clientVersion': '16.49',

139

},

140

},

141

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

142

'REQUIRE_JS_PLAYER': False

143

},

144

'android_music': {

145

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

146

'INNERTUBE_CONTEXT': {

147

'client': {

148

'clientName': 'ANDROID_MUSIC',

149

'clientVersion': '4.57',

150

}

151

},

152

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

153

'REQUIRE_JS_PLAYER': False

154

},

155

'android_creator': {

156

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

157

'INNERTUBE_CONTEXT': {

158

'client': {

159

'clientName': 'ANDROID_CREATOR',

160

'clientVersion': '21.47',

161

},

162

},

163

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

164

'REQUIRE_JS_PLAYER': False

165

},

166

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

167

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

168

'ios': {

169

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

170

'INNERTUBE_CONTEXT': {

171

'client': {

172

'clientName': 'IOS',

173

'clientVersion': '16.46',

174

'deviceModel': 'iPhone14,3',

175

}

176

},

177

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

178

'REQUIRE_JS_PLAYER': False

179

},

180

'ios_embedded': {

181

'INNERTUBE_CONTEXT': {

182

'client': {

183

'clientName': 'IOS_MESSAGES_EXTENSION',

184

'clientVersion': '16.46',

185

'deviceModel': 'iPhone14,3',

186

},

187

},

188

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

189

'REQUIRE_JS_PLAYER': False

190

},

191

'ios_music': {

192

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

193

'INNERTUBE_CONTEXT': {

194

'client': {

195

'clientName': 'IOS_MUSIC',

196

'clientVersion': '4.57',

197

},

198

},

199

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

200

'REQUIRE_JS_PLAYER': False

201

},

202

'ios_creator': {

203

'INNERTUBE_CONTEXT': {

204

'client': {

205

'clientName': 'IOS_CREATOR',

206

'clientVersion': '21.47',

207

},

208

},

209

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

210

'REQUIRE_JS_PLAYER': False

211

},

212

# mweb has 'ultralow' formats

213

# See: https://github.com/yt-dlp/yt-dlp/pull/557

214

'mweb': {

215

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

216

'INNERTUBE_CONTEXT': {

217

'client': {

218

'clientName': 'MWEB',

219

'clientVersion': '2.20211221.01.00',

220

}

221

},

222

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

}

}

def build_innertube_clients():

228

THIRD_PARTY = {

229

'embedUrl': 'https://google.com', # Can be any valid URL

230

}

231

BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')

232

priority = qualities(BASE_CLIENTS[::-1])

233

234

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

235

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

236

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

237

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

238

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

239

240

base_client, *variant = client.split('_')

241

ytcfg['priority'] = 10 * priority(base_client)

242

243

if variant == ['embedded']:

244

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

245

INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)

246

agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

247

agegate_ytcfg['priority'] -= 1

248

ytcfg['priority'] -= 2

249

elif variant:

250

ytcfg['priority'] -= 3

251

252

253

build_innertube_clients()

254

255

256

class YoutubeBaseInfoExtractor(InfoExtractor):

257

"""Provide base functions for Youtube extractors"""

258

259

_RESERVED_NAMES = (

260

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

266

267

_NETRC_MACHINE = 'youtube'

268

269

# If True it will raise an error if no login info is provided

270

_LOGIN_REQUIRED = False

271

272

_INVIDIOUS_SITES = (

273

# invidious-redirect websites

274

r'(?:www\.)?redirect\.invidious\.io',

275

r'(?:(?:www|dev)\.)?invidio\.us',

276

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

277

r'(?:www\.)?invidious\.pussthecat\.org',

278

r'(?:www\.)?invidious\.zee\.li',

279

r'(?:www\.)?invidious\.ethibox\.fr',

280

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

281

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

282

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

283

# youtube-dl invidious instances list

284

r'(?:(?:www|no)\.)?invidiou\.sh',

285

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

286

r'(?:www\.)?invidious\.kabi\.tk',

287

r'(?:www\.)?invidious\.mastodon\.host',

288

r'(?:www\.)?invidious\.zapashcanon\.fr',

289

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

290

r'(?:www\.)?invidious\.tinfoil-hat\.net',

291

r'(?:www\.)?invidious\.himiko\.cloud',

292

r'(?:www\.)?invidious\.reallyancient\.tech',

293

r'(?:www\.)?invidious\.tube',

294

r'(?:www\.)?invidiou\.site',

295

r'(?:www\.)?invidious\.site',

296

r'(?:www\.)?invidious\.xyz',

297

r'(?:www\.)?invidious\.nixnet\.xyz',

298

r'(?:www\.)?invidious\.048596\.xyz',

299

r'(?:www\.)?invidious\.drycat\.fr',

300

r'(?:www\.)?inv\.skyn3t\.in',

301

r'(?:www\.)?tube\.poal\.co',

302

r'(?:www\.)?tube\.connect\.cafe',

303

r'(?:www\.)?vid\.wxzm\.sx',

304

r'(?:www\.)?vid\.mint\.lgbt',

305

r'(?:www\.)?vid\.puffyan\.us',

306

r'(?:www\.)?yewtu\.be',

307

r'(?:www\.)?yt\.elukerio\.org',

308

r'(?:www\.)?yt\.lelux\.fi',

309

r'(?:www\.)?invidious\.ggc-project\.de',

310

r'(?:www\.)?yt\.maisputain\.ovh',

311

r'(?:www\.)?ytprivate\.com',

312

r'(?:www\.)?invidious\.13ad\.de',

313

r'(?:www\.)?invidious\.toot\.koeln',

314

r'(?:www\.)?invidious\.fdn\.fr',

315

r'(?:www\.)?watch\.nettohikari\.com',

316

r'(?:www\.)?invidious\.namazso\.eu',

317

r'(?:www\.)?invidious\.silkky\.cloud',

318

r'(?:www\.)?invidious\.exonip\.de',

319

r'(?:www\.)?invidious\.riverside\.rocks',

320

r'(?:www\.)?invidious\.blamefran\.net',

321

r'(?:www\.)?invidious\.moomoo\.de',

322

r'(?:www\.)?ytb\.trom\.tf',

323

r'(?:www\.)?yt\.cyberhost\.uk',

324

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

325

r'(?:www\.)?qklhadlycap4cnod\.onion',

326

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

327

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

328

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

329

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

330

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

331

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

332

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

333

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

334

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

335

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

)

def _login(self):

"""

Attempt to log in to YouTube.

341

If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

342

"""

343

344

if (self._LOGIN_REQUIRED

345

and self.get_param('cookiefile') is None

346

and self.get_param('cookiesfrombrowser') is None):

347

self.raise_login_required(

348

'Login details are needed to download this content', method='cookies')

349

username, password = self._get_login_info()

350

if username:

351

self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

352

353

def _initialize_consent(self):

354

cookies = self._get_cookies('https://www.youtube.com/')

355

if cookies.get('__Secure-3PSID'):

356

return

357

consent_id = None

358

consent = cookies.get('CONSENT')

359

if consent:

360

if 'YES' in consent.value:

361

return

362

consent_id = self._search_regex(

363

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

364

if not consent_id:

365

consent_id = random.randint(100, 999)

366

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

367

368

def _initialize_pref(self):

369

cookies = self._get_cookies('https://www.youtube.com/')

370

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

375

except ValueError:

376

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

377

pref.update({'hl': 'en', 'tz': 'UTC'})

378

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

379

380

def _real_initialize(self):

381

self._initialize_pref()

382

self._initialize_consent()

383

self._login()

384

385

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

386

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

387

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

388

389

def _get_default_ytcfg(self, client='web'):

390

return copy.deepcopy(INNERTUBE_CLIENTS[client])

391

392

def _get_innertube_host(self, client='web'):

393

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

394

395

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

396

# try_get but with fallback to default ytcfg client values when present

397

_func = lambda y: try_get(y, getter, expected_type)

398

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

399

400

def _extract_client_name(self, ytcfg, default_client='web'):

401

return self._ytcfg_get_safe(

402

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

403

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

404

405

def _extract_client_version(self, ytcfg, default_client='web'):

406

return self._ytcfg_get_safe(

407

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

408

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

409

410

def _extract_api_key(self, ytcfg=None, default_client='web'):

411

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

412

413

def _extract_context(self, ytcfg=None, default_client='web'):

414

context = get_first(

415

(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)

416

# Enforce language and tz for extraction

417

client_context = traverse_obj(context, 'client', expected_type=dict, default={})

418

client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})

return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

424

time_now = round(time.time())

425

if self._SAPISID is None:

426

yt_cookies = self._get_cookies('https://www.youtube.com')

427

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

428

# See: https://github.com/yt-dlp/yt-dlp/issues/393

429

sapisid_cookie = dict_get(

430

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

431

if sapisid_cookie and sapisid_cookie.value:

432

self._SAPISID = sapisid_cookie.value

433

self.write_debug('Extracted SAPISID cookie')

434

# SAPISID cookie is required if not already present

435

if not yt_cookies.get('SAPISID'):

436

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

437

self._set_cookie(

438

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

439

else:

440

self._SAPISID = False

441

if not self._SAPISID:

442

return None

443

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

444

sapisidhash = hashlib.sha1(

445

f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()

446

return f'SAPISIDHASH {time_now}_{sapisidhash}'

447

448

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the /youtubei/v1/<ep> endpoint and return the parsed JSON.

    `context`, `api_key` and `api_hostname` fall back to the values of
    `default_client` when not given. `headers` are merged over the
    generated API headers.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        # Use the key of the same client as the context and host above
        query={'key': api_key or self._extract_api_key(default_client=default_client)})

463

464

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find and parse the ytInitialData JSON object embedded in `webpage`.

    The boundary-anchored pattern is tried first, then the bare one.
    Returns None when nothing is found and `fatal` is False.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if data:
        return self._parse_json(data, item_id, fatal=fatal)

470

471

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None if there is none.
    """
    for ytcfg in data:
        session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        if session_index is not None:
            return session_index

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ID_TOKEN from `ytcfg`, else from `webpage`, else None."""
    if ytcfg:
        token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
        if token:
            return token
    if webpage:
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None, fatal=False)

492

493

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or ''
        sync_ids = datasync_id.split('||')
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, paths, expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated (a SAPISID value is available)."""
    return bool(self._generate_sapisidhash_header())

525

526

def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set({...});` in `webpage`.

    Returns {} when there is no webpage, no match, or the JSON fails to parse.
    """
    if not webpage:
        return {}
    # The parentheses of the ytcfg.set(...) call must be matched literally
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

533

534

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit arguments take precedence; anything not given is extracted
    from `ytcfg` (or the default config of `default_client`). Headers
    whose value is None are left out of the result.
    """
    origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
    headers = {
        'X-YouTube-Client-Name': compat_str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    # With an explicit account sync id but no known session index, index 0 is sent
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {h: v for h, v in headers.items() if v is not None}

558

559

@staticmethod

560

def _build_api_continuation_query(continuation, ctp=None):

561

query = {

562

'continuation': continuation

563

}

564

# TODO: Inconsistency with clickTrackingParams.

565

# Currently we have a fixed ctp contained within context (from ytcfg)

566

# and a ctp in root query for continuation.

567

if ctp:

568

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from a renderer's next/reload continuation data.

    Returns None when the renderer has no such data or no continuation token.
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    if not next_continuation:
        return
    continuation = next_continuation.get('continuation')
    if not continuation:
        return
    ctp = next_continuation.get('clickTrackingParams')
    return cls._build_api_continuation_query(continuation, ctp)

583

584

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint/command dict.

    Returns None when `continuation_ep` is not a dict or has no token.
    """
    if not isinstance(continuation_ep, dict):
        return
    continuation = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not continuation:
        return
    ctp = continuation_ep.get('clickTrackingParams')
    return cls._build_api_continuation_query(continuation, ctp)

593

594

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if it has none.

    The renderer's own continuation data is preferred; otherwise the first
    usable continuationItemRenderer among its 'contents'/'items' is used.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    contents = []
    for key in ('contents', 'items'):
        contents.extend(try_get(renderer, lambda x: x[key], list) or [])

    for content in contents:
        if not isinstance(content, dict):
            continue
        continuation_ep = try_get(
            content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                      lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        continuation = cls._extract_continuation_ep_data(continuation_ep)
        if continuation:
            return continuation

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'] that has both."""
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip malformed entries instead of failing on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

627

628

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report `alerts` as warnings and raise for the last error alert.

    Alerts of type 'error' (case-insensitive) count as errors only when
    `fatal` is true. All warnings and all but the last error are reported
    via report_warning; the last error is raised as ExtractorError.
    """
    errors = []
    warnings = []
    for alert_type, alert_message in alerts:
        if alert_type.lower() == 'error' and fatal:
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

641

642

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to _report_alerts with the extra arguments."""
    return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

644

645

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in renderer['badges']."""
    badges = set()
    for badge in try_get(renderer, lambda x: x['badges'], list) or []:
        label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        if label:
            badges.add(label.lower())
    return badges

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths in `data`.

    Each candidate is read from its 'simpleText' key, or by joining the
    'text' of its 'runs' (at most `max_runs` of them when given). With no
    paths, `data` itself is the candidate. Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without branching keys yields one object, not a list of matches
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # The item may itself be the list of runs
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text

def _get_count(self, data, *path_list):
    """Return the count parsed from the text at the given paths, or None.

    parse_count is tried first; otherwise the leading run of digits and
    commas (after removing whitespace) is converted.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is None:
        count = str_to_int(
            self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
    return count

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Skip malformed entries instead of failing on .get()
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text holds no relative time or it cannot be converted.
    """
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None

def _extract_time_text(self, renderer, *path_list):
    """Return (timestamp, text) for the time text at the given paths in `renderer`.

    The text is first read as a relative time ('6 days ago'), then as an
    absolute date. `timestamp` is None when neither works; a warning is
    reported once if there was text that could not be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = None
    if isinstance(dt, datetime.datetime):
        timestamp = calendar.timegm(dt.timetuple())

    if timestamp is None:
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(
                    (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'),
                    text.lower(), 'time text', default=None)))

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text

740

741

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint `ep`, retrying on transient failures.

    Up to `extractor_retries` (default 3) retries are made on network
    errors other than HTTP 403/429, on "unknown error" alerts, and when
    none of `check_get_keys` is present in the response. Once retries run
    out (or on a non-retryable error) the error is raised if `fatal`,
    otherwise a warning is reported and None is returned.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    # Surface the error message from a JSON error body, if any
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; otherwise fall through and fail
                # instead of silently returning the error response.
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                return
    return response

@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/."""
    return re.match(r'https?://music\.youtube\.com/', url) is not None

816

817

def _extract_video(self, renderer):
    """Build a 'url'-type result dict for YoutubeIE from a video renderer."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # A scheduled start time wins, then "streamed ..." time text, then live markers
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        # The date is derived from relative time text, so it is only set on request
        'upload_date': strftime_or_none(timestamp, '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

858

IE_DESC = 'YouTube'

859

_VALID_URL = r"""(?x)^

860

(

861

(?:https?://|//) # http(s):// or protocol-independent URL

862

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

863

(?:www\.)?deturl\.com/www\.youtube\.com|

864

(?:www\.)?pwnyoutube\.com|

865

(?:www\.)?hooktube\.com|

866

(?:www\.)?yourepeat\.com|

867

tube\.majestyc\.net|

868

%(invidious)s|

869

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

870

(?:.*?\#/)? # handle anchor (#/) redirect urls

871

(?: # the various things that can precede the ID:

872

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

873

|(?: # or the v= param in all its forms

874

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

875

(?:\?|\#!?) # the params delimiter ? or # or #!

876

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

882

vid\.plus| # or vid.plus/xxxx

883

zwearz\.com/watch| # or zwearz.com/watch/xxxx

884

%(invidious)s

885

)/

886

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

887

)

888

)? # all until now is optional -> you can pass the naked ID

889

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

890

(?(1).+)? # if we found the ID, everything can follow

891

(?:\#|$)""" % {

892

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

893

}

894

_PLAYER_INFO_RE = (

895

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

896

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

897

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

898

)

899

_formats = {

900

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

901

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

902

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

903

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

904

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

905

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

906

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

907

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

908

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

909

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

910

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

911

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

912

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

913

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

914

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

915

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

916

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

917

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

922

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

923

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

924

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

925

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

926

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

927

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

928

929

# Apple HTTP Live Streaming

930

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

931

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

932

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

933

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

934

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

935

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

936

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

937

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

938

939

# DASH mp4 video

940

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

941

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

942

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

943

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

944

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

945

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

946

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

947

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

948

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

949

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

950

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

951

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

952

953

# Dash mp4 audio

954

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

955

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

956

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

957

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

958

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

959

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

960

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

961

962

# Dash webm

963

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

964

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

965

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

966

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

967

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

968

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

969

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

970

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

971

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

972

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

974

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

975

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

976

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

977

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

978

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

979

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

980

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

981

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

982

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

983

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

984

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

985

986

# Dash webm audio

987

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

988

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

989

990

# Dash webm audio with opus inside

991

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

992

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

993

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

994

995

# RTMP (unnamed)

996

'_rtmp': {'protocol': 'rtmp'},

997

998

# av01 video only formats sometimes served with "unknown" codecs

999

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1000

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1001

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1002

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1003

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1004

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1005

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1006

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1007

}

1008

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1020

'uploader': 'Philipp Hagemeister',

1021

'uploader_id': 'phihag',

1022

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1023

'channel': 'Philipp Hagemeister',

1024

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1025

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1026

'upload_date': '20121002',

1027

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1028

'categories': ['Science & Technology'],

1029

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1034

'playable_in_embed': True,

1035

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1036

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1045

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1050

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1051

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1052

'uploader': 'SET India',

1053

'uploader_id': 'setindia',

1054

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1055

'age_limit': 18,

1056

},

1057

'skip': 'Private video',

1058

},

1059

{

1060

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1061

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1066

'uploader': 'Philipp Hagemeister',

1067

'uploader_id': 'phihag',

1068

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1069

'channel': 'Philipp Hagemeister',

1070

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1071

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1072

'upload_date': '20121002',

1073

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1074

'categories': ['Science & Technology'],

1075

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1080

'playable_in_embed': True,

1081

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1082

'live_status': 'not_live',

1083

'age_limit': 0,

1084

'channel_follower_count': int

1085

},

1086

'params': {

1087

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1092

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1097

'uploader_id': '8KVIDEO',

1098

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1099

'description': '',

1100

'uploader': '8KVIDEO',

1101

'title': 'UHDTV TEST 8K VIDEO.mp4'

1102

},

1103

'params': {

1104

'youtube_include_dash_manifest': True,

1105

'format': '141',

1106

},

1107

'skip': 'format 141 not served anymore',

1108

},

1109

# DASH manifest with encrypted signature

1110

{

1111

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1116

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1117

'duration': 244,

1118

'uploader': 'AfrojackVEVO',

1119

'uploader_id': 'AfrojackVEVO',

1120

'upload_date': '20131011',

1121

'abr': 129.495,

1122

'like_count': int,

1123

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1124

'playable_in_embed': True,

1125

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1126

'view_count': int,

1127

'track': 'The Spark',

1128

'live_status': 'not_live',

1129

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1130

'channel': 'Afrojack',

1131

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1132

'tags': 'count:19',

1133

'availability': 'public',

1134

'categories': ['Music'],

1135

'age_limit': 0,

1136

'alt_title': 'The Spark',

1137

'channel_follower_count': int

1138

},

1139

'params': {

1140

'youtube_include_dash_manifest': True,

1141

'format': '141/bestaudio[ext=m4a]',

1142

},

1143

},

1144

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1145

{

1146

'note': 'Embed allowed age-gate video',

1147

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1152

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1153

'duration': 142,

1154

'uploader': 'The Witcher',

1155

'uploader_id': 'WitcherGame',

1156

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1157

'upload_date': '20140605',

1158

'age_limit': 18,

1159

'categories': ['Gaming'],

1160

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1161

'availability': 'needs_auth',

1162

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1163

'like_count': int,

1164

'channel': 'The Witcher',

1165

'live_status': 'not_live',

1166

'tags': 'count:17',

1167

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1168

'playable_in_embed': True,

1169

'view_count': int,

1170

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1175

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1180

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1181

'upload_date': '20200408',

1182

'uploader_id': 'FlyingKitty900',

1183

'uploader': 'FlyingKitty',

1184

'age_limit': 18,

1185

'availability': 'needs_auth',

1186

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1187

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1188

'channel': 'FlyingKitty',

1189

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1190

'view_count': int,

1191

'categories': ['Entertainment'],

1192

'live_status': 'not_live',

1193

'tags': ['Flyingkitty', 'godzilla 2'],

1194

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1195

'like_count': int,

1196

'duration': 177,

1197

'playable_in_embed': True,

1198

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1203

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1204

'info_dict': {

1205

'id': 'Tq92D6wQ1mg',

1206

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1207

'ext': 'mp4',

1208

'upload_date': '20191227',

1209

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1210

'uploader': 'Projekt Melody',

1211

'description': 'md5:17eccca93a786d51bc67646756894066',

1212

'age_limit': 18,

1213

'like_count': int,

1214

'availability': 'needs_auth',

1215

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1216

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1217

'view_count': int,

1218

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1219

'channel': 'Projekt Melody',

1220

'live_status': 'not_live',

1221

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1222

'playable_in_embed': True,

1223

'categories': ['Entertainment'],

1224

'duration': 106,

1225

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1226

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1231

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1236

'uploader': 'Herr Lurik',

1237

'uploader_id': 'st3in234',

1238

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1239

'upload_date': '20130730',

1240

'track': 'Such mich find mich',

1241

'age_limit': 0,

1242

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1243

'like_count': int,

1244

'playable_in_embed': False,

1245

'creator': 'OOMPH!',

1246

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1247

'view_count': int,

1248

'alt_title': 'Such mich find mich',

1249

'duration': 210,

1250

'channel': 'Herr Lurik',

1251

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1252

'categories': ['Music'],

1253

'availability': 'public',

1254

'uploader_url': 'http://www.youtube.com/user/st3in234',

1255

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1256

'live_status': 'not_live',

1257

'artist': 'OOMPH!',

1258

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1263

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1264

'only_matching': True,

1265

},

1266

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1267

# YouTube Red ad is not captured for creator

1268

{

1269

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1275

'uploader_id': 'deadmau5',

1276

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1277

'creator': 'deadmau5',

1278

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1279

'uploader': 'deadmau5',

1280

'title': 'Deadmau5 - Some Chords (HD)',

1281

'alt_title': 'Some Chords',

1282

'availability': 'public',

1283

'tags': 'count:14',

1284

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1285

'view_count': int,

1286

'live_status': 'not_live',

1287

'channel': 'deadmau5',

1288

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1289

'like_count': int,

1290

'track': 'Some Chords',

1291

'artist': 'deadmau5',

1292

'playable_in_embed': True,

1293

'age_limit': 0,

1294

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1295

'categories': ['Music'],

1296

'album': 'Some Chords',

1297

'channel_follower_count': int

1298

},

1299

'expected_warnings': [

1300

'DASH manifest missing',

1301

]

1302

},

1303

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1304

{

1305

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1311

'uploader_id': 'olympic',

1312

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1313

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1314

'uploader': 'Olympics',

1315

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1316

'like_count': int,

1317

'release_timestamp': 1343767800,

1318

'playable_in_embed': True,

1319

'categories': ['Sports'],

1320

'release_date': '20120731',

1321

'channel': 'Olympics',

1322

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1323

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1324

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1325

'age_limit': 0,

1326

'availability': 'public',

1327

'live_status': 'was_live',

1328

'view_count': int,

1329

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1330

'channel_follower_count': int

1331

},

1332

'params': {

1333

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1343

'duration': 85,

1344

'upload_date': '20110310',

1345

'uploader_id': 'AllenMeow',

1346

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1347

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1348

'uploader': '孫ᄋᄅ',

1349

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1350

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1355

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1356

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1357

'view_count': int,

1358

'categories': ['People & Blogs'],

1359

'like_count': int,

1360

'live_status': 'not_live',

1361

'availability': 'unlisted',

1362

'channel_follower_count': int

1363

},

1364

},

1365

# url_encoded_fmt_stream_map is empty string

1366

{

1367

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1372

'description': '',

1373

'upload_date': '20150404',

1374

'uploader_id': 'spbelect',

1375

'uploader': 'Наблюдатели Петербурга',

1376

},

1377

'params': {

1378

'skip_download': 'requires avconv',

1379

},

1380

'skip': 'This live event has ended.',

1381

},

1382

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1383

{

1384

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1389

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1390

'duration': 220,

1391

'upload_date': '20150625',

1392

'uploader_id': 'dorappi2000',

1393

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1394

'uploader': 'dorappi2000',

1395

'formats': 'mincount:31',

1396

},

1397

'skip': 'not actual anymore',

1398

},

1399

# DASH manifest with segment_list

1400

{

1401

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1402

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1407

'uploader': 'Airtek',

1408

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1409

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1410

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1411

},

1412

'params': {

1413

'youtube_include_dash_manifest': True,

1414

'format': '135', # bestvideo

1415

},

1416

'skip': 'This live event has ended.',

1417

},

1418

{

1419

# Multifeed videos (multiple cameras), URL is for Main Camera

1420

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1421

'info_dict': {

1422

'id': 'jvGDaLqkpTg',

1423

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1424

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1431

'description': 'md5:e03b909557865076822aa169218d6a5d',

1432

'duration': 10643,

1433

'upload_date': '20161111',

1434

'uploader': 'Team PGP',

1435

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1436

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1443

'description': 'md5:e03b909557865076822aa169218d6a5d',

1444

'duration': 10991,

1445

'upload_date': '20161111',

1446

'uploader': 'Team PGP',

1447

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1448

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1455

'description': 'md5:e03b909557865076822aa169218d6a5d',

1456

'duration': 10995,

1457

'upload_date': '20161111',

1458

'uploader': 'Team PGP',

1459

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1460

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1467

'description': 'md5:e03b909557865076822aa169218d6a5d',

1468

'duration': 10990,

1469

'upload_date': '20161111',

1470

'uploader': 'Team PGP',

1471

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1472

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1477

},

1478

'skip': 'Not multifeed anymore',

1479

},

1480

{

1481

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1482

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1483

'info_dict': {

1484

'id': 'gVfLd0zydlo',

1485

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1486

},

1487

'playlist_count': 2,

1488

'skip': 'Not multifeed anymore',

1489

},

1490

{

1491

'url': 'https://vid.plus/FlRa-iH7PGw',

1492

'only_matching': True,

1493

},

1494

{

1495

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1496

'only_matching': True,

1497

},

1498

{

1499

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1500

# Also tests cut-off URL expansion in video description (see

1501

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1502

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1503

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1508

'alt_title': 'Dark Walk',

1509

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1510

'duration': 133,

1511

'upload_date': '20151119',

1512

'uploader_id': 'IronSoulElf',

1513

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1514

'uploader': 'IronSoulElf',

1515

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1516

'track': 'Dark Walk',

1517

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1518

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1519

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1520

'categories': ['Film & Animation'],

1521

'view_count': int,

1522

'live_status': 'not_live',

1523

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1524

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1525

'tags': 'count:13',

1526

'availability': 'public',

1527

'channel': 'IronSoulElf',

1528

'playable_in_embed': True,

1529

'like_count': int,

1530

'age_limit': 0,

1531

'channel_follower_count': int

1532

},

1533

'params': {

1534

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1539

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1540

'only_matching': True,

1541

},

1542

{

1543

# Video with yt:stretch=17:0

1544

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1549

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1550

'upload_date': '20151107',

1551

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1552

'uploader': 'CH GAMER DROID',

1553

},

1554

'params': {

1555

'skip_download': True,

1556

},

1557

'skip': 'This video does not exist.',

1558

},

1559

{

1560

# Video with incomplete 'yt:stretch=16:'

1561

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1562

'only_matching': True,

1563

},

1564

{

1565

# Video licensed under Creative Commons

1566

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1571

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1572

'duration': 721,

1573

'upload_date': '20150127',

1574

'uploader_id': 'BerkmanCenter',

1575

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1576

'uploader': 'The Berkman Klein Center for Internet & Society',

1577

'license': 'Creative Commons Attribution license (reuse allowed)',

1578

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1579

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1580

'like_count': int,

1581

'age_limit': 0,

1582

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1583

'channel': 'The Berkman Klein Center for Internet & Society',

1584

'availability': 'public',

1585

'view_count': int,

1586

'categories': ['Education'],

1587

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1588

'live_status': 'not_live',

1589

'playable_in_embed': True,

1590

'channel_follower_count': int

1591

},

1592

'params': {

1593

'skip_download': True,

},

},

{

# Channel-like uploader_url

1598

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1603

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1604

'duration': 4060,

1605

'upload_date': '20151119',

1606

'uploader': 'Bernie Sanders',

1607

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1608

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1609

'license': 'Creative Commons Attribution license (reuse allowed)',

1610

'playable_in_embed': True,

1611

'tags': 'count:12',

1612

'like_count': int,

1613

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1614

'age_limit': 0,

1615

'availability': 'public',

1616

'categories': ['News & Politics'],

1617

'channel': 'Bernie Sanders',

1618

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1619

'view_count': int,

1620

'live_status': 'not_live',

1621

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1622

'channel_follower_count': int

1623

},

1624

'params': {

1625

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1630

'only_matching': True,

1631

},

1632

{

1633

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1634

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1635

'only_matching': True,

1636

},

1637

{

1638

# Rental video preview

1639

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1644

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1645

'upload_date': '20150811',

1646

'uploader': 'FlixMatrix',

1647

'uploader_id': 'FlixMatrixKaravan',

1648

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1649

'license': 'Standard YouTube License',

1650

},

1651

'params': {

1652

'skip_download': True,

1653

},

1654

'skip': 'This video is not available.',

1655

},

1656

{

1657

# YouTube Red video with episode data

1658

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1663

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1664

'duration': 2085,

1665

'upload_date': '20170118',

1666

'uploader': 'Vsauce',

1667

'uploader_id': 'Vsauce',

1668

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1669

'series': 'Mind Field',

1670

'season_number': 1,

1671

'episode_number': 1,

1672

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1673

'tags': 'count:12',

1674

'view_count': int,

1675

'availability': 'public',

1676

'age_limit': 0,

1677

'channel': 'Vsauce',

1678

'episode': 'Episode 1',

1679

'categories': ['Entertainment'],

1680

'season': 'Season 1',

1681

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1682

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1683

'like_count': int,

1684

'playable_in_embed': True,

1685

'live_status': 'not_live',

1686

'channel_follower_count': int

1687

},

1688

'params': {

1689

'skip_download': True,

1690

},

1691

'expected_warnings': [

1692

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1697

# as inappropriate or offensive to some audiences.

1698

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1703

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1704

'duration': 965,

1705

'upload_date': '20140124',

1706

'uploader': 'New Century Foundation',

1707

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1708

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1709

},

1710

'params': {

1711

'skip_download': True,

1712

},

1713

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1718

'only_matching': True,

1719

},

1720

{

1721

# geo restricted to JP

1722

'url': 'sJL6WA-aGkQ',

1723

'only_matching': True,

1724

},

1725

{

1726

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1727

'only_matching': True,

1728

},

1729

{

1730

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1731

'only_matching': True,

1732

},

1733

{

1734

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1735

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1736

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1741

'only_matching': True,

1742

},

1743

{

1744

# Video with unsupported adaptive stream type formats

1745

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1750

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1751

'duration': 433,

1752

'upload_date': '20130923',

1753

'uploader': 'Amelia Putri Harwita',

1754

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1755

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1756

'formats': 'maxcount:10',

1757

},

1758

'params': {

1759

'skip_download': True,

1760

'youtube_include_dash_manifest': False,

1761

},

1762

'skip': 'not actual anymore',

1763

},

1764

{

1765

# Youtube Music Auto-generated description

1766

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1771

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1772

'upload_date': '20190312',

1773

'uploader': 'Stephen - Topic',

1774

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1775

'artist': 'Stephen',

1776

'track': 'Voyeur Girl',

1777

'album': 'it\'s too much love to know my dear',

1778

'release_date': '20190313',

1779

'release_year': 2019,

1780

'alt_title': 'Voyeur Girl',

1781

'view_count': int,

1782

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1783

'playable_in_embed': True,

1784

'like_count': int,

1785

'categories': ['Music'],

1786

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1787

'channel': 'Stephen',

1788

'availability': 'public',

1789

'creator': 'Stephen',

1790

'duration': 169,

1791

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1792

'age_limit': 0,

1793

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1794

'tags': 'count:11',

1795

'live_status': 'not_live',

1796

'channel_follower_count': int

1797

},

1798

'params': {

1799

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1804

'only_matching': True,

1805

},

1806

{

1807

# invalid -> valid video id redirection

1808

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1813

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1814

'upload_date': '20090125',

1815

'uploader': 'Prochorowka',

1816

'uploader_id': 'Prochorowka',

1817

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1818

'artist': 'Panjabi MC',

1819

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1820

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1821

},

1822

'params': {

1823

'skip_download': True,

1824

},

1825

'skip': 'Video unavailable',

1826

},

1827

{

1828

# empty description results in an empty string

1829

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1836

'uploader_id': 'ElevageOrVert',

1837

'uploader': 'ElevageOrVert',

1838

'view_count': int,

1839

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1840

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1841

'like_count': int,

1842

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1843

'tags': [],

1844

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1845

'availability': 'public',

1846

'age_limit': 0,

1847

'categories': ['Pets & Animals'],

1848

'duration': 7,

1849

'playable_in_embed': True,

1850

'live_status': 'not_live',

1851

'channel': 'ElevageOrVert',

1852

'channel_follower_count': int

1853

},

1854

'params': {

1855

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1860

# see [2] for an example with '};' inside ytInitialPlayerResponse

1861

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1862

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1863

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1868

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1869

'upload_date': '20130831',

1870

'uploader_id': 'kudvenkat',

1871

'uploader': 'kudvenkat',

1872

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1873

'like_count': int,

1874

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1875

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1876

'live_status': 'not_live',

1877

'categories': ['Education'],

1878

'availability': 'public',

1879

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1880

'tags': 'count:12',

1881

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1886

'channel_follower_count': int

1887

},

1888

'params': {

1889

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1894

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1895

'only_matching': True,

1896

},

1897

{

1898

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1899

'only_matching': True,

1900

},

1901

{

1902

# https://github.com/ytdl-org/youtube-dl/pull/28094

1903

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1909

'upload_date': '20141120',

1910

'uploader': 'The Cinematic Orchestra - Topic',

1911

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1912

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1913

'artist': 'The Cinematic Orchestra',

1914

'track': 'Burn Out',

1915

'album': 'Every Day',

1916

'like_count': int,

1917

'live_status': 'not_live',

1918

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1923

'creator': 'The Cinematic Orchestra',

1924

'channel': 'The Cinematic Orchestra',

1925

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1926

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1927

'availability': 'public',

1928

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1929

'categories': ['Music'],

1930

'playable_in_embed': True,

1931

'channel_follower_count': int

1932

},

1933

'params': {

1934

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1939

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1940

'only_matching': True,

1941

},

1942

{

1943

# controversial video, requires bpctr/contentCheckOk

1944

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1949

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1950

'uploader': 'CBS Mornings',

1951

'uploader_id': 'CBSThisMorning',

1952

'upload_date': '20140716',

1953

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1954

'duration': 170,

1955

'categories': ['News & Politics'],

1956

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1957

'view_count': int,

1958

'channel': 'CBS Mornings',

1959

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1960

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1961

'age_limit': 18,

1962

'availability': 'needs_auth',

1963

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1964

'like_count': int,

1965

'live_status': 'not_live',

1966

'playable_in_embed': True,

1967

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1972

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1977

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1978

'upload_date': '20201120',

1979

'uploader': 'Walk around Japan',

1980

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1981

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1982

'duration': 1456,

1983

'categories': ['Travel & Events'],

1984

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1985

'view_count': int,

1986

'channel': 'Walk around Japan',

1987

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1988

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1989

'age_limit': 0,

1990

'availability': 'public',

1991

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1992

'live_status': 'not_live',

1993

'playable_in_embed': True,

1994

'channel_follower_count': int

1995

},

1996

'params': {

1997

'skip_download': True,

1998

},

1999

}, {

2000

# Has multiple audio streams

2001

'url': 'WaOKSUlf4TM',

2002

'only_matching': True

2003

}, {

2004

# Requires Premium: has format 141 when requested using YTM url

2005

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2006

'only_matching': True

2007

}, {

2008

# multiple subtitles with same lang_code

2009

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2010

'only_matching': True,

2011

}, {

2012

# Force use android client fallback

2013

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2014

'info_dict': {

2015

'id': 'YOelRv7fMxY',

2016

'title': 'DIGGING A SECRET TUNNEL Part 1',

2017

'ext': '3gp',

2018

'upload_date': '20210624',

2019

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2020

'uploader': 'colinfurze',

2021

'uploader_id': 'colinfurze',

2022

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2023

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2024

'duration': 596,

2025

'categories': ['Entertainment'],

2026

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2027

'view_count': int,

2028

'channel': 'colinfurze',

2029

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2030

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2031

'age_limit': 0,

2032

'availability': 'public',

2033

'like_count': int,

2034

'live_status': 'not_live',

2035

'playable_in_embed': True,

2036

'channel_follower_count': int

2037

},

2038

'params': {

2039

'format': '17', # 3gp format available on android

2040

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2045

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2046

'only_matching': True,

2047

'params': {

2048

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2053

'only_matching': True,

2054

}, {

2055

'note': 'Storyboards',

2056

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2062

'uploader_id': 'scishow',

2063

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2064

'upload_date': '20140324',

2065

'uploader': 'SciShow',

2066

'like_count': int,

2067

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2068

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2069

'view_count': int,

2070

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2071

'playable_in_embed': True,

2072

'tags': 'count:12',

2073

'uploader_url': 'http://www.youtube.com/user/scishow',

2074

'availability': 'public',

2075

'channel': 'SciShow',

2076

'live_status': 'not_live',

2077

'duration': 248,

2078

'categories': ['Education'],

2079

'age_limit': 0,

2080

'channel_follower_count': int

2081

}, 'params': {'format': 'mhtml', 'skip_download': True}

}

]

@classmethod
def suitable(cls, url):
    """Reject URLs with a non-empty ``list`` query value, else defer to the parent class.

    Returns False when the first ``list`` value parsed from the URL is
    truthy; otherwise returns whatever the parent ``suitable`` returns.
    """
    # Kept as a function-local import exactly like the original.
    # NOTE(review): presumably needed by tooling that copies this method out
    # of the module - confirm before hoisting it to the file-level imports.
    from ..utils import parse_qs

    playlist_values = parse_qs(url).get('list', [None])
    if not playlist_values[0]:
        return super(YoutubeIE, cls).suitable(url)
    return False

2093

2094

def __init__(self, *args, **kwargs):
    """Initialise the base extractor, then create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Two distinct dicts: one filled by _load_player, one by the signature helpers.
    self._code_cache, self._player_cache = {}, {}

2098

2099

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Attach a lazy fragment generator to every format flagged ``is_from_start``.

    Each such format dict is mutated in place: its ``protocol`` becomes
    'http_dash_segments_generator' and its ``fragments`` becomes a partial of
    ``_live_dash_fragments`` bound to the format id, ``live_start_time`` and
    the ``mpd_feed`` closure defined below.  Nothing is returned.
    """
    # Serialises calls to refetch_manifest made through mpd_feed.
    lock = threading.Lock()

    # State shared (via nonlocal) between the closures below.
    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download player responses and re-list formats once ``delay`` seconds have passed."""
        nonlocal formats, start_time, is_live
        # Still within the allowed age of the last fetch: keep the current data.
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # Rebinds the shared format list and live flag with the fresh values.
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        # Look the format up in the (possibly refreshed) shared list.
        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['protocol'] = 'http_dash_segments_generator'
        # The remaining argument (ctx) is supplied by whoever calls the partial.
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2141

2142

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2143

FETCH_SPAN, MAX_DURATION = 5, 432000

2144

2145

mpd_url, stream_number, is_live = None, None, True

2146

2147

begin_index = 0

2148

download_start_time = ctx.get('start') or time.time()

2149

2150

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2151

if lack_early_segments:

2152

self.report_warning(bug_reports_message(

2153

'Starting download from the last 120 hours of the live stream since '

2154

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2155

lack_early_segments = True

2156

2157

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2158

fragments, fragment_base_url = None, None

2159

2160

def _extract_sequence_from_mpd(refresh_sequence):

2161

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2162

# Obtain from MPD's maximum seq value

2163

old_mpd_url = mpd_url

2164

last_error = ctx.pop('last_error', None)

2165

expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2166

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2167

or (mpd_url, stream_number, False))

2168

if not refresh_sequence:

2169

if expire_fast and not is_live:

2170

return False, last_seq

2171

elif old_mpd_url == mpd_url:

2172

return True, last_seq

2173

try:

2174

fmts, _ = self._extract_mpd_formats_and_subtitles(

2175

mpd_url, None, note=False, errnote=False, fatal=False)

2176

except ExtractorError:

2177

fmts = None

2178

if not fmts:

2179

no_fragment_score += 1

2180

return False, last_seq

2181

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2182

fragments = fmt_info['fragments']

2183

fragment_base_url = fmt_info['fragment_base_url']

2184

assert fragment_base_url

2185

2186

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2187

return True, _last_seq

2188

2189

while is_live:

2190

fetch_time = time.time()

2191

if no_fragment_score > 30:

2192

return

2193

if last_segment_url:

2194

# Obtain from "X-Head-Seqnum" header value from each segment

2195

try:

2196

urlh = self._request_webpage(

2197

last_segment_url, None, note=False, errnote=False, fatal=False)

2198

except ExtractorError:

2199

urlh = None

2200

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2201

if last_seq is None:

2202

no_fragment_score += 1

2203

last_segment_url = None

2204

continue

2205

else:

2206

should_continue, last_seq = _extract_sequence_from_mpd(True)

2207

if not should_continue:

2208

continue

2209

2210

if known_idx > last_seq:

2211

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2217

# skip from the start when it's negative value

2218

known_idx = last_seq + begin_index

2219

if lack_early_segments:

2220

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2221

try:

2222

for idx in range(known_idx, last_seq):

2223

# do not update sequence here or you'll get skipped some part of it

2224

should_continue, _ = _extract_sequence_from_mpd(False)

2225

if not should_continue:

2226

known_idx = idx - 1

2227

raise ExtractorError('breaking out of outer loop')

2228

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2229

yield {

2230

'url': last_segment_url,

2231

}

2232

if known_idx == last_seq:

2233

no_fragment_score += 5

2234

else:

2235

no_fragment_score = 0

2236

known_idx = last_seq

2237

except ExtractorError:

2238

continue

2239

2240

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2241

2242

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of ``ytcfgs``, or None.

    The configs are searched for 'PLAYER_JS_URL' first and then for a
    'jsUrl' under 'WEB_PLAYER_CONTEXT_CONFIGS'; the first string hit is
    joined onto https://www.youtube.com.  ``webpage`` is accepted but unused.
    """
    search_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *search_paths, get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', found) if found else None

2249

2250

def _download_player_url(self, video_id, fatal=False):

2251

res = self._download_webpage(

2252

'https://www.youtube.com/iframe_api',

2253

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2254

if res:

2255

player_version = self._search_regex(

2256

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2257

if player_version:

2258

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2259

2260

def _signature_cache_id(self, example_sig):

2261

""" Return a string representation of a signature """

2262

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2263

2264

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching ``player_url``.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazily try each pattern in order and stop at the first hit.
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    hit = next((m for m in attempts if m), None)
    if hit is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return hit.group('id')

2273

2274

def _load_player(self, video_id, player_url, fatal=True):

2275

player_id = self._extract_player_info(player_url)

2276

if player_id not in self._code_cache:

2277

code = self._download_webpage(

2278

player_url, video_id, fatal=fatal,

2279

note='Downloading player ' + player_id,

2280

errnote='Download of %s failed' % player_url)

2281

if code:

2282

self._code_cache[player_id] = code

2283

return self._code_cache.get(player_id)

2284

2285

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that maps an encrypted signature to its decrypted form.

    A cached index list from the 'youtube-sigfuncs' cache section is used
    when present; otherwise the player JS is loaded and parsed, the
    resulting permutation is stored in that cache, and the parsed function
    is returned.  Returns None when no player code could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    player_code = self._load_player(video_id, player_url)
    if not player_code:
        return None

    sig_func = self._parse_sig_js(player_code)

    # Run the function on a string of distinct characters so the output
    # reveals which input index ends up at each output position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func

2307

2308

def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to ``func`` when 'youtube_print_sig_code' is set.

    ``func`` is run on a probe string of distinct characters; the resulting
    index permutation is rendered as a sum of ``s[...]`` index/slice
    expressions and written with ``self.to_screen``.  Does nothing when the
    'youtube_print_sig_code' param is falsy.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield ``s[...]`` expressions covering ``idxs``, merging +1/-1 runs into slices."""
        def _genslice(start, end, step):
            # Render an inclusive start..end run as Python slice syntax.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: extend it while the stride stays the same.
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Adjacent indices start a new run.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2349

2350

def _parse_sig_js(self, jscode):

2351

funcname = self._search_regex(

2352

(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2353

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2354

r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})$decodeURIComponent\(h\.s$\)',

2355

r'\bc&&$c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c$\)',

2356

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}$a,\d+$',

2357

r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2358

r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function$\s*a\s*$\s*{\s*a\s*=\s*a\.split$\s*""\s*$',

2359

# Obsolete patterns

2360

r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2361

r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',

2362

r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2363

r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2364

r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2365

r'\bc\s*&&\s*a\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2366

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',

2367

r'\bc\s*&&\s*[a-zA-Z0-9]+\.set$[^,]+\s*,\s*\([^)]*$\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),

2368

jscode, 'Initial JS player signature function name', group='sig')

2369

2370

jsi = JSInterpreter(jscode)

2371

initial_function = jsi.extract_function(funcname)

2372

return lambda s: initial_function([s])

2373

2374

def _decrypt_signature(self, s, video_id, player_url):

2375

"""Turn the encrypted s field into a working signature"""

2376

2377

if player_url is None:

2378

raise ExtractorError('Cannot decrypt signature without player_url')

2379

2380

try:

2381

player_id = (player_url, self._signature_cache_id(s))

2382

if player_id not in self._player_cache:

2383

func = self._extract_signature_function(

2384

video_id, player_url, s

2385

)

2386

self._player_cache[player_id] = func

2387

func = self._player_cache[player_id]

2388

self._print_sig_code(func, s)

2389

return func(s)

2390

except Exception as e:

2391

raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2392

2393

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature.

    Both the per-player n function (key ``('nsig', player_url)``) and each
    decrypted value (key ``('nsig_value', s)``) are kept in
    ``self._player_cache``.  Raises ExtractorError when ``player_url`` is
    None or when extraction/decryption fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2413

2414

def _extract_n_function_name(self, jscode):

2415

nfunc, idx = self._search_regex(

2416

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2417

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2418

if not idx:

2419

return nfunc

2420

return json.loads(js_to_json(self._search_regex(

2421

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2422

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2423

2424

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The extracted function code is cached in the 'youtube-nsig' cache
    section keyed by player id; on a cache miss it is extracted from the
    player JS and stored.  When 'youtube_print_sig_code' is set, the
    function code is also printed.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        player_js = self._load_player(video_id, player_url)
        n_name = self._extract_n_function_name(player_js)
        jsi = JSInterpreter(player_js)
        func_code = jsi.extract_function_code(n_name)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def apply_n_function(s):
        # The JS function is rebuilt from its code on every call, as before.
        return jsi.extract_function_from_code(*func_code)([s])

    return apply_n_function

2441

2442

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred when ``ytcfg`` is a dict; otherwise the
    value is searched for in the player JS.  Without a ``player_url`` this
    raises ExtractorError if ``fatal``, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if not player_js:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
        'JS player signature timestamp', group='sts', fatal=fatal))

2465

2466

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    Warns and returns when no tracking URL is present in the player
    responses.  The request itself is non-fatal.
    """
    tracking_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2491

2492

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in ``webpage`` and return them as a list.

    Three sources are scanned in order: player embed URLs (iframe / embed /
    object / SWFObject style), lazyYT ``data-youtube-id`` values, and
    ``data-video_id`` values of the "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    player_embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = [unescapeHTML(m.group('url')) for m in re.finditer(player_embed_re, webpage)]

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        found.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The last group is the captured video id.
        found.append(groups[-1])

    return found

@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None if there is none."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]

2528

2529

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched (verbose mode) against ``url``.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2535

2536

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in ``data``.

    Start times come from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``; the actual
    assembly is delegated to ``_extract_chapters``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        raw_chapters, chapter_time=start_of, chapter_title=title_of, duration=duration)

2550

2551

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the engagement panels, else ``[]``.

    Each ``macroMarkersListRenderer`` contents list is tried in order;
    chapter times are parsed from 'timeDescription' and titles read from
    'title'.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def start_of(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def title_of(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            start_of, title_of, duration)
        # Stop at the first panel that actually produced chapters.
        if chapters:
            return chapters
    return []

2566

2567

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2568

chapters = []

2569

last_chapter = {'start_time': 0}

2570

for idx, chapter in enumerate(chapter_list or []):

2571

title = chapter_title(chapter)

2572

start_time = chapter_time(chapter)

2573

if start_time is None:

2574

continue

2575

last_chapter['end_time'] = start_time

2576

if start_time < last_chapter['start_time']:

2577

if idx == 1:

2578

chapters.pop()

2579

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2580

else:

2581

self.report_warning(f'Invalid start time for chapter "{title}"')

2582

continue

2583

last_chapter = {'start_time': start_time, 'title': title}

2584

chapters.append(last_chapter)

2585

last_chapter['end_time'] = duration

2586

return chapters

2587

2588

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):

2589

return self._parse_json(self._search_regex(

2590

(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),

2591

regex), webpage, name, default='{}'), video_id, fatal=False)

2592

2593

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'.  ``parent`` is the id
    of the parent comment; 'root' is used when it is falsy.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    # All nested lookups below go through try_get on the same renderer.
    lookup = functools.partial(try_get, comment_renderer)

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = lookup(
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    raw_votes = lookup(
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), compat_str)
    votes = parse_count(raw_votes) or 0
    author_thumbnail = lookup(
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = lookup(lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = lookup(
        lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2627

2628

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations page by page.

    Called with ``parent=None`` for top-level comments and recursively (from
    ``extract_thread``) with ``parent`` set to a comment id for its replies.
    ``tracker`` is a dict of running counters shared across the recursive
    calls; it is created here when not supplied.
    """

    # First value of an extractor argument, or '' when it is not set.
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Return the continuation of the requested sort order taken from the comments header."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield each comment in ``contents`` followed by its replies.

        A bare ``yield`` (None) is emitted once the parent-comment limit is
        reached; the consumer at the bottom of this method treats a falsy
        entry as the signal to stop.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what is left of the overall reply limit.
                for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                    yield reply_comment

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Up to four limits are read from the 'max_comments' argument; missing or
    # non-integer positions fall back to sys.maxsize.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    # Only the top-level call starts with the header/sort-menu page.
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Reset; a new value is only set when this page provides one.
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # Falsy entry: the stop sentinel from extract_thread.
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction.

    Returns an iterator over comments from the item section identified as
    'comment-item-section', limited to the first value of the
    'max_comments' extractor argument (unlimited when that is not an integer).
    """
    def _iter_comments():
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(), 0, limit)

2775

2776

@staticmethod

2777

def _get_checkok_params():

2778

return {'contentCheckOk': True, 'racyCheckOk': True}

2779

2780

@classmethod

2781

def _generate_player_context(cls, sts=None):

2782

context = {

2783

'html5Preference': 'HTML5_PREF_WANTS',

2784

}

2785

if sts is not None:

2786

context['signatureTimestamp'] = sts

2787

return {

2788

'playbackContext': {

2789

'contentPlaybackContext': context

2790

},

2791

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Return True when the player response looks age-gated.

    That is the case when 'desktopLegacyAgeGateReason' is set in the
    playability status, or when its 'status' or 'reason' contains one of
    the known age-gate markers (compared case-insensitively).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status values are compared in upper
    # case elsewhere in this class (e.g. 'UNPLAYABLE'), so a case-sensitive
    # substring test could never hit the status markers.  Non-string values
    # are ignored.
    candidates = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in candidates)

2805

2806

@staticmethod
def _is_unplayable(player_response):
    """Return True when the playability status of ``player_response`` is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2809

2810

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):

2811

2812

session_index = self._extract_session_index(player_ytcfg, master_ytcfg)

2813

syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

2814

sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None

2815

headers = self.generate_api_headers(

2816

ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

2817

2818

yt_query = {'videoId': video_id}

2819

yt_query.update(self._generate_player_context(sts))

2820

return self._extract_response(

2821

item_id=video_id, ep='player', query=yt_query,

2822

ytcfg=player_ytcfg, headers=headers, fatal=True,

2823

default_client=client,

2824

note='Downloading %s player API JSON' % client.replace('_', ' ').strip()

2825

) or None

2826

2827

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into an ordered, de-duplicated client list.

    Recognised values are any non-underscore-prefixed key of
    INNERTUBE_CLIENTS, 'default' (android + web) and 'all'; anything else is
    skipped with a warning.  With no valid request the defaults are used.
    For music URLs, the '<client>_music' variant of every requested client
    is appended when such a client exists.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later extensions never touch the defaults list.
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first instead of extending the list with a
        # generator that is iterating over that same list.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2850

2851

def _extract_player_ytcfg(self, client, video_id):

2852

url = {

2853

'web_music': 'https://music.youtube.com',

2854

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())

2859

return self.extract_ytcfg(video_id, webpage) or {}

2860

2861

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Fetch a player response for each client in `clients`.

    The initial player response embedded in `webpage` (if any) is reused for
    the `web` client. Age-gate/creator variants of a client are queued on the
    fly when a response turns out to be age-gated or unplayable.

    Returns a `(player_responses, player_url)` tuple. Extraction errors of
    individual clients are reported as warnings; the last one is re-raised
    only when no player response at all was collected.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    requested = clients
    # Work stack: reversed so that pop() yields clients in the requested order
    pending = clients[::-1]
    responses = []

    def queue_client(name):
        # Only queue known clients that the user did not request explicitly
        if name in INNERTUBE_CLIENTS and name not in requested:
            pending.append(name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped = dict(initial_pr)
        stripped['streamingData'] = None
        responses.append(stripped)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Fall back to downloading the player URL, at most once per video
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                response = initial_pr
            else:
                response = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as err:
            # Keep only the most recent error; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = err
            continue

        if response:
            responses.append(response)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(response) and self.is_authenticated:
            queue_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(response):
            queue_client(f'{client}_agegate')

    if last_error and not responses:
        raise last_error
    if last_error:
        self.report_warning(last_error)
    return responses, player_url

2927

2928

def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the given streamingData objects.

    First every entry of `formats`/`adaptiveFormats` is turned into a direct
    https format (decrypting the signature and the `n` parameter through
    `player_url` where needed). Afterwards the HLS and DASH manifests
    referenced by each streamingData are expanded, skipping manifest formats
    whose itag was already yielded for the same protocol.

    @param streaming_data  Iterable of streamingData dicts
    @param video_id        Video id, used for requests and messages
    @param player_url      JS player URL, or None if unavailable
    @param is_live         Whether the video is currently live
    """
    itags = {}
    # Only used for membership tests, so a set is sufficient
    stream_ids = set()
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
    approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.add(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format carries neither `quality`
                # nor `audioQuality`; do not call .replace() on it then
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            'preference': -10 if is_damaged else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copy before appending so the list handed out by _configuration_arg
    # is never modified
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id and quality to a manifest format.

        Returns False if the same itag was already seen for this protocol,
        i.e. the format is a duplicate and must not be yielded.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` storyboard format per storyboard level.

    The spec string is `|`-separated: its first element is the URL template
    (containing `$L`, `$N` and `$M` placeholders) and every following
    element is a `#`-separated, 8-field description of one level. Malformed
    levels are reported with a warning and skipped.

    Nothing is yielded when no storyboard URL is available or when
    `duration` is None, since the per-fragment durations cannot be
    computed without it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading URL template
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if duration is None:
        # duration / fragment_count below would raise TypeError
        return

    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        # all(counts) also guarantees that cols * rows is non-zero
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        n_repl, sigh = args[6:]

        # spec was reversed above, hence the level index counts down
        url = base_url.replace('$L', str(last_level - i)).replace('$N', n_repl) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment only covers the remaining time
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3124

3125

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and all player responses.

    The webpage download is non-fatal and is skipped entirely when
    `webpage` is listed in the `player_skip` configuration argument. The
    ytcfg extracted from the page is used as master config, falling back to
    the default ytcfg.

    Returns `(webpage, master_ytcfg, player_responses, player_url)`.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3138

3139

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine the live state and extract all formats of the video.

    `is_live` is taken from the video details; when they do not say, the
    `isLiveNow` flag of the live broadcast details is used instead.

    Returns `(live_broadcast_details, is_live, streaming_data, formats)`.
    """
    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    # Materialize the generator so callers get a plain list
    formats = [*self._extract_formats(streaming_data, video_id, player_url, is_live)]

    return live_broadcast_details, is_live, streaming_data, formats

3149

3150

def _real_extract(self, url):
    """Extract the info dict of a single YouTube video.

    Downloads the watch page and the player responses, then assembles
    formats, thumbnails, subtitles, live status, music metadata, chapters,
    engagement counts and availability into the info dict.

    Returns a url result for the trailer when the video only offers one, a
    playlist result for multifeed videos (unless --no-playlist is given or
    `force_singlefeed` is smuggled), and the video info dict otherwise.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no meta tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be missing even though a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names rank higher; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if the user doesn't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one subtitle entry per supported subtitle format
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
                if lang_code == f'a-{trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times given in the URL; the fragment takes precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Resolved unconditionally: the comment extractor at the end needs
    # `contents` even when no initial data could be obtained
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
                info.update({
                    'channel': self._get_text(vor, 'title'),
                    'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line means the rows describe more than one song,
                # in which case album/artist/track are not taken from them
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3606

3607

@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for extract methods taking `(self, url, smuggled_data)`.

    The wrapper unsmuggles the URL, flags music URLs in the smuggled data,
    calls `func`, and re-smuggles the data into the URL of every entry of
    the returned info dict.
    """

    def _resmuggle(entries, data):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True

        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result

        entries = result.get('entries')
        if entries:
            # Lazily rewrite the entries as they are consumed
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel id found in the webpage.

    The `channelId` meta tag is preferred. Otherwise the id is parsed out of
    the first available channel URL meta tag (og:url, al:*:url,
    twitter:*url*); both lookups of this fallback are made without a
    default value.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: a repeated group only keeps
    # its last repetition, i.e. a single character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3639

3640

@staticmethod

3641

def _extract_basic_item_renderer(item):

3642

# Modified from _extract_grid_item_renderer

3643

known_basic_renderers = (

3644

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3645

)

3646

for key, renderer in item.items():

3647

if not isinstance(renderer, dict):

3648

continue

3649

elif key in known_basic_renderers:

3650

return renderer

3651

elif key.startswith('grid') and key.endswith('Renderer'):

3652

return renderer

3653

3654

def _grid_entries(self, grid_renderer):
    """Yield one result per recognizable item of ``grid_renderer['items']``.

    An item is handled, in order of precedence, as a playlist, a video, a
    channel, or as a generic endpoint URL that YoutubeTabIE,
    YoutubePlaylistIE or YoutubeIE declares suitable. Anything else is
    skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue

        # video
        if renderer.get('videoId'):
            yield self._extract_video(renderer)
            continue

        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            renderer,
            lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        ep_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not ep_url:
            continue
        # Only the first extractor that accepts the URL is used
        matching_ie = next(
            (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
            None)
        if matching_ie is not None:
            yield self.url_result(
                ep_url, ie=matching_ie.ie_key(),
                video_id=matching_ie._match_id(ep_url), video_title=title)

3693

3694

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a responsive list item.

    Tries, in order: the item's own video ID, the playlist (optionally
    with a video) of its watch endpoint, and its browse endpoint ID.
    Returns None if none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3711

3712

def _shelf_entries_from_content(self, shelf_renderer):

3713

content = shelf_renderer.get('content')

3714

if not isinstance(content, dict):

3715

return

3716

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3717

if renderer:

3718

# TODO: add support for nested playlists so each shelf is processed

3719

# as separate playlist

3720

# TODO: this includes only first N items

3721

for entry in self._grid_entries(renderer):

3722

yield entry

3723

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then the shelf's embedded entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3744

3745

def _playlist_entries(self, video_list_renderer):

3746

for content in video_list_renderer['contents']:

3747

if not isinstance(content, dict):

3748

continue

3749

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3750

if not isinstance(renderer, dict):

3751

continue

3752

video_id = renderer.get('videoId')

3753

if not video_id:

3754

continue

3755

yield self._extract_video(renderer)

3756

3757

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it wraps a video renderer with an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3764

3765

def _video_entry(self, video_renderer):

3766

video_id = video_renderer.get('videoId')

3767

if video_id:

3768

return self._extract_video(video_renderer)

3769

3770

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every
    YouTube video linked from the post text other than the attached video.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        attached_video = self._extract_video(video_renderer)
        if attached_video:
            yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer,
        lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already considered above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

3805

3806

def _post_thread_continuation_entries(self, post_thread_continuation):

3807

contents = post_thread_continuation.get('contents')

3808

if not isinstance(contents, list):

3809

return

3810

for content in contents:

3811

renderer = content.get('backstagePostThreadRenderer')

3812

if not isinstance(renderer, dict):

3813

continue

3814

for entry in self._post_thread_entries(renderer):

yield entry

# Disabled `_rich_grid_entries` helper, kept only as an inert raw-string literal; it is never executed.
r''' # unused

def _rich_grid_entries(self, contents):

3819

for content in contents:

3820

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3821

if video_renderer:

3822

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    Each content item is either a section-like renderer whose own
    contents are dispatched by renderer key, or a ``richItemRenderer``.
    The continuation found while walking is reported through
    ``continuation_list``, which is reset and then updated in place.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            for key, renderer in isr_content.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                # Handlers may produce empty entries; those are dropped
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3872

3873

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of ``tab``, following continuations page by page.

    The first page comes from the tab content itself. Further pages are
    requested while a continuation is known and each response contains a
    continuation renderer or continuation items of a known kind.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks the name up on each call, so it always fills whichever list
        # `continuation_list` is bound to at that moment
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the renderers found under `continuationContents`
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # (handler, key under which the handler expects the item list) per item renderer
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            handler = known_continuation_renderers.get(key)
            if handler is None:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from handler(continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The kind of the first item decides how the whole batch is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            handler, items_key = known_renderers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break

@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` field is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` tabs are looked at.
    Raises ExtractorError if no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')

3958

3959

@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the secondary playlist sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields without a value are left out, and an empty
    dict is returned when no owner is present.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    candidates = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {field: value for field, value in candidates.items() if value is not None}

3973

3974

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata comes from the channel or playlist metadata renderer, the
    primary sidebar and the page header; the entries are produced lazily
    by `_entries` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _add_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The title of the selected tab is appended to the playlist title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4065

4066

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is requested from the ``next`` endpoint starting at the last
    video seen so far. Iteration stops when a page has no videos, has no
    videos after the last one seen, contains the very first video again,
    or when the response carries no playlist.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint (e.g. it is not a panel video
        # renderer); use an empty dict so that the fallbacks below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Without a playlist in the response there is nothing left to page through
            return

4100

4101

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist shown next to a video.

    If the playlist has an endpoint URL of its own that differs from
    ``url``, extraction is delegated to that URL; otherwise the playlist
    is extracted as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4118

4119

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.

    The privacy state is read from the sidebar badges and, if present,
    from the selected entry of the privacy dropdown.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_dropdown_entries = try_get(
        renderer,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    selected_labels = (
        self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        for dropdown_entry in privacy_dropdown_entries
        if try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool))
    # Only the first selected entry that has a label counts
    selected_label = next(filter(None, selected_labels), None)
    if selected_label:
        badge_labels.add(selected_label.lower())

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4150

4151

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty ``info_renderer`` of the playlist sidebar items.

    Only values of ``expected_type`` are considered; None is returned if
    no sidebar item provides one.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that it includes unavailable videos.

    The browse parameters are taken from the 'show unavailable videos'
    menu entry of the primary sidebar when it exists; default values are
    used otherwise. Returns None when the page has no primary sidebar.
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return

    browse_endpoint = {}
    menu_items = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4195

4196

def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying on incomplete data.

    The download is attempted up to ``extractor_retries`` + 1 times. It is
    retried after network errors other than HTTP 403/429 and when the
    initial data has neither ``contents`` nor ``currentVideoEndpoint``.
    With ``fatal`` unset, errors are reported as warnings instead of
    being raised. Returns ``(webpage, data)``.
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            retriable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retriable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Reached only when the download and the data extraction succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``, preferring the webpage over the API.

    Unless ``webpage`` is listed in the ``skip`` extractor argument, the
    data is read from the webpage. When that yields nothing, the tab
    endpoint is queried instead; an authenticated session without ytcfg
    triggers an error (or only a warning, if ``fatal`` is unset or
    ``authcheck`` is skipped) before doing so.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4258

4259

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and return the response of its endpoint.

    A resolved ``browseEndpoint`` is queried via ``browse`` and a
    ``watchEndpoint`` via ``next``. If neither is present, an
    ExtractorError is raised when ``fatal`` is set; otherwise a warning is
    reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its parameters)
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4276

4277

# Default `params` value that `_search_results` sends with a search query; a falsy value leaves the field out
_SEARCH_PARAMS = None

4278

4279

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of the search results for ``query``, page by page.

    ``params`` defaults to ``_SEARCH_PARAMS`` and is only sent when truthy.
    Further pages are requested for as long as a page provides a
    continuation.
    """
    effective_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    data = {'query': query}
    if effective_params:
        data['params'] = effective_params

    # Alternative locations of the result list inside a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    check_get_keys = tuple({path[0] for path in content_keys})

    continuation_list = [None]
    for page_num in itertools.count(1):
        # The continuation of the previous page, if any, is merged into the query
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube Tabs'
    # Verbose-mode pattern. After the host it accepts one of:
    #   * a channel-style prefix (channel/, c/, user/, browse/)    -> group "channel_type"
    #   * feed/, hashtag/ or a playlist/watch URL carrying "list=" -> group "not_channel"
    #   * a direct /<name> URL whose name is not one of the reserved names
    # followed by the identifier itself (group "id").
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'

    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Igor Kleiner',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'channel': 'Igor Kleiner',
            'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
            'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Igor Kleiner',
            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
            'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'channel': 'Igor Kleiner',
            'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
            'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'channel': '3Blue1Brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'tags': ['Mathematics'],
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
            'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
            'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'tags': 'count:13',
            'channel': 'ThirstForScience',
            'channel_follower_count': int
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
            'description': '',
            'tags': [],
            'view_count': int,
            'modified_date': '20201130',
            'channel': 'Sergey M.',
            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
            'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
            'tags': [],
            'channel': 'Sergey M.',
            'description': '',
            'modified_date': '20160902',
            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'tags': ['bible', 'history', 'prophesy'],
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_follower_count': int
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_follower_count': int
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_follower_count': int
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'tags': ['Mathematics'],
            'channel': '3Blue1Brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'channel_follower_count': int
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
            'view_count': int,
            'modified_date': '20150605',
            'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
            'channel': 'Christiaan008',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
            'channel_url': 'https://www.youtube.com/c/Cauchemar89',
            'tags': [],
            'modified_date': r're:\d{8}',
            'channel': 'Cauchemar',
            'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
            'view_count': int,
            'description': '',
            'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
            'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
            'tags': [],
            'view_count': int,
            'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
            'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
            'channel': 'Interstellar Movie',
            'description': '',
            'modified_date': r're:\d{8}',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
            'view_count': int,
            'channel': 'Phim Siêu Nhân Nhật Bản',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
            'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
            'modified_date': r're:\d{8}',
        },
        'playlist_mincount': 200,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
            'channel': 'BlankTV',
            'channel_url': 'https://www.youtube.com/c/blanktv',
            'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
            'view_count': int,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/blanktv',
            'modified_date': r're:\d{8}',
            'description': '',
        },
        'playlist_mincount': 1000,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
            'uploader_url': 'https://www.youtube.com/user/Computerphile',
            'tags': [],
            'view_count': int,
            'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'channel_url': 'https://www.youtube.com/user/Computerphile',
            'channel': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': 'GgL890LIznQ',  # This will keep changing
            'ext': 'mp4',
            'title': str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'release_timestamp': 1642502819,
            'channel': 'Sky News',
            'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
            'age_limit': 0,
            'view_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
            'playable_in_embed': True,
            'release_date': '20220118',
            'availability': 'public',
            'live_status': 'is_live',
            'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
            'channel_follower_count': int
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
            'tags': [],
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS Releases',
            'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
            'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
            'modified_date': r're:\d{8}',
            'view_count': int,
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'tags': [],
            'channel': 'NoCopyrightSounds',
        },
        'playlist_mincount': 166,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
            'tags': [],
            'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'channel': 'Royalty Free Music - Topic',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'modified_date': r're:\d{8}',
            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'description': '',
        },
        'expected_warnings': [
            'The URL does not have a videos tab',
            r'[Uu]navailable videos (are|will be) hidden',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'tags': [],
        },
        'expected_warnings': [
            'the playlist redirect gave error',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'tags': [],
            'view_count': int,
            'description': '',
            'availability': 'unlisted',
            'modified_date': r're:\d{8}',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted',
            'tags': [],
            'modified_date': '20211208',
            'channel': 'colethedj',
            'view_count': int,
            'description': '',
            'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
            'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
            'tags': [],
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'channel': 'Cody\'sLab',
            'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
            'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
            'channel_follower_count': int
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
            'modified_date': r're:\d{8}',
            'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'tags': [],
            'channel': 'Royalty Free Music - Topic',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
        },
        'expected_warnings': [
            'does not have a videos tab',
            r'[Uu]navailable videos (are|will be) hidden',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject every URL that YoutubeIE accepts; otherwise defer to the inherited check."""
        return False if YoutubeIE.suitable(url) else super(
            YoutubeTabIE, cls).suitable(url)

    # Splits a URL into "pre" (the part matched by _VALID_URL), an optional "/tab" path
    # segment (only attempted when the "not_channel" group did not take part in the match)
    # and "post" (whatever follows).
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
    def _real_extract(self, url, smuggled_data):
        """Extract a tab/playlist page, or hand the URL over to a more specific result.

        @param url              The URL to extract; its host is rewritten to www.youtube.com
        @param smuggled_data    Mapping read for the 'is_music_url' flag
                                (NOTE(review): presumably supplied by the
                                passthrough_smuggled_data decorator - confirm)
        @returns                The result of _extract_from_tabs or _extract_from_playlist,
                                or a url_result pointing to YoutubeIE / YoutubeTabIE
        @raises ExtractorError  If an album cannot be resolved, a /live tab is not live,
                                a requested tab does not exist, or the page is not recognized
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Groups that did not take part in the match become '' so that they can be
            # used in string operations and truth tests without None checks
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        # Remember what the user actually asked for before a default tab is substituted
        original_tab_name = tab
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            selected_tab_name = selected_tab.get('title', '').lower()
            # The tab titled "Home" is addressed as /featured in URLs
            if selected_tab_name == 'home':
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]
            if 'no-youtube-channel-redirect' not in compat_opts:
                if requested_tab_name == 'live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    # Only fall back to something else when the tab was substituted by us;
                    # a tab that the user explicitly requested and is missing is an error
                    if not original_tab_name:
                        if item_id[:2] == 'UC':
                            # Topic channels don't have /videos. Use the equivalent playlist instead
                            pl_id = f'UU{item_id[2:]}'
                            pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                            try:
                                data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                            except ExtractorError:
                                redirect_warning += ' and the playlist redirect gave error'
                            else:
                                item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                                redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                        if selected_tab_name and selected_tab_name != requested_tab_name:
                            redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                    else:
                        raise ExtractorError(redirect_warning, expected=True)

        if redirect_warning:
            self.to_screen(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # `data` may have been replaced above, so the tabs are looked up again
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Neither tabs nor a playlist: fall back to the single video, preferring the id
        # reported by the page over the one taken from the URL
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')

5100

5101

5102

class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist IDs and delegate them to YoutubeTabIE.

    The matched input is rewritten to a ``https://www.youtube.com/playlist``
    URL before being handed over; no extraction happens in this class itself.
    """

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Verbose regex: the whole host/path prefix is optional, so a bare
    # playlist ID is accepted as well as a URL carrying a ``list=`` parameter.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs YoutubeTabIE accepts or that carry a ``v`` parameter.

        Everything else is decided by the inherited ``suitable``.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-scope import and the explicit two-argument
        # super() are kept exactly as the original had them; presumably this
        # method's source is reused outside this module - confirm before
        # hoisting the import or switching to zero-argument super().
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a ``/playlist`` URL and return a url_result for YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Evaluated on the incoming URL, before it is replaced below.
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare ID has none.
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5211

5212

5213

class YoutubeYtBeIE(InfoExtractor):
    """Turn ``youtu.be/<video>?list=<playlist>`` links into a watch URL for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent ``/watch?v=...&list=...`` URL and delegate it."""
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is keyed by the playlist ID, not the video ID.
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5260

5261

5262

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's ``/live`` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for ``/channel/<id>/live``, handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5275

5276

5277

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's ``/videos`` page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for ``/user/<name>/videos``, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5291

5292

5293

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s).
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``LL`` playlist, handled by YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5310

5311

5312

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """``ytsearch`` keyword search; behaviour comes entirely from the base classes."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """``ytsearchdate`` keyword search; differs from YoutubeSearchIE only in its constants."""

    # Derived from the plain search extractor's name so the two stay in sync.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``youtube.com/results`` and ``/search`` URLs, passing any ``sp`` value on."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results whose id and title are the query text."""
        qs = parse_qs(url)
        # ``search_query`` takes precedence over ``q`` when both are present.
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5370

5371

5372

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``music.youtube.com/search`` URLs.

    A result section can be chosen either with an explicit ``sp`` query
    parameter or with a URL fragment naming one of the ``_SECTIONS`` keys.
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lower case, as written in the URL fragment) -> ``sp`` value.
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled ``<query>[ - <section>]``."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit ``sp``: label it with the known section name if there is
            # one, otherwise fall back to the raw parameter value.
            section = params
            for name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = name
                    break
        else:
            # No ``sp``: take the text after the first '#' (up to any further
            # '#') as the requested section name.
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown or missing fragment: no section in the title.
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5423

5424

5425

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Every subclass has to provide a _FEED_NAME attribute; it is used both for
    the extractor name and for the ``/feed/<name>`` URL that is delegated.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Return a url_result for the feed page, handled by YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5440

5441

5442

class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the ``WL`` playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5454

5455

5456

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """``recommended`` feed; matches the bare site root as well as ``:ytrec``."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True.
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """``subscriptions`` feed, reached through the ``:ytsubs`` keyword family."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions.
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """``history`` feed, reached through ``:ythis`` or ``:ythistory``."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    Such URLs typically result from an unquoted ``&`` on the command line, so
    instead of extracting anything ``_real_extract`` always raises an expected
    ExtractorError telling the user to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex anchored with ``$``: only URLs that END right after one of
    # the listed lone parameters (or after a bare ``watch?``) are matched.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice."""
        # The hint names the yt-dlp executable; it previously told users to
        # run "youtube-dl", which is not this program's command.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Warn that clips are unsupported and pass the URL on to the 'Generic' extractor."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning, then return a url_result for the unchanged URL."""
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        fallback_ie = 'Generic'
        return self.url_result(url, fallback_ie)

5553

5554

5555

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is 1-10 characters long and report them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)