jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	mimetype2ext,
	46	network_exceptions,
	47	NO_DEFAULT,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73
	def get_first(obj, keys, **kwargs):
	    """Return the first value found under ``keys`` in any element of ``obj``.
	    ``keys`` is a single key or a sequence of keys; it is spliced into the
	    traversal path after ``...``. Remaining keyword arguments are passed to
	    ``traverse_obj`` unchanged, with ``get_all=False`` added.
	    """
	    # The path must be flat: (..., key1, key2, ...), hence the unpacking
	    path = (..., *variadic(keys))
	    # Options are keyword arguments; unpacking them positionally would add bogus paths
	    return traverse_obj(obj, path, **kwargs, get_all=False)
	76
	77
	# any clients starting with _ cannot be explicitly requested by the user
	# Each entry holds the API key, the InnerTube request context and the numeric
	# client-name id; host / JS-player flags appear only where they are set here.
	INNERTUBE_CLIENTS = {
	    'web': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB', 'clientVersion': '2.20211221.00.00'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
	    },
	    'web_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB_EMBEDDED_PLAYER', 'clientVersion': '1.20211215.00.01'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
	    },
	    'web_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	        'INNERTUBE_HOST': 'music.youtube.com',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB_REMIX', 'clientVersion': '1.20211213.00.00'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	    },
	    'web_creator': {
	        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB_CREATOR', 'clientVersion': '1.20211220.02.00'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	    },
	    'android': {
	        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID', 'clientVersion': '16.49'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_embedded': {
	        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID_EMBEDDED_PLAYER', 'clientVersion': '16.49'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID_MUSIC', 'clientVersion': '4.57'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'android_creator': {
	        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID_CREATOR', 'clientVersion': '21.47'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
	    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	    'ios': {
	        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	        'INNERTUBE_CONTEXT': {
	            'client': {'clientName': 'IOS', 'clientVersion': '16.46', 'deviceModel': 'iPhone14,3'},
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_embedded': {
	        'INNERTUBE_CONTEXT': {
	            'client': {'clientName': 'IOS_MESSAGES_EXTENSION', 'clientVersion': '16.46', 'deviceModel': 'iPhone14,3'},
	        },
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_music': {
	        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'IOS_MUSIC', 'clientVersion': '4.57'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    'ios_creator': {
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'IOS_CREATOR', 'clientVersion': '21.47'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	        'REQUIRE_JS_PLAYER': False,
	    },
	    # mweb has 'ultralow' formats
	    # See: https://github.com/yt-dlp/yt-dlp/pull/557
	    'mweb': {
	        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	        'INNERTUBE_CONTEXT': {'client': {'clientName': 'MWEB', 'clientVersion': '2.20211221.01.00'}},
	        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
	    },
	}
	224
	225
	def build_innertube_clients():
	    """Complete every ``INNERTUBE_CLIENTS`` entry in place and add ``*_agegate`` variants.
	    Fills default API key, host, JS-player flag and ``hl``, assigns a
	    ``priority`` derived from the client family, and for each base client
	    stores a deep copy configured for embedded playback.
	    """
	    embed_context = {'embedUrl': 'https://google.com'}  # Can be any valid URL
	    base_names = ('android', 'web', 'ios', 'mweb')
	    rank = qualities(base_names[::-1])
	    fallbacks = (
	        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
	        ('INNERTUBE_HOST', 'www.youtube.com'),
	        ('REQUIRE_JS_PLAYER', True),
	    )
	    # Iterate over a snapshot: agegate entries are inserted during the loop
	    for name, cfg in list(INNERTUBE_CLIENTS.items()):
	        for field, value in fallbacks:
	            cfg.setdefault(field, value)
	        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	        family = name.split('_', 1)[0]
	        cfg['priority'] = 10 * rank(family)
	        if name in base_names:
	            agegate = copy.deepcopy(cfg)
	            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	            agegate['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
	            agegate['priority'] -= 1
	            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate
	        elif name.endswith('_embedded'):
	            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
	            cfg['priority'] -= 2
	        else:
	            cfg['priority'] -= 3


	build_innertube_clients()
	253
	254
	255	class YoutubeBaseInfoExtractor(InfoExtractor):
	256	"""Provide base functions for Youtube extractors"""
	257
	# Alternation of path names; '|' must stay unescaped to act as regex alternation
	_RESERVED_NAMES = (
	    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
	    r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
	    r'browse|oembed|get_video_info|iframe_api|s/player|'
	    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
	# Either a known prefix followed by 10+ id characters, or one of the fixed short ids
	_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
	_NETRC_MACHINE = 'youtube'
	# If True it will raise an error if no login info is provided
	_LOGIN_REQUIRED = False
	270
	# Host-name regex fragments; '|' inside a group is alternation and must not be escaped
	_INVIDIOUS_SITES = (
	    # invidious-redirect websites
	    r'(?:www\.)?redirect\.invidious\.io',
	    r'(?:(?:www|dev)\.)?invidio\.us',
	    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	    r'(?:www\.)?invidious\.pussthecat\.org',
	    r'(?:www\.)?invidious\.zee\.li',
	    r'(?:www\.)?invidious\.ethibox\.fr',
	    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	    # youtube-dl invidious instances list
	    r'(?:(?:www|no)\.)?invidiou\.sh',
	    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
	    r'(?:www\.)?invidious\.kabi\.tk',
	    r'(?:www\.)?invidious\.mastodon\.host',
	    r'(?:www\.)?invidious\.zapashcanon\.fr',
	    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
	    r'(?:www\.)?invidious\.tinfoil-hat\.net',
	    r'(?:www\.)?invidious\.himiko\.cloud',
	    r'(?:www\.)?invidious\.reallyancient\.tech',
	    r'(?:www\.)?invidious\.tube',
	    r'(?:www\.)?invidiou\.site',
	    r'(?:www\.)?invidious\.site',
	    r'(?:www\.)?invidious\.xyz',
	    r'(?:www\.)?invidious\.nixnet\.xyz',
	    r'(?:www\.)?invidious\.048596\.xyz',
	    r'(?:www\.)?invidious\.drycat\.fr',
	    r'(?:www\.)?inv\.skyn3t\.in',
	    r'(?:www\.)?tube\.poal\.co',
	    r'(?:www\.)?tube\.connect\.cafe',
	    r'(?:www\.)?vid\.wxzm\.sx',
	    r'(?:www\.)?vid\.mint\.lgbt',
	    r'(?:www\.)?vid\.puffyan\.us',
	    r'(?:www\.)?yewtu\.be',
	    r'(?:www\.)?yt\.elukerio\.org',
	    r'(?:www\.)?yt\.lelux\.fi',
	    r'(?:www\.)?invidious\.ggc-project\.de',
	    r'(?:www\.)?yt\.maisputain\.ovh',
	    r'(?:www\.)?ytprivate\.com',
	    r'(?:www\.)?invidious\.13ad\.de',
	    r'(?:www\.)?invidious\.toot\.koeln',
	    r'(?:www\.)?invidious\.fdn\.fr',
	    r'(?:www\.)?watch\.nettohikari\.com',
	    r'(?:www\.)?invidious\.namazso\.eu',
	    r'(?:www\.)?invidious\.silkky\.cloud',
	    r'(?:www\.)?invidious\.exonip\.de',
	    r'(?:www\.)?invidious\.riverside\.rocks',
	    r'(?:www\.)?invidious\.blamefran\.net',
	    r'(?:www\.)?invidious\.moomoo\.de',
	    r'(?:www\.)?ytb\.trom\.tf',
	    r'(?:www\.)?yt\.cyberhost\.uk',
	    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	    r'(?:www\.)?qklhadlycap4cnod\.onion',
	    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	)
	336
	def _login(self):
	    """Check login prerequisites; password login itself is not performed.
	    Raises through ``raise_login_required`` when ``_LOGIN_REQUIRED`` is set
	    and neither cookie option was given, and warns when a username is
	    configured.
	    """
	    cookie_options = ('cookiefile', 'cookiesfrombrowser')
	    if self._LOGIN_REQUIRED and not any(
	            self.get_param(option) is not None for option in cookie_options):
	        self.raise_login_required(
	            'Login details are needed to download this content', method='cookies')
	    username, _ = self._get_login_info()
	    if not username:
	        return
	    self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
	351
	def _initialize_consent(self):
	    """Set an accepting ``CONSENT`` cookie unless one is already in place.
	    Nothing is done when ``__Secure-3PSID`` is set or the existing CONSENT
	    value contains ``YES``. A pending consent id is reused when present,
	    otherwise a random three-digit id is generated.
	    """
	    jar = self._get_cookies('https://www.youtube.com/')
	    if jar.get('__Secure-3PSID'):
	        return
	    pending_id = None
	    current = jar.get('CONSENT')
	    if current:
	        if 'YES' in current.value:
	            return
	        pending_id = self._search_regex(
	            r'PENDING\+(\d+)', current.value, 'consent', default=None)
	    consent_id = pending_id or random.randint(100, 999)
	    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	366
	def _initialize_pref(self):
	    """Rewrite the ``PREF`` cookie so that ``hl=en`` and ``tz=UTC`` are set.
	    Existing preferences are kept when the cookie parses; on a parse error
	    a warning is reported and only the forced values are written.
	    """
	    stored = self._get_cookies('https://www.youtube.com/').get('PREF')
	    settings = {}
	    if stored:
	        try:
	            settings = dict(compat_urlparse.parse_qsl(stored.value))
	        except ValueError:
	            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	    settings['hl'] = 'en'
	    settings['tz'] = 'UTC'
	    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))
	378
	def _real_initialize(self):
	    """Run the cookie/login preparation steps in their fixed order."""
	    for step in (self._initialize_pref, self._initialize_consent, self._login):
	        step()
	383
	# Whitespace around brackets, '=' and ';' is optional, so every \s carries '*'
	_YT_INITIAL_DATA_RE = (
	    r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)'  # either spelling of the variable
	    r'\s*=\s*({.+?})\s*;')  # lazily captured object literal up to a ';'
	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
	# Alternatives that may follow the assignment; '|' is alternation here
	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
	387
	def _get_default_ytcfg(self, client='web'):
	    """Return an independent deep copy of the built-in config for ``client``."""
	    template = INNERTUBE_CLIENTS[client]
	    return copy.deepcopy(template)
	390
	def _get_innertube_host(self, client='web'):
	    """Return the ``INNERTUBE_HOST`` value of the built-in config for ``client``."""
	    client_config = INNERTUBE_CLIENTS[client]
	    return client_config['INNERTUBE_HOST']
	393
	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	    """``try_get`` on ``ytcfg``, falling back to the default config of ``default_client``.
	    The fallback is consulted whenever the first lookup yields a falsy value.
	    """
	    found = try_get(ytcfg, getter, expected_type)
	    if found:
	        return found
	    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
	398
	def _extract_client_name(self, ytcfg, default_client='web'):
	    """Return the client name from ``ytcfg``, or from the default config as fallback."""
	    getters = (
	        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
	        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
	    )
	    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
	403
	def _extract_client_version(self, ytcfg, default_client='web'):
	    """Return the client version from ``ytcfg``, or from the default config as fallback."""
	    getters = (
	        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
	        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
	    )
	    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)
	408
	def _extract_api_key(self, ytcfg=None, default_client='web'):
	    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the default config as fallback."""
	    def api_key_of(cfg):
	        return cfg['INNERTUBE_API_KEY']
	    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)
	411
	def _extract_context(self, ytcfg=None, default_client='web'):
	    """Return the InnerTube context with language and timezone forced.
	    The context comes from ``ytcfg`` when it has one, otherwise from a copy
	    of the default config for ``default_client``.
	    """
	    candidates = (ytcfg, self._get_default_ytcfg(default_client))
	    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
	    # Enforce language and tz for extraction
	    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
	    client_section['hl'] = 'en'
	    client_section['timeZone'] = 'UTC'
	    client_section['utcOffsetMinutes'] = 0
	    return context
	419
	# None: not looked up yet; False: looked up and absent; otherwise the cookie value
	_SAPISID = None

	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	    """Build the ``SAPISIDHASH`` header value for ``origin``.
	    Returns None when no SAPISID-type cookie is available. The cookie value
	    is looked up once and then kept on ``self._SAPISID``.
	    """
	    now = round(time.time())
	    if self._SAPISID is None:
	        jar = self._get_cookies('https://www.youtube.com')
	        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	        # See: https://github.com/yt-dlp/yt-dlp/issues/393
	        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
	        if not (cookie and cookie.value):
	            self._SAPISID = False
	        else:
	            self._SAPISID = cookie.value
	            self.write_debug('Extracted SAPISID cookie')
	            # SAPISID cookie is required if not already present
	            if not jar.get('SAPISID'):
	                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	                self._set_cookie(
	                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)
	    if not self._SAPISID:
	        return None
	    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	    payload = f'{now} {self._SAPISID} {origin}'
	    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
	    return f'SAPISIDHASH {now}_{digest}'
	446
	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	              note='Downloading API JSON', errnote='Unable to download API page',
	              context=None, api_key=None, api_hostname=None, default_client='web'):
	    """POST ``query`` to the InnerTube endpoint ``ep`` and return the parsed JSON.
	    ``context``, ``api_key`` and ``api_hostname`` override the values that
	    are otherwise derived from ``default_client``; ``headers`` are merged
	    over the generated API headers. Other arguments go to ``_download_json``.
	    """
	    data = {'context': context or self._extract_context(default_client=default_client)}
	    data.update(query)
	    real_headers = self.generate_api_headers(default_client=default_client)
	    real_headers.update({'content-type': 'application/json'})
	    if headers:
	        real_headers.update(headers)
	    host = api_hostname or self._get_innertube_host(default_client)
	    # Fall back to the key of the same client as the context/host, not always 'web'
	    key = api_key or self._extract_api_key(default_client=default_client)
	    return self._download_json(
	        'https://%s/youtubei/v1/%s' % (host, ep),
	        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	        data=json.dumps(data).encode('utf8'), headers=real_headers,
	        query={'key': key})
	462
	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	    """Find the ``ytInitialData`` object in ``webpage`` and return it parsed.
	    The pattern with a trailing boundary is tried first, then the bare one.
	    Returns None when nothing was found.
	    """
	    patterns = (
	        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	        self._YT_INITIAL_DATA_RE)
	    raw = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
	    if not raw:
	        return None
	    return self._parse_json(raw, item_id, fatal=fatal)
	469
	@staticmethod
	def _extract_session_index(*data):
	    """
	    Index of current account in account list.
	    See: https://github.com/yt-dlp/yt-dlp/pull/519
	    Returns the first ``SESSION_INDEX`` among ``data`` that converts to an int, else None.
	    """
	    indices = (int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX'])) for cfg in data)
	    return next((index for index in indices if index is not None), None)
	480
	# Deprecated?
	def _extract_identity_token(self, ytcfg=None, webpage=None):
	    """Return the identity token from ``ytcfg`` or, failing that, from ``webpage``.
	    Returns None when neither source yields a token.
	    """
	    if ytcfg:
	        token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	        if token:
	            return token
	    if webpage:
	        # Whitespace around ':' is optional; a bare \s would demand exactly one character
	        return self._search_regex(
	            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
	            'identity token', default=None, fatal=False)
	    return None
	491
	492	@staticmethod
	493	def _extract_account_syncid(*args):
	494	"""
	495	Extract syncId required to download private playlists of secondary channels
	496	@params response and/or ytcfg
	497	"""
	498	for data in args:
	499	# ytcfg includes channel_syncid if on secondary channel
	500	delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

int_or_none,

is_html,

join_nonempty,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

def get_first(obj, keys, **kwargs):
    """Return the first value found under ``keys`` in any element of ``obj``.
    ``keys`` is a single key or a sequence of keys; it is spliced into the
    traversal path after ``...``. Remaining keyword arguments are passed to
    ``traverse_obj`` unchanged, with ``get_all=False`` added.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)

76

77

78

# any clients starting with _ cannot be explicitly requested by the user
# Each entry holds the API key, the InnerTube request context and the numeric
# client-name id; host / JS-player flags appear only where they are set here.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB', 'clientVersion': '2.20211221.00.00'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB_EMBEDDED_PLAYER', 'clientVersion': '1.20211215.00.01'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB_REMIX', 'clientVersion': '1.20211213.00.00'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'WEB_CREATOR', 'clientVersion': '1.20211220.02.00'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID', 'clientVersion': '16.49'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID_EMBEDDED_PLAYER', 'clientVersion': '16.49'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID_MUSIC', 'clientVersion': '4.57'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'ANDROID_CREATOR', 'clientVersion': '21.47'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {'clientName': 'IOS', 'clientVersion': '16.46', 'deviceModel': 'iPhone14,3'},
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {'clientName': 'IOS_MESSAGES_EXTENSION', 'clientVersion': '16.46', 'deviceModel': 'iPhone14,3'},
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'IOS_MUSIC', 'clientVersion': '4.57'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'IOS_CREATOR', 'clientVersion': '21.47'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {'client': {'clientName': 'MWEB', 'clientVersion': '2.20211221.01.00'}},
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}

def build_innertube_clients():
    """Complete every ``INNERTUBE_CLIENTS`` entry in place and add ``*_agegate`` variants.
    Fills default API key, host, JS-player flag and ``hl``, assigns a
    ``priority`` derived from the client family, and for each base client
    stores a deep copy configured for embedded playback.
    """
    embed_context = {'embedUrl': 'https://google.com'}  # Can be any valid URL
    base_names = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_names[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )
    # Iterate over a snapshot: agegate entries are inserted during the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for field, value in fallbacks:
            cfg.setdefault(field, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)
        if name in base_names:
            agegate = copy.deepcopy(cfg)
            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()

253

254

255

class YoutubeBaseInfoExtractor(InfoExtractor):

256

"""Provide base functions for Youtube extractors"""

257

258

# Alternation of path names; the tuple-style parentheses only group the string pieces
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
# Either a known prefix followed by 10+ id characters, or one of the fixed short ids
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

270

271

# Host-name regex fragments, one per known Invidious-style front end
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)

def _login(self):
    """Check login prerequisites; password login itself is not performed.
    Raises through ``raise_login_required`` when ``_LOGIN_REQUIRED`` is set
    and neither cookie option was given, and warns when a username is
    configured.
    """
    cookie_options = ('cookiefile', 'cookiesfrombrowser')
    if self._LOGIN_REQUIRED and not any(
            self.get_param(option) is not None for option in cookie_options):
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')
    username, _ = self._get_login_info()
    if not username:
        return
    self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

351

352

def _initialize_consent(self):
    """Set an accepting ``CONSENT`` cookie unless one is already in place.
    Nothing is done when ``__Secure-3PSID`` is set or the existing CONSENT
    value contains ``YES``. A pending consent id is reused when present,
    otherwise a random three-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return
    pending_id = None
    current = jar.get('CONSENT')
    if current:
        if 'YES' in current.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current.value, 'consent', default=None)
    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

366

367

def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so that ``hl=en`` and ``tz=UTC`` are set.
    Existing preferences are kept when the cookie parses; on a parse error
    a warning is reported and only the forced values are written.
    """
    stored = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if stored:
        try:
            settings = dict(compat_urlparse.parse_qsl(stored.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))

378

379

def _real_initialize(self):
    """Run the cookie/login preparation steps in their fixed order."""
    for step in (self._initialize_pref, self._initialize_consent, self._login):
        step()

383

384

# Assignment of the initial-data object, written either as window["ytInitialData"] or ytInitialData
_YT_INITIAL_DATA_RE = (
    r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)'  # either spelling of the variable
    r'\s*=\s*({.+?})\s*;')  # lazily captured object literal up to a ';'
# Same shape for the player response object
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Alternatives that may follow the assignment
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

387

388

def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for ``client``."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)

390

391

def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` value of the built-in config for ``client``."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']

393

394

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg``, falling back to the default config of ``default_client``.
    The fallback is consulted whenever the first lookup yields a falsy value.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

398

399

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg``, or from the default config as fallback."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

403

404

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg``, or from the default config as fallback."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

408

409

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg``, or from the default config as fallback."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)

411

412

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the InnerTube context with language and timezone forced.
    The context comes from ``ytcfg`` when it has one, otherwise from a copy
    of the default config for ``default_client``.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the `SAPISIDHASH <time>_<sha1>` authorization value for `origin`.

    On first use the SAPISID value is read from the youtube.com cookies and
    cached on `self._SAPISID` (`False` when no usable cookie exists).
    Returns None when no SAPISID value is available.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            # Remember the miss so the cookie jar is not searched again
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID,
                    secure=True, expire_time=time_now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')
    sapisidhash = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

446

447

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the Innertube endpoint `ep` and return the decoded JSON.

    @param ep              endpoint name appended to `/youtubei/v1/`
    @param query           dict merged into the request body next to 'context'
    @param video_id        id passed on to _download_json
    @param headers         extra headers; override the generated API headers
    @param context         request context; defaults to that of `default_client`
    @param api_key         API key; defaults to that of `default_client`
    @param api_hostname    host to call; defaults to that of `default_client`
    `fatal`, `note` and `errnote` are passed through to _download_json.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    # Fall back to the key of the same client the context, headers and host
    # come from, instead of always using the web client's key
    key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': key})

462

463

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData object in `webpage` and return it parsed.

    The pattern followed by a known boundary is tried before the bare pattern.
    Returns None when nothing is found (only reachable with `fatal=False`).
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_data:
        return None
    return self._parse_json(raw_data, item_id, fatal=fatal)

469

470

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to
    an int, or None when there is none.
    """
    # Lazy generator: later ytcfgs are not inspected once an index is found
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg` ('ID_TOKEN'), else from `webpage`.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

491

492

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        sync_ids = datasync_id.split('||')
        if len(sync_ids) < 2 or not sync_ids[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, visitor_data_paths, expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

524

525

def extract_ytcfg(self, video_id, webpage):
    """Extract the object passed to `ytcfg.set(...)` in `webpage`.

    Returns a dict; it is empty when `webpage` is empty, when no
    `ytcfg.set({...});` call is found, or when the JSON cannot be parsed.
    """
    if not webpage:
        return {}
    # The call's parentheses must be escaped literals: with `$` in their place
    # the pattern demands end-of-input before the `{` and can never match.
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

532

533

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit keyword values win over what can be extracted from `ytcfg`.
    'X-Goog-AuthUser' is set when an account sync id or session index is known;
    'Authorization' and 'X-Origin' only when a SAPISIDHASH can be generated.
    Headers whose value is None are left out of the result.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

557

558

@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Return the query dict for a continuation request.

    'clickTracking' is only included when `ctp` (click tracking params) is truthy.
    """
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    click_tracking = {'clickTracking': {'clickTrackingParams': ctp}} if ctp else {}
    return {'continuation': continuation, **click_tracking}

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data.

    Returns None when the renderer has no such data or it lacks a continuation.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'])
    next_continuation = try_get(renderer, data_getters, dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))

582

583

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    'continuationCommand' token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

592

593

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None when there is none.

    The renderer's own next/reload continuation data is preferred; otherwise the
    first usable continuationItemRenderer among its 'contents' and 'items' is used.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    contents = [
        content
        for key in ('contents', 'items')
        for content in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for content in contents:
        if not isinstance(content, dict):
            continue
        continuation = cls._extract_continuation_ep_data(
            try_get(content, endpoint_getters, dict))
        if continuation:
            return continuation

@classmethod
def _extract_alerts(cls, data):
    """Yield `(alert_type, message)` for each alert found in `data['alerts']`.

    Entries that are not dicts, alerts without a 'type' and alerts without any
    message text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the inner level like the outer one: a non-dict value here
            # would otherwise raise AttributeError on `.get`
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

626

627

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report `(type, message)` alerts as warnings and raise on the last error.

    With `fatal` set, alerts whose type is 'error' (case-insensitive) count as
    errors: all but the last are reported as warnings and the last one is
    raised as ExtractorError. Everything else is reported as a warning.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        bucket = errors if alert_type.lower() == 'error' and fatal else warnings
        bucket.append([alert_type, alert_message])

    # The last error is kept back so that it can be raised below
    to_warn = warnings + errors[:-1]
    for alert_type, alert_message in to_warn:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

640

641

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts.

    Extra arguments are forwarded to _report_alerts unchanged.
    """
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

643

644

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in `renderer['badges']`."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found under any of the given paths.

    Each candidate is read from its 'simpleText', else by joining the 'text' of
    its 'runs' (or of the candidate itself when it is a list), limited to the
    first `max_runs` runs when that is set. Without paths, `data` itself is the
    candidate. Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only paths containing `...` or a list/tuple key are iterated as
            # multiple results; any other result is wrapped as one candidate
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Parse a count from the text found under the given paths.

    parse_count() is tried first; otherwise the leading run of digits and
    commas (whitespace removed) is converted with str_to_int().
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    thumbnails = []
    for path in path_list or [()]:
        thumbnails_path = (*variadic(path), 'thumbnails', ...)
        for thumbnail in traverse_obj(data, thumbnails_path, default=[]):
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                # Drop the query string
                thumbnail_url = thumbnail_url.partition('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text has no relative time or it cannot be converted.
    """
    # `mobj` was referenced without ever being assigned; search for either a
    # start keyword or a "<number> <unit>(s) ago" expression
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _extract_time_text(self, renderer, *path_list):
    """Return `(timestamp, text)` for the time text found under the given paths.

    The text is first read as a relative time, then as an absolute date. The
    timestamp is None when neither works; a warning is reported (once) if the
    text was non-empty in that case.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text) or unified_timestamp(
            self._search_regex(
                (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'),
                text.lower(), 'time text', default=None))

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text

739

740

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint `ep`, retrying on transient failures.

    Up to `extractor_retries` (default 3) retries are made on network errors
    other than HTTP 403/429, on 'unknown error' alerts and when none of
    `check_get_keys` is present in the response. With `fatal` unset, failures
    are reported as warnings and None is returned instead of raising.
    """
    response = None
    last_error = None
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        retry_note = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_note))
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, network_exceptions):
                is_http_error = isinstance(cause, compat_HTTPError)
                if is_http_error and not is_html(cause.read(512)):
                    # Non-HTML error body: surface the message it carries
                    cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or cause.code not in (403, 429):
                    last_error = error_to_compat_str(cause or e.msg)
                    if attempt < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Only reached when the request succeeded: every path of the handler
        # above continues, raises or returns
        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response

@staticmethod
def is_music_url(url):
    """Whether `url` starts with an http(s) music.youtube.com address."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))

813

814

def _extract_video(self, renderer):
    """Build a `url`-type result for the video described by `renderer`.

    The entry points at the video's watch page (handled by YoutubeIE) and
    carries the metadata that can be read from the renderer.
    """
    video_id = renderer.get('videoId')
    overlay_path = ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer')

    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration = parse_duration(self._get_text(
        renderer, 'lengthText', (*overlay_path, 'text')))
    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, (*overlay_path, 'style'), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # A scheduled start time wins, then a "streamed" time text, then live markers
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        # 'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

855

IE_DESC = 'YouTube'

856

_VALID_URL = r"""(?x)^

857

(

858

(?:https?://|//) # http(s):// or protocol-independent URL

859

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

860

(?:www\.)?deturl\.com/www\.youtube\.com|

861

(?:www\.)?pwnyoutube\.com|

862

(?:www\.)?hooktube\.com|

863

(?:www\.)?yourepeat\.com|

864

tube\.majestyc\.net|

865

%(invidious)s|

866

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

867

(?:.*?\#/)? # handle anchor (#/) redirect urls

868

(?: # the various things that can precede the ID:

869

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

870

|(?: # or the v= param in all its forms

871

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

872

(?:\?|\#!?) # the params delimiter ? or # or #!

873

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

879

vid\.plus| # or vid.plus/xxxx

880

zwearz\.com/watch| # or zwearz.com/watch/xxxx

881

%(invidious)s

882

)/

883

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

884

)

885

)? # all until now is optional -> you can pass the naked ID

886

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

887

(?(1).+)? # if we found the ID, everything can follow

888

(?:\#|$)""" % {

889

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

890

}

891

_PLAYER_INFO_RE = (

892

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

893

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

894

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

895

)

896

_formats = {

897

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

898

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

899

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

900

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

901

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

902

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

903

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

904

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

905

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

906

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

907

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

908

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

909

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

910

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

911

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

912

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

913

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

914

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

919

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

920

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

921

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

922

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

923

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

924

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

925

926

# Apple HTTP Live Streaming

927

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

928

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

929

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

930

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

931

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

932

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

933

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

934

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

935

936

# DASH mp4 video

937

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

938

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

939

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

940

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

941

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

942

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

943

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

944

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

945

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

946

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

947

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

948

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

949

950

# Dash mp4 audio

951

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

952

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

953

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

954

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

955

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

956

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

957

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

958

959

# Dash webm

960

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

961

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

962

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

963

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

964

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

965

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

966

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

967

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

968

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

969

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

970

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

971

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

972

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

974

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

975

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

976

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

977

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

978

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

979

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

980

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

981

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

982

983

# Dash webm audio

984

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

985

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

986

987

# Dash webm audio with opus inside

988

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

989

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

990

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

991

992

# RTMP (unnamed)

993

'_rtmp': {'protocol': 'rtmp'},

994

995

# av01 video only formats sometimes served with "unknown" codecs

996

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

997

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

998

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

999

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1000

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1001

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1002

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1003

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1004

}

1005

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1017

'uploader': 'Philipp Hagemeister',

1018

'uploader_id': 'phihag',

1019

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1020

'channel': 'Philipp Hagemeister',

1021

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1022

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1023

'upload_date': '20121002',

1024

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1025

'categories': ['Science & Technology'],

1026

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1031

'playable_in_embed': True,

1032

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1033

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1041

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1046

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1047

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1048

'uploader': 'SET India',

1049

'uploader_id': 'setindia',

1050

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1051

'age_limit': 18,

1052

},

1053

'skip': 'Private video',

1054

},

1055

{

1056

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1057

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1062

'uploader': 'Philipp Hagemeister',

1063

'uploader_id': 'phihag',

1064

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1065

'channel': 'Philipp Hagemeister',

1066

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1067

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1068

'upload_date': '20121002',

1069

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1070

'categories': ['Science & Technology'],

1071

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1076

'playable_in_embed': True,

1077

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1078

'live_status': 'not_live',

'age_limit': 0,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1087

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1092

'uploader_id': '8KVIDEO',

1093

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1094

'description': '',

1095

'uploader': '8KVIDEO',

1096

'title': 'UHDTV TEST 8K VIDEO.mp4'

1097

},

1098

'params': {

1099

'youtube_include_dash_manifest': True,

1100

'format': '141',

1101

},

1102

'skip': 'format 141 not served anymore',

1103

},

1104

# DASH manifest with encrypted signature

1105

{

1106

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1111

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1112

'duration': 244,

1113

'uploader': 'AfrojackVEVO',

1114

'uploader_id': 'AfrojackVEVO',

1115

'upload_date': '20131011',

1116

'abr': 129.495,

1117

'like_count': int,

1118

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1119

'playable_in_embed': True,

1120

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1121

'view_count': int,

1122

'track': 'The Spark',

1123

'live_status': 'not_live',

1124

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1125

'channel': 'Afrojack',

1126

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1127

'tags': 'count:19',

1128

'availability': 'public',

1129

'categories': ['Music'],

1130

'age_limit': 0,

1131

'alt_title': 'The Spark',

1132

},

1133

'params': {

1134

'youtube_include_dash_manifest': True,

1135

'format': '141/bestaudio[ext=m4a]',

1136

},

1137

},

1138

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1139

{

1140

'note': 'Embed allowed age-gate video',

1141

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1146

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1147

'duration': 142,

1148

'uploader': 'The Witcher',

1149

'uploader_id': 'WitcherGame',

1150

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1151

'upload_date': '20140605',

1152

'age_limit': 18,

1153

'categories': ['Gaming'],

1154

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1155

'availability': 'needs_auth',

1156

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1157

'like_count': int,

1158

'channel': 'The Witcher',

1159

'live_status': 'not_live',

1160

'tags': 'count:17',

1161

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1162

'playable_in_embed': True,

'view_count': int,

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1168

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1173

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1174

'upload_date': '20200408',

1175

'uploader_id': 'FlyingKitty900',

1176

'uploader': 'FlyingKitty',

1177

'age_limit': 18,

1178

'availability': 'needs_auth',

1179

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1180

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1181

'channel': 'FlyingKitty',

1182

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1183

'view_count': int,

1184

'categories': ['Entertainment'],

1185

'live_status': 'not_live',

1186

'tags': ['Flyingkitty', 'godzilla 2'],

1187

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1188

'like_count': int,

1189

'duration': 177,

1190

'playable_in_embed': True,

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1195

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1196

'info_dict': {

1197

'id': 'Tq92D6wQ1mg',

1198

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1199

'ext': 'mp4',

1200

'upload_date': '20191227',

1201

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1202

'uploader': 'Projekt Melody',

1203

'description': 'md5:17eccca93a786d51bc67646756894066',

1204

'age_limit': 18,

1205

'like_count': int,

1206

'availability': 'needs_auth',

1207

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1208

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1209

'view_count': int,

1210

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1211

'channel': 'Projekt Melody',

1212

'live_status': 'not_live',

1213

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1214

'playable_in_embed': True,

1215

'categories': ['Entertainment'],

1216

'duration': 106,

1217

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

},

},

{

'note': 'Non-Agegated non-embeddable video',

1222

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1227

'uploader': 'Herr Lurik',

1228

'uploader_id': 'st3in234',

1229

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1230

'upload_date': '20130730',

1231

'track': 'Such mich find mich',

1232

'age_limit': 0,

1233

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1234

'like_count': int,

1235

'playable_in_embed': False,

1236

'creator': 'OOMPH!',

1237

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1238

'view_count': int,

1239

'alt_title': 'Such mich find mich',

1240

'duration': 210,

1241

'channel': 'Herr Lurik',

1242

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1243

'categories': ['Music'],

1244

'availability': 'public',

1245

'uploader_url': 'http://www.youtube.com/user/st3in234',

1246

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1247

'live_status': 'not_live',

'artist': 'OOMPH!',

},

},

{

'note': 'Non-bypassable age-gated video',

1253

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1254

'only_matching': True,

1255

},

1256

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1257

# YouTube Red ad is not captured for creator

1258

{

1259

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1265

'uploader_id': 'deadmau5',

1266

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1267

'creator': 'deadmau5',

1268

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1269

'uploader': 'deadmau5',

1270

'title': 'Deadmau5 - Some Chords (HD)',

1271

'alt_title': 'Some Chords',

1272

'availability': 'public',

1273

'tags': 'count:14',

1274

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1275

'view_count': int,

1276

'live_status': 'not_live',

1277

'channel': 'deadmau5',

1278

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1279

'like_count': int,

1280

'track': 'Some Chords',

1281

'artist': 'deadmau5',

1282

'playable_in_embed': True,

1283

'age_limit': 0,

1284

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1285

'categories': ['Music'],

1286

'album': 'Some Chords',

1287

},

1288

'expected_warnings': [

1289

'DASH manifest missing',

1290

]

1291

},

1292

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1293

{

1294

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1300

'uploader_id': 'olympic',

1301

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1302

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1303

'uploader': 'Olympics',

1304

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1305

'like_count': int,

1306

'release_timestamp': 1343767800,

1307

'playable_in_embed': True,

1308

'categories': ['Sports'],

1309

'release_date': '20120731',

1310

'channel': 'Olympics',

1311

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1312

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1313

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1314

'age_limit': 0,

1315

'availability': 'public',

1316

'live_status': 'was_live',

1317

'view_count': int,

1318

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1319

},

1320

'params': {

1321

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1331

'duration': 85,

1332

'upload_date': '20110310',

1333

'uploader_id': 'AllenMeow',

1334

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1335

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1336

'uploader': '孫ᄋᄅ',

1337

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1338

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1343

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1344

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1345

'view_count': int,

1346

'categories': ['People & Blogs'],

1347

'like_count': int,

1348

'live_status': 'not_live',

1349

'availability': 'unlisted',

1350

},

1351

},

1352

# url_encoded_fmt_stream_map is empty string

1353

{

1354

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1359

'description': '',

1360

'upload_date': '20150404',

1361

'uploader_id': 'spbelect',

1362

'uploader': 'Наблюдатели Петербурга',

1363

},

1364

'params': {

1365

'skip_download': 'requires avconv',

1366

},

1367

'skip': 'This live event has ended.',

1368

},

1369

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1370

{

1371

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1376

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1377

'duration': 220,

1378

'upload_date': '20150625',

1379

'uploader_id': 'dorappi2000',

1380

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1381

'uploader': 'dorappi2000',

1382

'formats': 'mincount:31',

1383

},

1384

'skip': 'not actual anymore',

1385

},

1386

# DASH manifest with segment_list

1387

{

1388

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1389

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1394

'uploader': 'Airtek',

1395

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1396

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1397

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1398

},

1399

'params': {

1400

'youtube_include_dash_manifest': True,

1401

'format': '135', # bestvideo

1402

},

1403

'skip': 'This live event has ended.',

1404

},

1405

{

1406

# Multifeed videos (multiple cameras), URL is for Main Camera

1407

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1408

'info_dict': {

1409

'id': 'jvGDaLqkpTg',

1410

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1411

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1418

'description': 'md5:e03b909557865076822aa169218d6a5d',

1419

'duration': 10643,

1420

'upload_date': '20161111',

1421

'uploader': 'Team PGP',

1422

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1423

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1430

'description': 'md5:e03b909557865076822aa169218d6a5d',

1431

'duration': 10991,

1432

'upload_date': '20161111',

1433

'uploader': 'Team PGP',

1434

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1435

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1442

'description': 'md5:e03b909557865076822aa169218d6a5d',

1443

'duration': 10995,

1444

'upload_date': '20161111',

1445

'uploader': 'Team PGP',

1446

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1447

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1454

'description': 'md5:e03b909557865076822aa169218d6a5d',

1455

'duration': 10990,

1456

'upload_date': '20161111',

1457

'uploader': 'Team PGP',

1458

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1459

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1464

},

1465

'skip': 'Not multifeed anymore',

1466

},

1467

{

1468

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1469

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1470

'info_dict': {

1471

'id': 'gVfLd0zydlo',

1472

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1473

},

1474

'playlist_count': 2,

1475

'skip': 'Not multifeed anymore',

1476

},

1477

{

1478

'url': 'https://vid.plus/FlRa-iH7PGw',

1479

'only_matching': True,

1480

},

1481

{

1482

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1483

'only_matching': True,

1484

},

1485

{

1486

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1487

# Also tests cut-off URL expansion in video description (see

1488

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1489

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1490

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1495

'alt_title': 'Dark Walk',

1496

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1497

'duration': 133,

1498

'upload_date': '20151119',

1499

'uploader_id': 'IronSoulElf',

1500

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1501

'uploader': 'IronSoulElf',

1502

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1503

'track': 'Dark Walk',

1504

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1505

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1506

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1507

'categories': ['Film & Animation'],

1508

'view_count': int,

1509

'live_status': 'not_live',

1510

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1511

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1512

'tags': 'count:13',

1513

'availability': 'public',

1514

'channel': 'IronSoulElf',

1515

'playable_in_embed': True,

'like_count': int,

'age_limit': 0,

},

'params': {

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1525

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1526

'only_matching': True,

1527

},

1528

{

1529

# Video with yt:stretch=17:0

1530

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1535

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1536

'upload_date': '20151107',

1537

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1538

'uploader': 'CH GAMER DROID',

1539

},

1540

'params': {

1541

'skip_download': True,

1542

},

1543

'skip': 'This video does not exist.',

1544

},

1545

{

1546

# Video with incomplete 'yt:stretch=16:'

1547

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1548

'only_matching': True,

1549

},

1550

{

1551

# Video licensed under Creative Commons

1552

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1557

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1558

'duration': 721,

1559

'upload_date': '20150127',

1560

'uploader_id': 'BerkmanCenter',

1561

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1562

'uploader': 'The Berkman Klein Center for Internet & Society',

1563

'license': 'Creative Commons Attribution license (reuse allowed)',

1564

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1565

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1566

'like_count': int,

1567

'age_limit': 0,

1568

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1569

'channel': 'The Berkman Klein Center for Internet & Society',

1570

'availability': 'public',

1571

'view_count': int,

1572

'categories': ['Education'],

1573

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1574

'live_status': 'not_live',

1575

'playable_in_embed': True,

1576

},

1577

'params': {

1578

'skip_download': True,

},

},

{

# Channel-like uploader_url

1583

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1588

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1589

'duration': 4060,

1590

'upload_date': '20151119',

1591

'uploader': 'Bernie Sanders',

1592

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1593

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1594

'license': 'Creative Commons Attribution license (reuse allowed)',

1595

'playable_in_embed': True,

1596

'tags': 'count:12',

1597

'like_count': int,

1598

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1599

'age_limit': 0,

1600

'availability': 'public',

1601

'categories': ['News & Politics'],

1602

'channel': 'Bernie Sanders',

1603

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1604

'view_count': int,

1605

'live_status': 'not_live',

1606

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1607

},

1608

'params': {

1609

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1614

'only_matching': True,

1615

},

1616

{

1617

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1618

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1619

'only_matching': True,

1620

},

1621

{

1622

# Rental video preview

1623

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1628

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1629

'upload_date': '20150811',

1630

'uploader': 'FlixMatrix',

1631

'uploader_id': 'FlixMatrixKaravan',

1632

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1633

'license': 'Standard YouTube License',

1634

},

1635

'params': {

1636

'skip_download': True,

1637

},

1638

'skip': 'This video is not available.',

1639

},

1640

{

1641

# YouTube Red video with episode data

1642

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1647

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1648

'duration': 2085,

1649

'upload_date': '20170118',

1650

'uploader': 'Vsauce',

1651

'uploader_id': 'Vsauce',

1652

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1653

'series': 'Mind Field',

1654

'season_number': 1,

1655

'episode_number': 1,

1656

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1657

'tags': 'count:12',

1658

'view_count': int,

1659

'availability': 'public',

1660

'age_limit': 0,

1661

'channel': 'Vsauce',

1662

'episode': 'Episode 1',

1663

'categories': ['Entertainment'],

1664

'season': 'Season 1',

1665

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1666

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1667

'like_count': int,

1668

'playable_in_embed': True,

1669

'live_status': 'not_live',

1670

},

1671

'params': {

1672

'skip_download': True,

1673

},

1674

'expected_warnings': [

1675

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1680

# as inappropriate or offensive to some audiences.

1681

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1686

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1687

'duration': 965,

1688

'upload_date': '20140124',

1689

'uploader': 'New Century Foundation',

1690

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1691

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1692

},

1693

'params': {

1694

'skip_download': True,

1695

},

1696

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1701

'only_matching': True,

1702

},

1703

{

1704

# geo restricted to JP

1705

'url': 'sJL6WA-aGkQ',

1706

'only_matching': True,

1707

},

1708

{

1709

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1710

'only_matching': True,

1711

},

1712

{

1713

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1714

'only_matching': True,

1715

},

1716

{

1717

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1718

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1719

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1724

'only_matching': True,

1725

},

1726

{

1727

# Video with unsupported adaptive stream type formats

1728

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1733

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1734

'duration': 433,

1735

'upload_date': '20130923',

1736

'uploader': 'Amelia Putri Harwita',

1737

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1738

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1739

'formats': 'maxcount:10',

1740

},

1741

'params': {

1742

'skip_download': True,

1743

'youtube_include_dash_manifest': False,

1744

},

1745

'skip': 'not actual anymore',

1746

},

1747

{

1748

# Youtube Music Auto-generated description

1749

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1754

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1755

'upload_date': '20190312',

1756

'uploader': 'Stephen - Topic',

1757

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1758

'artist': 'Stephen',

1759

'track': 'Voyeur Girl',

1760

'album': 'it\'s too much love to know my dear',

1761

'release_date': '20190313',

1762

'release_year': 2019,

1763

'alt_title': 'Voyeur Girl',

1764

'view_count': int,

1765

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1766

'playable_in_embed': True,

1767

'like_count': int,

1768

'categories': ['Music'],

1769

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1770

'channel': 'Stephen',

1771

'availability': 'public',

1772

'creator': 'Stephen',

1773

'duration': 169,

1774

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1775

'age_limit': 0,

1776

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1777

'tags': 'count:11',

1778

'live_status': 'not_live',

1779

},

1780

'params': {

1781

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1786

'only_matching': True,

1787

},

1788

{

1789

# invalid -> valid video id redirection

1790

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1795

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1796

'upload_date': '20090125',

1797

'uploader': 'Prochorowka',

1798

'uploader_id': 'Prochorowka',

1799

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1800

'artist': 'Panjabi MC',

1801

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1802

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1803

},

1804

'params': {

1805

'skip_download': True,

1806

},

1807

'skip': 'Video unavailable',

1808

},

1809

{

1810

# empty description results in an empty string

1811

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1818

'uploader_id': 'ElevageOrVert',

1819

'uploader': 'ElevageOrVert',

1820

'view_count': int,

1821

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1822

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1823

'like_count': int,

1824

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1825

'tags': [],

1826

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1827

'availability': 'public',

1828

'age_limit': 0,

1829

'categories': ['Pets & Animals'],

1830

'duration': 7,

1831

'playable_in_embed': True,

1832

'live_status': 'not_live',

1833

'channel': 'ElevageOrVert',

1834

},

1835

'params': {

1836

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1841

# see [2] for an example with '};' inside ytInitialPlayerResponse

1842

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1843

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1844

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1849

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1850

'upload_date': '20130831',

1851

'uploader_id': 'kudvenkat',

1852

'uploader': 'kudvenkat',

1853

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1854

'like_count': int,

1855

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1856

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1857

'live_status': 'not_live',

1858

'categories': ['Education'],

1859

'availability': 'public',

1860

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1861

'tags': 'count:12',

1862

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1867

},

1868

'params': {

1869

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1874

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1875

'only_matching': True,

1876

},

1877

{

1878

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1879

'only_matching': True,

1880

},

1881

{

1882

# https://github.com/ytdl-org/youtube-dl/pull/28094

1883

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1889

'upload_date': '20141120',

1890

'uploader': 'The Cinematic Orchestra - Topic',

1891

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1892

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1893

'artist': 'The Cinematic Orchestra',

1894

'track': 'Burn Out',

1895

'album': 'Every Day',

1896

'like_count': int,

1897

'live_status': 'not_live',

1898

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1903

'creator': 'The Cinematic Orchestra',

1904

'channel': 'The Cinematic Orchestra',

1905

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1906

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1907

'availability': 'public',

1908

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1909

'categories': ['Music'],

1910

'playable_in_embed': True,

1911

},

1912

'params': {

1913

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1918

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1919

'only_matching': True,

1920

},

1921

{

1922

# controversial video, requires bpctr/contentCheckOk

1923

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1928

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1929

'uploader': 'CBS Mornings',

1930

'uploader_id': 'CBSThisMorning',

1931

'upload_date': '20140716',

1932

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1933

'duration': 170,

1934

'categories': ['News & Politics'],

1935

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1936

'view_count': int,

1937

'channel': 'CBS Mornings',

1938

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1939

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1940

'age_limit': 18,

1941

'availability': 'needs_auth',

1942

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1943

'like_count': int,

1944

'live_status': 'not_live',

1945

'playable_in_embed': True,

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1950

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1955

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1956

'upload_date': '20201120',

1957

'uploader': 'Walk around Japan',

1958

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1959

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1960

'duration': 1456,

1961

'categories': ['Travel & Events'],

1962

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1963

'view_count': int,

1964

'channel': 'Walk around Japan',

1965

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1966

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1967

'age_limit': 0,

1968

'availability': 'public',

1969

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1970

'live_status': 'not_live',

1971

'playable_in_embed': True,

1972

},

1973

'params': {

1974

'skip_download': True,

1975

},

1976

}, {

1977

# Has multiple audio streams

1978

'url': 'WaOKSUlf4TM',

1979

'only_matching': True

1980

}, {

1981

# Requires Premium: has format 141 when requested using YTM url

1982

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

1983

'only_matching': True

1984

}, {

1985

# multiple subtitles with same lang_code

1986

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

1987

'only_matching': True,

1988

}, {

1989

# Force use android client fallback

1990

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

1991

'info_dict': {

1992

'id': 'YOelRv7fMxY',

1993

'title': 'DIGGING A SECRET TUNNEL Part 1',

1994

'ext': '3gp',

1995

'upload_date': '20210624',

1996

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

1997

'uploader': 'colinfurze',

1998

'uploader_id': 'colinfurze',

1999

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2000

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2001

'duration': 596,

2002

'categories': ['Entertainment'],

2003

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2004

'view_count': int,

2005

'channel': 'colinfurze',

2006

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2007

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2008

'age_limit': 0,

2009

'availability': 'public',

2010

'like_count': int,

2011

'live_status': 'not_live',

2012

'playable_in_embed': True,

2013

},

2014

'params': {

2015

'format': '17', # 3gp format available on android

2016

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2021

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2022

'only_matching': True,

2023

'params': {

2024

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2029

'only_matching': True,

2030

}, {

2031

'note': 'Storyboards',

2032

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2038

'uploader_id': 'scishow',

2039

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2040

'upload_date': '20140324',

2041

'uploader': 'SciShow',

2042

'like_count': int,

2043

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2044

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2045

'view_count': int,

2046

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2047

'playable_in_embed': True,

2048

'tags': 'count:12',

2049

'uploader_url': 'http://www.youtube.com/user/scishow',

2050

'availability': 'public',

2051

'channel': 'SciShow',

2052

'live_status': 'not_live',

2053

'duration': 248,

2054

'categories': ['Education'],

2055

'age_limit': 0,

2056

}, 'params': {'format': 'mhtml', 'skip_download': True}

}

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected; every other URL is deferred to the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level; the local
    # import presumably keeps this method self-contained - confirm before
    # removing it.
    from ..utils import parse_qs

    list_id = parse_qs(url).get('list', [None])[0]
    return False if list_id else super(YoutubeIE, cls).suitable(url)

2068

2069

def __init__(self, *args, **kwargs):
    """Initialise the extractor together with its per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    # _player_cache: cache key -> signature/nsig function or decrypted value
    self._code_cache, self._player_cache = {}, {}

2073

2074

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Wire the ``is_from_start`` formats up for lazy live-DASH downloading.

    Every kept format dict is switched to the ``http_dash_segments_generator``
    protocol and receives a ``fragments`` callable bound to
    ``_live_dash_fragments``.  The shared ``mpd_feed`` closure re-downloads
    the player responses once the requested delay has passed since the
    previous refresh.
    """
    refresh_lock = threading.Lock()

    is_live, last_refresh = True, time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        nonlocal formats, last_refresh, is_live
        # Still within the allowed age of the current data: nothing to do
        if last_refresh + delay >= time.time():
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        last_refresh = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with refresh_lock:
            refetch_manifest(format_id, delay)

        for fmt in formats:
            if fmt['format_id'] == format_id:
                return fmt['manifest_url'], fmt['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2116

2117

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts (``{'url': ...}``) for a live DASH stream.

    @param format_id        format whose manifest is polled through mpd_feed
    @param live_start_time  start time of the stream, or a falsy value
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              downloader context; ``start`` is read and
                            ``last_error`` is popped from it

    The generator stops once the stream is no longer live or too many
    consecutive polls produced no usable fragment information.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up-front: the helper below returns it before the main loop has
    # assigned it when the very first manifest fetch fails.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence):
        """@returns (should_continue, last sequence number)"""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        # A default is required here: a bare next() would leak StopIteration
        # into the surrounding generator, which Python turns into RuntimeError.
        fmt_info = next(
            (x for x in fmts or [] if x['manifest_stream_number'] == stream_number), None)
        if not fmt_info:
            no_fragment_score += 1
            return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 1
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True)
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False)
                if not should_continue:
                    known_idx = idx - 1
                    # caught right below to restart the outer polling loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Poll at most once every FETCH_SPAN seconds
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2216

2217

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in *ytcfgs*, or None.

    Protocol-relative and site-relative URLs are resolved against
    https://www.youtube.com.
    """
    js_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not js_url:
        return None
    if js_url.startswith('//'):
        return 'https:' + js_url
    if re.match(r'https?://', js_url):
        return js_url
    return compat_urlparse.urljoin('https://www.youtube.com', js_url)

2229

2230

def _download_player_url(self, video_id, fatal=False):

2231

res = self._download_webpage(

2232

'https://www.youtube.com/iframe_api',

2233

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2234

if res:

2235

player_version = self._search_regex(

2236

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2237

if player_version:

2238

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2239

2240

def _signature_cache_id(self, example_sig):

2241

""" Return a string representation of a signature """

2242

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2243

2244

@classmethod

2245

def _extract_player_info(cls, player_url):

2246

for player_re in cls._PLAYER_INFO_RE:

2247

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2252

return id_m.group('id')

2253

2254

def _load_player(self, video_id, player_url, fatal=True):

2255

player_id = self._extract_player_info(player_url)

2256

if player_id not in self._code_cache:

2257

code = self._download_webpage(

2258

player_url, video_id, fatal=fatal,

2259

note='Downloading player ' + player_id,

2260

errnote='Download of %s failed' % player_url)

2261

if code:

2262

self._code_cache[player_id] = code

2263

return self._code_cache.get(player_id)

2264

2265

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures shaped like *example_sig*.

    The character permutation is looked up in the 'youtube-sigfuncs'
    filesystem cache first; otherwise it is derived from the player JS and
    stored there.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    # func_id is used as a cache name, so it must not contain path components
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Run the function over a string of distinct characters so that the
    # output spells out which input index ends up at each position
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, spec)
    return sig_func

2287

2288

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to *func* when 'youtube_print_sig_code' is set."""
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield index/slice expressions over ``s`` reproducing the index list."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for prev, cur in zip(idxs[:-1], idxs[1:]):
            delta = cur - prev
            if step is not None:
                # Inside a run: emit it as soon as the stride changes
                if delta != step:
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(start, cur, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2329

2330

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a callable.

    The returned callable takes the encrypted signature string and runs the
    interpreted JS function on it.
    """
    # Parentheses that belong to the JS source must be escaped (\( and \));
    # a bare "$" would anchor at end of input and never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The interpreted function expects its arguments as a list
    return lambda s: initial_function([s])

2353

2354

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One decryption function per (player, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2372

2373

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    # Already decrypted values are cached per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2397

2398

def _extract_n_function_name(self, jscode):
    """Return the name of the JS function that transforms the ``n`` parameter."""
    # Literal JS parentheses are escaped; a bare "$" in their place would
    # anchor at end of input and the pattern could never match.
    return self._search_regex(
        (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
        jscode, 'Initial JS player n function name', group='nfunc')

2402

2403

def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's ``n`` transformation.

    The function code is read from the 'youtube-nsig' cache when present;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt_n(s):
        # The interpreted function expects its arguments as a list
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt_n

2420

2421

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2444

2445

def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL so that the video counts as watched.

    Only a warning is reported when no tracking URL is available or the
    request fails.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.choice picks uniformly; randint(0, 256) is inclusive on both
    # ends and, masked with 63, would favour the first character.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2470

2471

@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids referenced by *webpage*."""
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = [unescapeHTML(mobj.group('url')) for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    found += [
        unescapeHTML(video_id)
        for video_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)]

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall yields one tuple per match; the video id is the last group
    found.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return found

@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)

2507

2508

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of *url* matched against ``_VALID_URL``.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2514

2515

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in *data*."""
    chapter_list = traverse_obj(
        data, (
            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
            'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
        ), expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)

2529

2530

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else []."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Panels are tried in order and evaluation stops at the first non-empty result
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2545

2546

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2547

chapters = []

2548

last_chapter = {'start_time': 0}

2549

for idx, chapter in enumerate(chapter_list or []):

2550

title = chapter_title(chapter)

2551

start_time = chapter_time(chapter)

2552

if start_time is None:

2553

continue

2554

last_chapter['end_time'] = start_time

2555

if start_time < last_chapter['start_time']:

2556

if idx == 1:

2557

chapters.pop()

2558

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2559

else:

2560

self.report_warning(f'Invalid start time for chapter "{title}"')

2561

continue

2562

last_chapter = {'start_time': start_time, 'title': title}

2563

chapters.append(last_chapter)

2564

last_chapter['end_time'] = duration

2565

return chapters

2566

2567

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON value matched by *regex* in *webpage* and parse it.

    The pattern followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried before the
    bare pattern; ``'{}'`` is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2571

2572

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer carries no ``commentId``.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2606

2607

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts by following continuations from *root_continuation_data*.

    Called without *parent* it walks the top-level comment section; for
    every thread with replies it calls itself with the parent comment id
    and the shared *tracker* so that counters span all recursion levels.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Report the estimated total and return the continuation of the chosen sort order."""
        found_continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            found_continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not found_continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return found_continuation

    def extract_thread(contents):
        """Yield the comments of one page, followed by each comment's replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer to stop
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_limit = min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(reply_entries, reply_limit)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing limits are padded with '' and therefore become sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only provides the header / sort menu
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Locate the item section that holds the comments
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in sections:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # The first value of the max_comments argument caps the overall number
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

2754

2755

@staticmethod

2756

def _get_checkok_params():

2757

return {'contentCheckOk': True, 'racyCheckOk': True}

2758

2759

@classmethod

2760

def _generate_player_context(cls, sts=None):

2761

context = {

2762

'html5Preference': 'HTML5_PREF_WANTS',

2763

}

2764

if sts is not None:

2765

context['signatureTimestamp'] = sts

2766

return {

2767

'playbackContext': {

2768

'contentPlaybackContext': context

2769

},

2770

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Return True when the playability status marks the video as age-gated."""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

2784

2785

@staticmethod
def _is_unplayable(player_response):
    """Return True when the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2788

2789

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Query the 'player' endpoint for *video_id* as *client*.

    Returns the response, or None when it is empty.  The signature
    timestamp is only looked up when a *player_url* is given.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

2805

2806

def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of innertube clients to query.

    Clients come from the 'player_client' extractor argument: 'default'
    expands to the default clients, 'all' to every client that does not
    start with '_', and unknown names are skipped with a warning.  For
    music URLs the existing '<client>_music' variants are appended.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list while a generator
        # is still iterating over it would also visit the new entries.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2829

2830

def _extract_player_ytcfg(self, client, video_id):

2831

url = {

2832

'web_music': 'https://music.youtube.com',

2833

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())

2838

return self.extract_ytcfg(video_id, webpage) or {}

2839

2840

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for each requested client.

    Returns a tuple (player_responses, player_url). Extraction errors of
    individual clients are downgraded to warnings as long as at least one
    response was collected; otherwise the last error is re-raised.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    # Work queue consumed with pop(), hence stored in reverse request order
    pending = clients[::-1]
    responses = []

    def append_client(client_name):
        # Queue follow-up clients only if they exist and were not requested already
        if client_name not in INNERTUBE_CLIENTS or client_name in clients:
            return
        pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()

        player_ytcfg = {}
        if client == 'web':
            player_ytcfg = master_ytcfg
        if 'configs' not in self._configuration_arg('player_skip'):
            client_ytcfg = self._extract_player_ytcfg(client, video_id)
            if client_ytcfg:
                player_ytcfg = client_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per extraction
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        if client == 'web' and initial_pr:
            # The webpage already carried the web client's response
            pr = initial_pr
        else:
            try:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Only the most recent error is kept; older ones become warnings
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

2906

2907

def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts for a video.

    First the entries under 'formats'/'adaptiveFormats' of every element of
    `streaming_data` are converted (deciphering the signature and the `n`
    parameter with the player at `player_url` where required), then the
    formats of the HLS and DASH manifests referenced by `streaming_data`
    are yielded unless skipped through the 'skip' extractor argument.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                # Without a player there is nothing to decipher the signature with
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` may still be None here when the format carries
                # neither quality nor audioQuality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a copy: appending to the list returned by _configuration_arg()
    # could otherwise leak 'hls' into whatever object owns that list
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False for formats already seen through the same protocol;
        # otherwise assigns format_id/quality in place and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' format dict per storyboard level found in the player responses.

    The storyboard spec is a '|'-separated string: the first field is the
    URL template, every following field describes one level as eight
    '#'-separated values. Malformed levels are reported and skipped.
    Nothing is yielded when no usable base URL is present.
    """
    # get_first() can hand back None when no response carries a spec,
    # so normalise to '' before splitting
    spec = (get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='') or '').split('|')[::-1]
    # The list is reversed, so pop() removes the leading URL template
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        # NOTE(review): this division raises TypeError if `duration` is None;
        # confirm that callers only reach here with a known duration
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment only covers what is left of the video
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3094

3095

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns the tuple (webpage, master_ytcfg, player_responses, player_url);
    `webpage` is None when 'webpage' is listed in the 'player_skip'
    extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if skip_webpage:
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the built-in default config when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage)
    if not master_ytcfg:
        master_ytcfg = self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3108

3109

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Derive the live state and the format list from the player responses.

    Returns the tuple (live_broadcast_details, is_live, streaming_data, formats).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # videoDetails is consulted first; the broadcast details are only a fallback
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live)
    formats = list(format_iter)

    return live_broadcast_details, is_live, streaming_data, formats

3119

3120

def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Depending on the player responses this returns a url_result for a
    trailer, a playlist_result for a multifeed video, or the info dict of
    the video itself (formats, thumbnails, subtitles, metadata taken from
    the player responses, the watch page and the "initial data").
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None; appending to it directly would raise TypeError
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format for `lang_code` to `container`
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times given in the URL fragment take precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() belongs on the matched text, not on the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Computed unconditionally: `contents` is also needed by extract_comments()
    # below, even when no initial data could be obtained
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3569

3570

def _extract_channel_id(self, webpage):

3571

channel_id = self._html_search_meta(

3572

'channelId', webpage, 'channel id', default=None)

3573

if channel_id:

3574

return channel_id

3575

channel_url = self._html_search_meta(

3576

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3577

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3578

'twitter:app:url:googleplay'), webpage, 'channel url')

3579

return self._search_regex(

3580

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3581

channel_url, 'channel id')

3582

3583

@staticmethod

3584

def _extract_basic_item_renderer(item):

3585

# Modified from _extract_grid_item_renderer

3586

known_basic_renderers = (

3587

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3588

)

3589

for key, renderer in item.items():

3590

if not isinstance(renderer, dict):

3591

continue

3592

elif key in known_basic_renderers:

3593

return renderer

3594

elif key.startswith('grid') and key.endswith('Renderer'):

3595

return renderer

3596

3597

def _grid_entries(self, grid_renderer):
    """Yield one result per usable item of a grid renderer.

    Each item is resolved, in this order of precedence, as a playlist, a
    video, a channel, or a generic navigation endpoint URL handled by the
    first suitable extractor. Items matching none of these are skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if not ie.suitable(ep_url):
                    continue
                yield self.url_result(
                    ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                break

3636

3637

def _shelf_entries_from_content(self, shelf_renderer):

3638

content = shelf_renderer.get('content')

3639

if not isinstance(content, dict):

3640

return

3641

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3642

if renderer:

3643

# TODO: add support for nested playlists so each shelf is processed

3644

# as separate playlist

3645

# TODO: this includes only first N items

3646

for entry in self._grid_entries(renderer):

3647

yield entry

3648

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf carries an endpoint URL, a URL result for it is yielded
    first (titled with the shelf title).  With ``skip_channels`` set, a shelf
    whose URL contains ``/channels?`` yields nothing at all.  Afterwards the
    entries found in the shelf content are yielded.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3669

3670

def _playlist_entries(self, video_list_renderer):

3671

for content in video_list_renderer['contents']:

3672

if not isinstance(content, dict):

3673

continue

3674

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3675

if not isinstance(renderer, dict):

3676

continue

3677

video_id = renderer.get('videoId')

3678

if not video_id:

3679

continue

3680

yield self._extract_video(renderer)

3681

3682

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a ``videoId``."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)

3689

3690

def _video_entry(self, video_renderer):

3691

video_id = video_renderer.get('videoId')

3692

if video_id:

3693

return self._extract_video(video_renderer)

3694

3695

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every video URL linked inline in the post text, except a link to
    the attached video itself.  Yields nothing when the thread has no
    ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_list_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_list_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_list_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_list_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if attached_video_id == linked_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

3730

3731

def _post_thread_continuation_entries(self, post_thread_continuation):

3732

contents = post_thread_continuation.get('contents')

3733

if not isinstance(contents, list):

3734

return

3735

for content in contents:

3736

renderer = content.get('backstagePostThreadRenderer')

3737

if not isinstance(renderer, dict):

3738

continue

3739

for entry in self._post_thread_entries(renderer):

yield entry

# Disabled code kept as a bare raw-string expression: the _rich_grid_entries
# helper below is never defined at runtime.
r''' # unused

def _rich_grid_entries(self, contents):

3744

for content in contents:

3745

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3746

if video_renderer:

3747

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    Handles ``itemSectionRenderer`` items (dispatching each nested renderer
    to the matching ``*_entries`` helper) and ``richItemRenderer`` items.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its single slot receives the continuation extracted from the most
    specific renderer that provides one, falling back to the section and
    finally to ``parent_renderer``.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to the callable producing its entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                for entry in self._rich_entries(rich_item):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is processed
            for key, renderer in section_item.items():
                if key not in section_handlers:
                    continue
                for entry in section_handlers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3794

3795

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of ``tab``, following continuations page by page.

    The entries embedded in the tab content (``sectionListRenderer`` or
    ``richGridRenderer``) come first.  Then, while a continuation token is
    available, the next page is requested with ``_extract_response`` and
    parsed either from ``continuationContents`` or from the
    ``appendContinuationItemsAction`` items of the response.  Iteration
    stops when there is no continuation, no response, or a response in
    neither known shape.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # ``continuation_list`` is resolved at call time, so this always
        # writes into the list most recently bound in the loop below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the ``continuationContents`` response shape
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (callable, key the items are wrapped under)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        handled_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            handled_renderer = value
            continuation_list = [None]
            yield from continuation_handlers[key](handled_renderer)
            continuation = continuation_list[0] or self._extract_continuation(handled_renderer)
            break
        if handled_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item decides how the whole item list is interpreted
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        wrapped_items = None
        for key in first_item:
            if key not in item_handlers:
                continue
            handler, container_key = item_handlers[key]
            wrapped_items = {container_key: continuation_items}
            continuation_list = [None]
            yield from handler(wrapped_items)
            continuation = continuation_list[0] or self._extract_continuation(wrapped_items)
            break
        if not wrapped_items:
            break

@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')

3880

3881

@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the secondary playlist sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields whose value is None are left out, and an empty
    dict is returned when the sidebar names no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {key: value for key, value in fields if value is not None}

3895

3896

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata comes from ``channelMetadataRenderer`` or, when that is
    absent, ``playlistMetadataRenderer``; thumbnails combine the primary
    sidebar thumbnail, avatars and channel banners.  The entries are
    produced lazily by ``_entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta:
        channel_name = meta.get('title')
        channel_url = meta.get('channelUrl')
        channel_id = meta.get('externalId')
    else:
        meta = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
    else:
        title = description = playlist_id = None
        tags = []

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Adds an uncropped variant of the first thumbnail, in place; does
        # nothing for an empty list or when no valid URL can be derived
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(meta, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0)
    }
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

3986

3987

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is fetched from the ``next`` endpoint, continuing from the
    last video seen so far.  Videos already yielded from the previous page
    are skipped.  Extraction ends when a page has no videos, brings nothing
    new, contains the very first video again, or when the response carries
    no playlist.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page (index 0 if it is not found)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item may lack a watch endpoint; default to {} so
        # the per-field fallbacks below apply instead of raising AttributeError
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response: stop here rather than subscript
            # None in _playlist_entries on the next iteration
            return

4021

4022

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist embedded in a watch page.

    When the playlist endpoint resolves to a URL other than ``url``, a URL
    result delegating to the tab extractor is returned.  Otherwise the
    playlist is extracted as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4039

4040

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    The labels come from the sidebar badges plus, when present, the selected
    entry of the privacy dropdown.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    is_private = is_unlisted = None
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
                break

    for label in badge_labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)

4071

4072

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value among the playlist sidebar items.

    Values that are not of ``expected_type`` are ignored; None is returned
    when no item provides one.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist including unavailable videos.
    The browse id and params are taken from the 'show unavailable videos'
    menu item when it exists; otherwise built-in defaults are used.
    Returns None when the primary sidebar renderer is missing.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4116

4117

def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and parse its initial data, retrying on failure.

    An attempt is repeated (up to the ``extractor_retries`` param, default 3)
    after a network error other than HTTP 403/429, or when the initial data
    has neither ``contents`` nor ``currentVideoEndpoint``.  With ``fatal``
    unset, errors are reported as warnings instead of being raised.

    Returns a ``(webpage, data)`` tuple holding whatever was obtained last.
    """
    retries = self.get_param('extractor_retries', 3)
    attempt = -1
    webpage = data = last_error = None
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % attempt if attempt else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            retryable = isinstance(e.cause, network_exceptions) and (
                not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(e.cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Reached only when the download and parsing above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``.

    The webpage is tried first unless ``webpage`` is listed in the ``skip``
    extractor argument; when it yields no data, the API tab endpoint is
    used instead.  If that fallback happens while authenticated and without
    a ytcfg, an error is raised (when ``fatal`` and ``authcheck`` is not
    skipped) or a warning is issued.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4179

4180

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and fetch the page it points to.

    The resolved endpoint is requested from ``browse`` (``browseEndpoint``)
    or ``next`` (``watchEndpoint``), whichever is present first.  When the
    URL cannot be resolved, ExtractorError is raised if ``fatal``; otherwise
    a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_path in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if params:
            return self._extract_response(
                item_id=item_id, query=params, ep=api_path, headers=headers,
                ytcfg=ytcfg, fatal=fatal, default_client=default_client,
                check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4197

4198

@staticmethod

4199

def _smuggle_data(entries, data):

4200

for entry in entries:

4201

if data:

4202

entry['url'] = smuggle_url(entry['url'], data)

4203

yield entry

4204

4205

# Default ``params`` value used by _search_results when the caller passes none
_SEARCH_PARAMS = None

4206

4207

def _search_results(self, query, params=NO_DEFAULT):
    """Yield the search results for ``query``, page after page.

    ``params`` defaults to ``_SEARCH_PARAMS`` and is sent only when truthy.
    Paging continues until a page provides no continuation.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    continuation_list = [None]
    for page_num in itertools.count(1):
        # Merge the continuation of the previous page (nothing on the first one)
        request.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        section_contents = try_get(
            response,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': section_contents}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4230

IE_DESC = 'YouTube Tabs'

4231

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4240

(?P<not_channel>

4241

feed/|hashtag/|

4242

(?:playlist|watch)\?.*?\blist=

4243

)|

4244

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4249

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4250

}

4251

IE_NAME = 'youtube:tab'

4252

4253

_TESTS = [{

4254

'note': 'playlists, multipage',

4255

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4256

'playlist_mincount': 94,

4257

'info_dict': {

4258

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4259

'title': 'Igor Kleiner - Playlists',

4260

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4261

'uploader': 'Igor Kleiner',

4262

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4263

'channel': 'Igor Kleiner',

4264

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4265

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4266

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4267

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4268

},

4269

}, {

4270

'note': 'playlists, multipage, different order',

4271

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4272

'playlist_mincount': 94,

4273

'info_dict': {

4274

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4275

'title': 'Igor Kleiner - Playlists',

4276

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4277

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4278

'uploader': 'Igor Kleiner',

4279

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4280

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4281

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4282

'channel': 'Igor Kleiner',

4283

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4284

},

4285

}, {

4286

'note': 'playlists, series',

4287

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4288

'playlist_mincount': 5,

4289

'info_dict': {

4290

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4291

'title': '3Blue1Brown - Playlists',

4292

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4293

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4294

'uploader': '3Blue1Brown',

4295

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4296

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4297

'channel': '3Blue1Brown',

4298

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4299

'tags': ['Mathematics'],

4300

},

4301

}, {

4302

'note': 'playlists, singlepage',

4303

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4304

'playlist_mincount': 4,

4305

'info_dict': {

4306

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4307

'title': 'ThirstForScience - Playlists',

4308

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4309

'uploader': 'ThirstForScience',

4310

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4311

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4312

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4313

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4314

'tags': 'count:13',

4315

'channel': 'ThirstForScience',

4316

}

4317

}, {

4318

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4319

'only_matching': True,

4320

}, {

4321

'note': 'basic, single video playlist',

4322

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4323

'info_dict': {

4324

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4325

'uploader': 'Sergey M.',

4326

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4327

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4332

'channel': 'Sergey M.',

4333

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4334

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4335

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4340

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4341

'info_dict': {

4342

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4343

'uploader': 'Sergey M.',

4344

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4345

'title': 'youtube-dl empty playlist',

4346

'tags': [],

4347

'channel': 'Sergey M.',

4348

'description': '',

4349

'modified_date': '20160902',

4350

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4351

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4352

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4358

'info_dict': {

4359

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4360

'title': 'lex will - Home',

4361

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4362

'uploader': 'lex will',

4363

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4364

'channel': 'lex will',

4365

'tags': ['bible', 'history', 'prophesy'],

4366

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4367

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4368

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4369

},

4370

'playlist_mincount': 2,

4371

}, {

4372

'note': 'Videos tab',

4373

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4374

'info_dict': {

4375

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4376

'title': 'lex will - Videos',

4377

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4378

'uploader': 'lex will',

4379

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4380

'tags': ['bible', 'history', 'prophesy'],

4381

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4382

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4383

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4384

'channel': 'lex will',

4385

},

4386

'playlist_mincount': 975,

4387

}, {

4388

'note': 'Videos tab, sorted by popular',

4389

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4390

'info_dict': {

4391

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4392

'title': 'lex will - Videos',

4393

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4394

'uploader': 'lex will',

4395

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4396

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4397

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4398

'channel': 'lex will',

4399

'tags': ['bible', 'history', 'prophesy'],

4400

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4401

},

4402

'playlist_mincount': 199,

4403

}, {

4404

'note': 'Playlists tab',

4405

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4406

'info_dict': {

4407

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4408

'title': 'lex will - Playlists',

4409

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4410

'uploader': 'lex will',

4411

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4412

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4413

'channel': 'lex will',

4414

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4415

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4416

'tags': ['bible', 'history', 'prophesy'],

4417

},

4418

'playlist_mincount': 17,

4419

}, {

4420

'note': 'Community tab',

4421

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4422

'info_dict': {

4423

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4424

'title': 'lex will - Community',

4425

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4426

'uploader': 'lex will',

4427

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4428

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4429

'channel': 'lex will',

4430

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4431

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4432

'tags': ['bible', 'history', 'prophesy'],

4433

},

4434

'playlist_mincount': 18,

4435

}, {

4436

'note': 'Channels tab',

4437

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4438

'info_dict': {

4439

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4440

'title': 'lex will - Channels',

4441

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4442

'uploader': 'lex will',

4443

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4444

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4445

'channel': 'lex will',

4446

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4447

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4448

'tags': ['bible', 'history', 'prophesy'],

4449

},

4450

'playlist_mincount': 12,

4451

}, {

4452

'note': 'Search tab',

4453

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4454

'playlist_mincount': 40,

4455

'info_dict': {

4456

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4457

'title': '3Blue1Brown - Search - linear algebra',

4458

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4459

'uploader': '3Blue1Brown',

4460

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4461

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4462

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4463

'tags': ['Mathematics'],

4464

'channel': '3Blue1Brown',

4465

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4466

},

4467

}, {

4468

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4469

'only_matching': True,

4470

}, {

4471

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4472

'only_matching': True,

4473

}, {

4474

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4475

'only_matching': True,

4476

}, {

4477

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4478

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4479

'info_dict': {

4480

'title': '29C3: Not my department',

4481

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4482

'uploader': 'Christiaan008',

4483

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4484

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4485

'tags': [],

4486

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4487

'view_count': int,

4488

'modified_date': '20150605',

4489

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4490

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4491

'channel': 'Christiaan008',

4492

},

4493

'playlist_count': 96,

4494

}, {

4495

'note': 'Large playlist',

4496

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4497

'info_dict': {

4498

'title': 'Uploads from Cauchemar',

4499

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4500

'uploader': 'Cauchemar',

4501

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4502

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4503

'tags': [],

4504

'modified_date': r're:\d{8}',

4505

'channel': 'Cauchemar',

4506

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4507

'view_count': int,

4508

'description': '',

4509

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4510

},

4511

'playlist_mincount': 1123,

4512

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4513

}, {

4514

'note': 'even larger playlist, 8832 videos',

4515

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4516

'only_matching': True,

4517

}, {

4518

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4519

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4520

'info_dict': {

4521

'title': 'Uploads from Interstellar Movie',

4522

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4523

'uploader': 'Interstellar Movie',

4524

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4525

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4526

'tags': [],

4527

'view_count': int,

4528

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4529

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4530

'channel': 'Interstellar Movie',

4531

'description': '',

4532

'modified_date': r're:\d{8}',

4533

},

4534

'playlist_mincount': 21,

4535

}, {

4536

'note': 'Playlist with "show unavailable videos" button',

4537

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4538

'info_dict': {

4539

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4540

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4541

'uploader': 'Phim Siêu Nhân Nhật Bản',

4542

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4543

'view_count': int,

4544

'channel': 'Phim Siêu Nhân Nhật Bản',

4545

'tags': [],

4546

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4547

'description': '',

4548

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4549

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4550

'modified_date': r're:\d{8}',

4551

},

4552

'playlist_mincount': 200,

4553

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4554

}, {

4555

'note': 'Playlist with unavailable videos in page 7',

4556

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4557

'info_dict': {

4558

'title': 'Uploads from BlankTV',

4559

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4560

'uploader': 'BlankTV',

4561

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4562

'channel': 'BlankTV',

4563

'channel_url': 'https://www.youtube.com/c/blanktv',

4564

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4565

'view_count': int,

4566

'tags': [],

4567

'uploader_url': 'https://www.youtube.com/c/blanktv',

4568

'modified_date': r're:\d{8}',

4569

'description': '',

4570

},

4571

'playlist_mincount': 1000,

4572

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4573

}, {

4574

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4575

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4576

'info_dict': {

4577

'title': 'Data Analysis with Dr Mike Pound',

4578

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4579

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4580

'uploader': 'Computerphile',

4581

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4582

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4583

'tags': [],

4584

'view_count': int,

4585

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4586

'channel_url': 'https://www.youtube.com/user/Computerphile',

4587

'channel': 'Computerphile',

4588

},

4589

'playlist_mincount': 11,

4590

}, {

4591

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4592

'only_matching': True,

4593

}, {

4594

'note': 'Playlist URL that does not actually serve a playlist',

4595

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4600

'uploader': 'STREEM',

4601

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4602

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4603

'upload_date': '20150526',

4604

'license': 'Standard YouTube License',

4605

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4606

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4613

},

4614

'skip': 'This video is not available.',

4615

'add_ie': [YoutubeIE.ie_key()],

4616

}, {

4617

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4618

'only_matching': True,

4619

}, {

4620

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4621

'only_matching': True,

4622

}, {

4623

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4624

'info_dict': {

4625

'id': 'zpsbVPFwsqk', # This will keep changing

4626

'ext': 'mp4',

4627

'title': str,

4628

'uploader': 'Sky News',

4629

'uploader_id': 'skynews',

4630

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4631

'upload_date': r're:\d{8}',

4632

'description': str,

4633

'categories': ['News & Politics'],

4634

'tags': list,

4635

'like_count': int,

4636

'release_timestamp': 1640164857,

4637

'channel': 'Sky News',

4638

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4639

'age_limit': 0,

4640

'view_count': int,

4641

'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg',

4642

'playable_in_embed': True,

4643

'release_date': '20211222',

4644

'availability': 'public',

4645

'live_status': 'is_live',

4646

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4647

},

4648

'params': {

4649

'skip_download': True,

4650

},

4651

'expected_warnings': ['Ignoring subtitle tracks found in '],

4652

}, {

4653

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4658

'uploader': 'The Young Turks',

4659

'uploader_id': 'TheYoungTurks',

4660

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4661

'upload_date': '20150715',

4662

'license': 'Standard YouTube License',

4663

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4664

'categories': ['News & Politics'],

4665

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4670

},

4671

'only_matching': True,

4672

}, {

4673

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4674

'only_matching': True,

4675

}, {

4676

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4677

'only_matching': True,

4678

}, {

4679

'note': 'A channel that is not live. Should raise error',

4680

'url': 'https://www.youtube.com/user/numberphile/live',

4681

'only_matching': True,

4682

}, {

4683

'url': 'https://www.youtube.com/feed/trending',

4684

'only_matching': True,

4685

}, {

4686

'url': 'https://www.youtube.com/feed/library',

4687

'only_matching': True,

4688

}, {

4689

'url': 'https://www.youtube.com/feed/history',

4690

'only_matching': True,

4691

}, {

4692

'url': 'https://www.youtube.com/feed/subscriptions',

4693

'only_matching': True,

4694

}, {

4695

'url': 'https://www.youtube.com/feed/watch_later',

4696

'only_matching': True,

4697

}, {

4698

'note': 'Recommended - redirects to home page.',

4699

'url': 'https://www.youtube.com/feed/recommended',

4700

'only_matching': True,

4701

}, {

4702

'note': 'inline playlist with not always working continuations',

4703

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4704

'only_matching': True,

4705

}, {

4706

'url': 'https://www.youtube.com/course',

4707

'only_matching': True,

4708

}, {

4709

'url': 'https://www.youtube.com/zsecurity',

4710

'only_matching': True,

4711

}, {

4712

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4713

'only_matching': True,

4714

}, {

4715

'url': 'https://www.youtube.com/TheYoungTurks/live',

4716

'only_matching': True,

4717

}, {

4718

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4725

}, {

4726

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4727

'only_matching': True,

4728

}, {

4729

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4730

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4731

'only_matching': True

4732

}, {

4733

'note': '/browse/ should redirect to /channel/',

4734

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4735

'only_matching': True

4736

}, {

4737

'note': 'VLPL, should redirect to playlist?list=PL...',

4738

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4739

'info_dict': {

4740

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4741

'uploader': 'NoCopyrightSounds',

4742

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4743

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4744

'title': 'NCS Releases',

4745

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4746

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4747

'modified_date': r're:\d{8}',

4748

'view_count': int,

4749

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4750

'tags': [],

4751

'channel': 'NoCopyrightSounds',

4752

},

4753

'playlist_mincount': 166,

4754

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4755

}, {

4756

'note': 'Topic, should redirect to playlist?list=UU...',

4757

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4758

'info_dict': {

4759

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4760

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4761

'title': 'Uploads from Royalty Free Music - Topic',

4762

'uploader': 'Royalty Free Music - Topic',

4763

'tags': [],

4764

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4765

'channel': 'Royalty Free Music - Topic',

4766

'view_count': int,

4767

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4768

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4769

'modified_date': r're:\d{8}',

4770

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4771

'description': '',

4772

},

4773

'expected_warnings': [

4774

'The URL does not have a videos tab',

4775

r'[Uu]navailable videos (are|will be) hidden',

4776

],

4777

'playlist_mincount': 101,

4778

}, {

4779

'note': 'Topic without a UU playlist',

4780

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

4781

'info_dict': {

4782

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

4783

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

4784

'tags': [],

4785

},

4786

'expected_warnings': [

4787

'the playlist redirect gave error',

4788

],

4789

'playlist_mincount': 9,

4790

}, {

4791

'note': 'Youtube music Album',

4792

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

4793

'info_dict': {

4794

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

4795

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

4800

'modified_date': r're:\d{8}',

4801

},

4802

'playlist_count': 50,

4803

}, {

4804

'note': 'unlisted single video playlist',

4805

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4806

'info_dict': {

4807

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4808

'uploader': 'colethedj',

4809

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4810

'title': 'yt-dlp unlisted playlist test',

4811

'availability': 'unlisted',

4812

'tags': [],

4813

'modified_date': '20211208',

4814

'channel': 'colethedj',

4815

'view_count': int,

4816

'description': '',

4817

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

4818

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4819

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

4824

'url': 'https://www.youtube.com/feed/recommended',

4825

'info_dict': {

4826

'id': 'recommended',

4827

'title': 'recommended',

4828

},

4829

'playlist_mincount': 50,

4830

'params': {

4831

'skip_download': True,

4832

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4833

},

4834

}, {

4835

'note': 'API Fallback: /videos tab, sorted by oldest first',

4836

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

4837

'info_dict': {

4838

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4839

'title': 'Cody\'sLab - Videos',

4840

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

4841

'uploader': 'Cody\'sLab',

4842

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4843

'channel': 'Cody\'sLab',

4844

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4845

'tags': [],

4846

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4847

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4848

},

4849

'playlist_mincount': 650,

4850

'params': {

4851

'skip_download': True,

4852

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4853

},

4854

}, {

4855

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

4856

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4857

'info_dict': {

4858

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4859

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4860

'title': 'Uploads from Royalty Free Music - Topic',

4861

'uploader': 'Royalty Free Music - Topic',

4862

'modified_date': r're:\d{8}',

4863

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4864

'description': '',

4865

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4866

'tags': [],

4867

'channel': 'Royalty Free Music - Topic',

4868

'view_count': int,

4869

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4870

},

4871

'expected_warnings': [

4872

'does not have a videos tab',

4873

r'[Uu]navailable videos (are|will be) hidden',

4874

],

4875

'playlist_mincount': 101,

4876

'params': {

4877

'skip_download': True,

4878

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

},

}]

@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is declined here; every other URL is
    judged by the parent class' ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

4886

4887

def _real_extract(self, url):
    """Unsmuggle *url*, run the real extraction and re-smuggle the entries.

    The smuggled payload is flagged with ``is_music_url`` when
    ``self.is_music_url(url)`` is true, and is attached to the resulting
    entries (when there are any) through ``self._smuggle_data``.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result

4895

4896

# Splits a URL into three named parts on top of the groups of _VALID_URL:
#   pre  - the portion matched by _VALID_URL itself
#   tab  - an optional '/<word>' path segment; the conditional
#          (?(not_channel)|...) only tries to match it when the
#          'not_channel' group (expected to come from _VALID_URL) did not match
#   post - whatever is left of the URL
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

4897

4898

def __real_extract(self, url, smuggled_data):
    """Normalise a tab/playlist/watch URL and hand it to the matching extraction routine.

    The URL host is rewritten to www.youtube.com, the URL is split with
    ``_URL_RE`` and, depending on what is found, the call ends in one of:

    * a ``url_result`` pointing at YoutubeTabIE (music album resolved to a playlist),
    * a ``url_result`` pointing at YoutubeIE (single video),
    * ``self._extract_from_tabs`` (browse page with tabs),
    * ``self._extract_from_playlist`` (playlist shown next to a video).

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved, when ``/live`` was requested but a tab page was
    served, or when an explicitly requested tab does not exist.
    """
    item_id = self._match_id(url)
    # All further work is done against www.youtube.com, whatever host was given
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def split_url(url):
        # Groups that took no part in the match are reported as '' rather than None
        groups = self._URL_RE.match(url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = split_url(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if the tab name is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab = post = ''
            is_channel = False
        elif id_prefix == 'MP':
            # Albums (/[channel/browse]/MP...) are resolved to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A bare channel URL is redirected to its /videos tab
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = split_url(url)

    # The URL may carry a video id, a playlist id, or both
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # With neither a video id nor a playlist id, youtube redirects to the home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts

        if redirect_allowed and requested_tab_name == 'live':
            # A live tab should have redirected to the video
            raise ExtractorError('The channel is not currently live', expected=True)

        if redirect_allowed and requested_tab_name not in ('', selected_tab_name):
            redirect_warning = f'The channel does not have a {requested_tab_name} tab'
            if original_tab_name:
                # The tab was asked for explicitly in the URL, so its absence is an error
                raise ExtractorError(redirect_warning, expected=True)
            if item_id[:2] == 'UC':
                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    redirect_warning += ' and the playlist redirect gave error'
                else:
                    item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                    redirect_warning += f'. Redirecting to playlist {pl_id} instead'
            if selected_tab_name and selected_tab_name != requested_tab_name:
                redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload the playlist with unavailable videos
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    endpoint_video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
    video_id = endpoint_video_id or video_id
    if video_id:
        if mobj['tab'] != '/live':
            # Only warn when the single video is a surprise; a live tab is expected to redirect to one
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5016

5017

5018

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``...?list=<id>`` URLs and forward them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Verbose-mode pattern: an optional youtube/youtubekids/invidious URL prefix
    # ending in "list=", followed by the playlist id itself
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a ``v`` query value.

        Everything else is judged by the parent class' ``suitable``.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported inside the method instead of relying
        # on module scope - presumably so that this classmethod stays
        # self-contained; confirm before hoisting or removing the import
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is carried over; when it is empty the
        matched playlist id is used as the ``list`` parameter. Music URLs are
        marked by smuggling ``is_music_url`` into the forwarded URL.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5127

5128

5129

class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE.

        The video id and playlist id captured by ``_VALID_URL`` become the
        ``v`` and ``list`` query parameters; ``feature=youtu.be`` is appended.
        """
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5176

5177

5178

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's ``/live`` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` using the channel id found in the URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(
            live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5191

5192

5193

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's videos page URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` for ``/user/<name>/videos``."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5207

5208

5209

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor: ``:ytfav`` (and spelling variants) -> the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the fixed ``list=LL`` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())

5226

5227

5228

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """YouTube search extractor using the ``ytsearch`` search key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """YouTube search extractor using the ``ytsearchdate`` search key."""

    # Name is derived from the plain search extractor's name
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date

5241

5242

5243

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results for ``youtube.com/results?...`` search page URLs.

    The search terms come from the ``search_query`` (or ``q``) parameter; an
    optional ``sp`` parameter is forwarded to the search as-is.
    """

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result whose id and title are both the search query."""
        params = parse_qs(url)
        # 'search_query' takes precedence; 'q' is the fallback spelling
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # None when the URL carries no 'sp' parameter
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5271

5272

5273

class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    Every subclass has to provide a _FEED_NAME attribute; it is used both
    for the extractor name and for the feed URL.
    """

    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, built as ``youtube:<_FEED_NAME>``."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the ``/feed/<_FEED_NAME>`` URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5289

5290

5291

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor: ``:ytwatchlater`` -> the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5303

5304

5305

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com home page URL as well as the ``:ytrec`` /
    ``:ytrecommended`` keywords.
    """

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class, which sets this to True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword family)."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that were cut off before the video id.

    Matches ``watch?`` URLs that end right after a single non-video parameter
    (or with no parameter at all) and ``attribution_link?a=...`` URLs with
    nothing after the ``a`` parameter. Extraction never succeeds: it always
    raises an expected ``ExtractorError`` advising the user to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The suggested command must name this program's own executable
        # (yt-dlp); the message previously pointed users at youtube-dl.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Placeholder for ``youtube.com/clip/`` URLs.

    Emits a warning that clips are unsupported and passes the unchanged URL
    on to the 'Generic' extractor.
    """

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn about missing clip support, then delegate the URL to 'Generic'."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')

5402

5403

5404

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch ``watch?v=`` URLs whose video id is only 1 to 10 characters long.

    Extraction never succeeds: an expected ``ExtractorError`` naming the
    incomplete id and the URL is always raised.
    """

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError reporting the short id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)