jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	mimetype2ext,
	46	network_exceptions,
	47	NO_DEFAULT,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73
	74	def get_first(obj, keys, **kwargs):
	75	return traverse_obj(obj, (..., variadic(keys)), *kwargs, get_all=False)
	76
	77
	78	# any clients starting with _ cannot be explicity requested by the user
	79	INNERTUBE_CLIENTS = {
	80	'web': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB',
	85	'clientVersion': '2.20211221.00.00',
	86	}
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	89	},
	90	'web_embedded': {
	91	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	92	'INNERTUBE_CONTEXT': {
	93	'client': {
	94	'clientName': 'WEB_EMBEDDED_PLAYER',
	95	'clientVersion': '1.20211215.00.01',
	96	},
	97	},
	98	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	99	},
	100	'web_music': {
	101	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	102	'INNERTUBE_HOST': 'music.youtube.com',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_REMIX',
	106	'clientVersion': '1.20211213.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	110	},
	111	'web_creator': {
	112	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'WEB_CREATOR',
	116	'clientVersion': '1.20211220.02.00',
	117	}
	118	},
	119	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	120	},
	121	'android': {
	122	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	123	'INNERTUBE_CONTEXT': {
	124	'client': {
	125	'clientName': 'ANDROID',
	126	'clientVersion': '16.49',
	127	}
	128	},
	129	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	130	'REQUIRE_JS_PLAYER': False
	131	},
	132	'android_embedded': {
	133	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	134	'INNERTUBE_CONTEXT': {
	135	'client': {
	136	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	137	'clientVersion': '16.49',
	138	},
	139	},
	140	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	141	'REQUIRE_JS_PLAYER': False
	142	},
	143	'android_music': {
	144	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	145	'INNERTUBE_CONTEXT': {
	146	'client': {
	147	'clientName': 'ANDROID_MUSIC',
	148	'clientVersion': '4.57',
	149	}
	150	},
	151	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	152	'REQUIRE_JS_PLAYER': False
	153	},
	154	'android_creator': {
	155	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	156	'INNERTUBE_CONTEXT': {
	157	'client': {
	158	'clientName': 'ANDROID_CREATOR',
	159	'clientVersion': '21.47',
	160	},
	161	},
	162	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	163	'REQUIRE_JS_PLAYER': False
	164	},
	165	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	166	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	167	'ios': {
	168	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	169	'INNERTUBE_CONTEXT': {
	170	'client': {
	171	'clientName': 'IOS',
	172	'clientVersion': '16.46',
	173	'deviceModel': 'iPhone14,3',
	174	}
	175	},
	176	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	177	'REQUIRE_JS_PLAYER': False
	178	},
	179	'ios_embedded': {
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MESSAGES_EXTENSION',
	183	'clientVersion': '16.46',
	184	'deviceModel': 'iPhone14,3',
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '4.57',
	196	},
	197	},
	198	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	199	'REQUIRE_JS_PLAYER': False
	200	},
	201	'ios_creator': {
	202	'INNERTUBE_CONTEXT': {
	203	'client': {
	204	'clientName': 'IOS_CREATOR',
	205	'clientVersion': '21.47',
	206	},
	207	},
	208	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	209	'REQUIRE_JS_PLAYER': False
	210	},
	211	# mweb has 'ultralow' formats
	212	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	213	'mweb': {
	214	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	215	'INNERTUBE_CONTEXT': {
	216	'client': {
	217	'clientName': 'MWEB',
	218	'clientVersion': '2.20211221.01.00',
	219	}
	220	},
	221	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	222	}
	223	}
	224
	225
	226	def build_innertube_clients():
	227	third_party = {
	228	'embedUrl': 'https://google.com', # Can be any valid URL
	229	}
	230	base_clients = ('android', 'web', 'ios', 'mweb')
	231	priority = qualities(base_clients[::-1])
	232
	233	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	234	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	235	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	236	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	237	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	238	ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
	239
	240	if client in base_clients:
	241	INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
	242	agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	243	agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	244	agegate_ytcfg['priority'] -= 1
	245	elif client.endswith('_embedded'):
	246	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	247	ytcfg['priority'] -= 2
	248	else:
	249	ytcfg['priority'] -= 3
	250
	251
	252	build_innertube_clients()
	253
	254
	255	class YoutubeBaseInfoExtractor(InfoExtractor):
	256	"""Provide base functions for Youtube extractors"""
	257
	258	_RESERVED_NAMES = (
	259	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	260	r'shorts\|movies\|results\|shared\|hashtag\|trending\|feed\|feeds\|'
	261	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	262	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	263
	264	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	265
	266	_NETRC_MACHINE = 'youtube'
	267
	268	# If True it will raise an error if no login info is provided
	269	_LOGIN_REQUIRED = False
	270
	271	_INVIDIOUS_SITES = (
	272	# invidious-redirect websites
	273	r'(?:www\.)?redirect\.invidious\.io',
	274	r'(?:(?:www\|dev)\.)?invidio\.us',
	275	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	276	r'(?:www\.)?invidious\.pussthecat\.org',
	277	r'(?:www\.)?invidious\.zee\.li',
	278	r'(?:www\.)?invidious\.ethibox\.fr',
	279	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	280	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	281	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	282	# youtube-dl invidious instances list
	283	r'(?:(?:www\|no)\.)?invidiou\.sh',
	284	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	285	r'(?:www\.)?invidious\.kabi\.tk',
	286	r'(?:www\.)?invidious\.mastodon\.host',
	287	r'(?:www\.)?invidious\.zapashcanon\.fr',
	288	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	289	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	290	r'(?:www\.)?invidious\.himiko\.cloud',
	291	r'(?:www\.)?invidious\.reallyancient\.tech',
	292	r'(?:www\.)?invidious\.tube',
	293	r'(?:www\.)?invidiou\.site',
	294	r'(?:www\.)?invidious\.site',
	295	r'(?:www\.)?invidious\.xyz',
	296	r'(?:www\.)?invidious\.nixnet\.xyz',
	297	r'(?:www\.)?invidious\.048596\.xyz',
	298	r'(?:www\.)?invidious\.drycat\.fr',
	299	r'(?:www\.)?inv\.skyn3t\.in',
	300	r'(?:www\.)?tube\.poal\.co',
	301	r'(?:www\.)?tube\.connect\.cafe',
	302	r'(?:www\.)?vid\.wxzm\.sx',
	303	r'(?:www\.)?vid\.mint\.lgbt',
	304	r'(?:www\.)?vid\.puffyan\.us',
	305	r'(?:www\.)?yewtu\.be',
	306	r'(?:www\.)?yt\.elukerio\.org',
	307	r'(?:www\.)?yt\.lelux\.fi',
	308	r'(?:www\.)?invidious\.ggc-project\.de',
	309	r'(?:www\.)?yt\.maisputain\.ovh',
	310	r'(?:www\.)?ytprivate\.com',
	311	r'(?:www\.)?invidious\.13ad\.de',
	312	r'(?:www\.)?invidious\.toot\.koeln',
	313	r'(?:www\.)?invidious\.fdn\.fr',
	314	r'(?:www\.)?watch\.nettohikari\.com',
	315	r'(?:www\.)?invidious\.namazso\.eu',
	316	r'(?:www\.)?invidious\.silkky\.cloud',
	317	r'(?:www\.)?invidious\.exonip\.de',
	318	r'(?:www\.)?invidious\.riverside\.rocks',
	319	r'(?:www\.)?invidious\.blamefran\.net',
	320	r'(?:www\.)?invidious\.moomoo\.de',
	321	r'(?:www\.)?ytb\.trom\.tf',
	322	r'(?:www\.)?yt\.cyberhost\.uk',
	323	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	324	r'(?:www\.)?qklhadlycap4cnod\.onion',
	325	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	326	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	327	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	328	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	329	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	330	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	331	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	332	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	333	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	334	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	335	)
	336
	337	def _login(self):
	338	"""
	339	Attempt to log in to YouTube.
	340	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	341	"""
	342
	343	if (self._LOGIN_REQUIRED
	344	and self.get_param('cookiefile') is None
	345	and self.get_param('cookiesfrombrowser') is None):
	346	self.raise_login_required(
	347	'Login details are needed to download this content', method='cookies')
	348	username, password = self._get_login_info()
	349	if username:
	350	self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
	351
	352	def _initialize_consent(self):
	353	cookies = self._get_cookies('https://www.youtube.com/')
	354	if cookies.get('__Secure-3PSID'):
	355	return
	356	consent_id = None
	357	consent = cookies.get('CONSENT')
	358	if consent:
	359	if 'YES' in consent.value:
	360	return
	361	consent_id = self._search_regex(
	362	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	363	if not consent_id:
	364	consent_id = random.randint(100, 999)
	365	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	366
	367	def _initialize_pref(self):
	368	cookies = self._get_cookies('https://www.youtube.com/')
	369	pref_cookie = cookies.get('PREF')
	370	pref = {}
	371	if pref_cookie:
	372	try:
	373	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	374	except ValueError:
	375	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	376	pref.update({'hl': 'en'})
	377	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	378
	379	def _real_initialize(self):
	380	self._initialize_pref()
	381	self._initialize_consent()
	382	self._login()
	383
	384	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	385	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	386	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	387
	388	def _get_default_ytcfg(self, client='web'):
	389	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	390
	391	def _get_innertube_host(self, client='web'):
	392	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	393
	394	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	395	# try_get but with fallback to default ytcfg client values when present
	396	_func = lambda y: try_get(y, getter, expected_type)
	397	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	398
	399	def _extract_client_name(self, ytcfg, default_client='web'):
	400	return self._ytcfg_get_safe(
	401	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	402	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	403
	404	def _extract_client_version(self, ytcfg, default_client='web'):
	405	return self._ytcfg_get_safe(
	406	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	407	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	408
	409	def _extract_api_key(self, ytcfg=None, default_client='web'):
	410	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	411
	412	def _extract_context(self, ytcfg=None, default_client='web'):
	413	context = get_first(
	414	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	415	# Enforce language for extraction
	416	traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
	417	return context
	418
	419	_SAPISID = None
	420
	421	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	422	time_now = round(time.time())
	423	if self._SAPISID is None:
	424	yt_cookies = self._get_cookies('https://www.youtube.com')
	425	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	426	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	427	sapisid_cookie = dict_get(
	428	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	429	if sapisid_cookie and sapisid_cookie.value:
	430	self._SAPISID = sapisid_cookie.value
	431	self.write_debug('Extracted SAPISID cookie')
	432	# SAPISID cookie is required if not already present
	433	if not yt_cookies.get('SAPISID'):
	434	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	435	self._set_cookie(
	436	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	437	else:
	438	self._SAPISID = False
	439	if not self._SAPISID:
	440	return None
	441	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	442	sapisidhash = hashlib.sha1(
	443	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	444	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	445
	446	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	447	note='Downloading API JSON', errnote='Unable to download API page',
	448	context=None, api_key=None, api_hostname=None, default_client='web'):
	449
	450	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	451	data.update(query)
	452	real_headers = self.generate_api_headers(default_client=default_client)
	453	real_headers.update({'content-type': 'application/json'})
	454	if headers:
	455	real_headers.update(headers)
	456	return self._download_json(
	457	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	458	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	459	data=json.dumps(data).encode('utf8'), headers=real_headers,
	460	query={'key': api_key or self._extract_api_key()})
	461
	462	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	463	data = self._search_regex(
	464	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	465	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	466	if data:
	467	return self._parse_json(data, item_id, fatal=fatal)
	468
	469	@staticmethod
	470	def _extract_session_index(*data):
	471	"""
	472	Index of current account in account list.
	473	See: https://github.com/yt-dlp/yt-dlp/pull/519
	474	"""
	475	for ytcfg in data:
	476	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	477	if session_index is not None:
	478	return session_index
	479
	480	# Deprecated?
	481	def _extract_identity_token(self, ytcfg=None, webpage=None):
	482	if ytcfg:
	483	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	484	if token:
	485	return token
	486	if webpage:
	487	return self._search_regex(
	488	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	489	'identity token', default=None, fatal=False)
	490
	491	@staticmethod
	492	def _extract_account_syncid(*args):
	493	"""
	494	Extract syncId required to download private playlists of secondary channels
	495	@params response and/or ytcfg
	496	"""
	497	for data in args:
	498	# ytcfg includes channel_syncid if on secondary channel
	499	delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
	500	if delegated_sid:

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

int_or_none,

is_html,

join_nonempty,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

def get_first(obj, keys, **kwargs):
    """Look up ``keys`` in every element of ``obj`` and return a single result.

    Thin wrapper around ``traverse_obj``: the path is ``...`` followed by the
    individual keys (``keys`` may be one key or a sequence of keys), and
    ``get_all=False`` is always passed. Extra keyword arguments are forwarded.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, get_all=False, **kwargs)

76

77

78

# Built-in Innertube client configurations, keyed by client name.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
# receive defaults from build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}

def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Mutates the module-level INNERTUBE_CLIENTS in place:

    * missing ``INNERTUBE_API_KEY``, ``INNERTUBE_HOST`` and
      ``REQUIRE_JS_PLAYER`` keys are set to defaults, and the client context
      gets ``hl='en'`` unless it already has an ``hl``;
    * ``priority`` is 10 times the rank that ``qualities`` assigns to the part
      of the client name before the first underscore;
    * each base client gains a deep-copied ``<name>_agegate`` variant
      (priority - 1) with an EMBED client screen and third-party embed info;
    * ``*_embedded`` clients get the third-party embed info (priority - 2);
    * every other non-base client gets priority - 3.
    """
    base_names = ('android', 'web', 'ios', 'mweb')
    embed_info = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    rank = qualities(base_names[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the loop inserts new "*_agegate" entries.
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        context = cfg['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)

        if name.endswith('_embedded'):
            context['thirdParty'] = embed_info
            cfg['priority'] -= 2
            continue
        if name not in base_names:
            cfg['priority'] -= 3
            continue

        # Base client: keep it as is and register an age-gate variant.
        variant = copy.deepcopy(cfg)
        variant_context = variant['INNERTUBE_CONTEXT']
        variant_context['client']['clientScreen'] = 'EMBED'
        variant_context['thirdParty'] = embed_info
        variant['priority'] -= 1
        INNERTUBE_CLIENTS[f'{name}_agegate'] = variant

250

251

252

build_innertube_clients()

253

254

255

class YoutubeBaseInfoExtractor(InfoExtractor):

256

"""Provide base functions for Youtube extractors"""

257

258

# Regex alternation of first path components that are site features rather
# than channel names. The tuple-style parentheses only group the adjacent
# string literals into one pattern string.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Regex for playlist IDs: a known prefix followed by at least 10 ID
# characters, or one of the fixed special IDs.
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

265

266

_NETRC_MACHINE = 'youtube'

267

268

# If True it will raise an error if no login info is provided

269

_LOGIN_REQUIRED = False

270

271

_INVIDIOUS_SITES = (

272

# invidious-redirect websites

273

r'(?:www\.)?redirect\.invidious\.io',

274

r'(?:(?:www|dev)\.)?invidio\.us',

275

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

276

r'(?:www\.)?invidious\.pussthecat\.org',

277

r'(?:www\.)?invidious\.zee\.li',

278

r'(?:www\.)?invidious\.ethibox\.fr',

279

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

280

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

281

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

282

# youtube-dl invidious instances list

283

r'(?:(?:www|no)\.)?invidiou\.sh',

284

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

285

r'(?:www\.)?invidious\.kabi\.tk',

286

r'(?:www\.)?invidious\.mastodon\.host',

287

r'(?:www\.)?invidious\.zapashcanon\.fr',

288

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

289

r'(?:www\.)?invidious\.tinfoil-hat\.net',

290

r'(?:www\.)?invidious\.himiko\.cloud',

291

r'(?:www\.)?invidious\.reallyancient\.tech',

292

r'(?:www\.)?invidious\.tube',

293

r'(?:www\.)?invidiou\.site',

294

r'(?:www\.)?invidious\.site',

295

r'(?:www\.)?invidious\.xyz',

296

r'(?:www\.)?invidious\.nixnet\.xyz',

297

r'(?:www\.)?invidious\.048596\.xyz',

298

r'(?:www\.)?invidious\.drycat\.fr',

299

r'(?:www\.)?inv\.skyn3t\.in',

300

r'(?:www\.)?tube\.poal\.co',

301

r'(?:www\.)?tube\.connect\.cafe',

302

r'(?:www\.)?vid\.wxzm\.sx',

303

r'(?:www\.)?vid\.mint\.lgbt',

304

r'(?:www\.)?vid\.puffyan\.us',

305

r'(?:www\.)?yewtu\.be',

306

r'(?:www\.)?yt\.elukerio\.org',

307

r'(?:www\.)?yt\.lelux\.fi',

308

r'(?:www\.)?invidious\.ggc-project\.de',

309

r'(?:www\.)?yt\.maisputain\.ovh',

310

r'(?:www\.)?ytprivate\.com',

311

r'(?:www\.)?invidious\.13ad\.de',

312

r'(?:www\.)?invidious\.toot\.koeln',

313

r'(?:www\.)?invidious\.fdn\.fr',

314

r'(?:www\.)?watch\.nettohikari\.com',

315

r'(?:www\.)?invidious\.namazso\.eu',

316

r'(?:www\.)?invidious\.silkky\.cloud',

317

r'(?:www\.)?invidious\.exonip\.de',

318

r'(?:www\.)?invidious\.riverside\.rocks',

319

r'(?:www\.)?invidious\.blamefran\.net',

320

r'(?:www\.)?invidious\.moomoo\.de',

321

r'(?:www\.)?ytb\.trom\.tf',

322

r'(?:www\.)?yt\.cyberhost\.uk',

323

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

324

r'(?:www\.)?qklhadlycap4cnod\.onion',

325

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

326

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

327

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

328

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

329

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

330

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

331

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

332

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

333

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

334

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

)

def _login(self):
    """
    Check that usable authentication was supplied; no actual login is performed.

    If _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
    'cookiesfrombrowser' parameter is given, raise_login_required() is called.
    If a username is configured, a warning is reported that username/password
    login is not possible.
    """
    if self._LOGIN_REQUIRED:
        cookie_params = ('cookiefile', 'cookiesfrombrowser')
        if all(self.get_param(name) is None for name in cookie_params):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if not username:
        return
    hint = self._LOGIN_HINTS["cookies"]
    self.report_warning(f'Cannot login to YouTube using username and password. {hint}')

351

352

def _initialize_consent(self):
    """Set an accepting CONSENT cookie for .youtube.com when none is in place.

    Nothing is done when a '__Secure-3PSID' cookie exists or the current
    CONSENT cookie already contains 'YES'. Otherwise the numeric id of a
    'PENDING+<id>' consent cookie is reused, falling back to a random
    three-digit id.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    existing = jar.get('CONSENT')
    pending_id = None
    if existing:
        if 'YES' in existing.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', existing.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

366

367

def _initialize_pref(self):
    """Rewrite the PREF cookie for .youtube.com so that it carries hl=en.

    Settings of an existing PREF cookie are kept; if that cookie cannot be
    parsed, a warning is reported and only hl=en is written.
    """
    stored = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if stored:
        try:
            settings = dict(compat_urlparse.parse_qsl(stored.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    encoded = compat_urllib_parse_urlencode(settings)
    self._set_cookie('.youtube.com', name='PREF', value=encoded)

378

379

def _real_initialize(self):
    """Run the start-up steps in order: PREF cookie, consent cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._login):
        step()

383

384

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

385

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

386

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

387

388

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration for ``client``.

    Raises KeyError if ``client`` is not a key of INNERTUBE_CLIENTS.
    """
    builtin = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin)

390

391

def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for ``client`` in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

393

394

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on ``ytcfg``, falling back to the built-in config of ``default_client``.

    The fallback is used whenever the value obtained from ``ytcfg`` is falsy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

398

399

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg`` or from the default client config.

    'INNERTUBE_CLIENT_NAME' is tried before the clientName nested in
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

403

404

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg`` or from the default client config.

    'INNERTUBE_CLIENT_VERSION' is tried before the clientVersion nested in
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

408

409

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from ``ytcfg`` or from the default client config."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, compat_str, default_client)

411

412

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict of ``ytcfg`` or of the default client.

    The context's 'client' dict, when present, gets hl='en' set in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language for extraction
    client = traverse_obj(context, 'client', expected_type=dict, default={})
    client['hl'] = 'en'
    return context

# Cached SAPISID value: None = not looked up yet, False = looked up but absent
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Return a 'SAPISIDHASH <time>_<sha1>' header value, or None without a SAPISID.

    The SAPISID is read from the youtube.com cookies on first use and cached
    on self._SAPISID (False when no usable cookie was found).
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest_input = f'{time_now} {self._SAPISID} {origin}'
    sapisidhash = hashlib.sha1(digest_input.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

445

446

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the Innertube endpoint `ep` and return the parsed JSON.

    @param ep              endpoint path appended to /youtubei/v1/
    @param query           dict merged into the request body (after 'context')
    @param headers         extra headers; these override the generated ones
    @param context         Innertube context; defaults to that of `default_client`
    @param api_key         API key; defaults to the key of `default_client`
    @param api_hostname    host to call; defaults to the host of `default_client`
    Remaining arguments are forwarded to _download_json.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        # The fallback key must belong to the same client as the host, headers
        # and context above, not unconditionally to the 'web' client.
        query={'key': api_key or self._extract_api_key(default_client=default_client)})

461

462

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the ytInitialData JSON in `webpage` and return it parsed.

    The boundary-anchored pattern is tried first, then the bare pattern.
    Returns None when nothing was found (only possible if not `fatal`).
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not data:
        return None
    return self._parse_json(data, item_id, fatal=fatal)

468

469

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int,
    or None if there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ID_TOKEN from `ytcfg` if present, else search `webpage` for it.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

490

491

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, visitor_data_paths, expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

523

524

def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to `ytcfg.set({...});` in `webpage`.

    Returns {} when `webpage` is empty, when no such call is found, or when
    the captured text is not valid JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be matched literally; a `$` anchor in
    # their place can never be followed by the JSON object, so nothing matched.
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

531

532

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit arguments take precedence; missing values are looked up in
    `ytcfg`. Headers whose value is None are omitted from the result.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An account sync id without a known session index defaults to index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {name: value for name, value in headers.items() if value is not None}

556

557

@staticmethod

558

def _build_api_continuation_query(continuation, ctp=None):

559

query = {

560

'continuation': continuation

561

}

562

# TODO: Inconsistency with clickTrackingParams.

563

# Currently we have a fixed ctp contained within context (from ytcfg)

564

# and a ctp in root query for continuation.

565

if ctp:

566

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from nextContinuationData/reloadContinuationData.

    Returns None when `renderer` has neither, or the entry has no token.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    if not continuation_data:
        return None
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))

581

582

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None for non-dict input or when the endpoint carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

591

592

@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for `renderer`, or None if it has none.

    The renderer-level continuation data is preferred; otherwise the
    continuationItemRenderer entries in 'contents' and 'items' are scanned.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    contents = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for content in contents:
        if isinstance(content, dict):
            continuation_ep = try_get(content, endpoint_getters, dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

@classmethod
def _extract_alerts(cls, data):
    """Yield (alert_type, message) for every alert in data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts without
    any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the inner level like the outer one: a non-dict value
            # would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

625

626

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report `alerts` as warnings and raise on the last error alert.

    Alerts of type 'error' (case-insensitive) count as errors only when
    `fatal` is set. All but the last error are downgraded to warnings; the
    last one is raised as an ExtractorError.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_fatal_error = alert_type.lower() == 'error' and fatal
        bucket = errors if is_fatal_error else warnings
        bucket.append([alert_type, alert_message])

    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
    if errors:
        last_message = errors[-1][1]
        raise ExtractorError('YouTube said: %s' % last_message, expected=expected)

639

640

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

642

643

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased badge labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of `path_list` in `data`.

    For each candidate object, 'simpleText' is preferred; otherwise the
    'text' of its 'runs' (or of the object itself if it is a list) is joined.
    @param max_runs  if set, only this many leading runs are joined
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without `...` or branching keys yields a single object
            # rather than a list of matches, so wrap it for the loop below
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(
                traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined

def _get_count(self, data, *path_list):
    """Parse a count from the text at `path_list`; None if it cannot be parsed.

    parse_count is tried first; otherwise the leading digits/commas of the
    whitespace-stripped text are converted with str_to_int.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    thumbnails = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumbnail_url = url_or_none(entry.get('url'))
            if thumbnail_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in thumbnail_url:
                    thumbnail_url = thumbnail_url.partition('?')[0]
                thumbnails.append({
                    'url': thumbnail_url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns the result of datetime_from_str, or None when the text holds no
    recognised relative time or the conversion raises ValueError.
    """
    # Either a keyword ('today', 'yesterday', 'now') or '<N> <unit>[s] ago'
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _extract_time_text(self, renderer, *path_list):
    """Return (timestamp, text) for the time text at `path_list` in `renderer`.

    A relative time ('3 days ago') is tried first, then an absolute date.
    `timestamp` is None when the text could not be parsed; a warning is
    reported (once) if the text was non-empty.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    if isinstance(dt, datetime.datetime):
        timestamp = calendar.timegm(dt.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text

737

738

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the Innertube API with retries and return the JSON response.

    Retries (up to the 'extractor_retries' param, default 3) on network
    errors other than HTTP 403/429, on "unknown error" alerts in the
    response, and on responses lacking all of `check_get_keys`.
    Once retries are exhausted, or on a non-retryable error, the error is
    raised if `fatal`; otherwise a warning is reported and None is returned.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # Non-HTML error body: surface the API's own error message
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; on the final attempt fall through
                # so the error is raised/reported instead of returning the bad response.
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod

def is_music_url(url):

810

return re.match(r'https?://music\.youtube\.com/', url) is not None

811

812

def _extract_video(self, renderer):
    """Build a 'url'-type result dict (handled by YoutubeIE) from a video renderer."""
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(duration_text)
    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # First matching rule wins: scheduled start > "streamed" text > live marker
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

853

IE_DESC = 'YouTube'

854

_VALID_URL = r"""(?x)^

855

(

856

(?:https?://|//) # http(s):// or protocol-independent URL

857

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

858

(?:www\.)?deturl\.com/www\.youtube\.com|

859

(?:www\.)?pwnyoutube\.com|

860

(?:www\.)?hooktube\.com|

861

(?:www\.)?yourepeat\.com|

862

tube\.majestyc\.net|

863

%(invidious)s|

864

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

865

(?:.*?\#/)? # handle anchor (#/) redirect urls

866

(?: # the various things that can precede the ID:

867

(?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/

868

|(?: # or the v= param in all its forms

869

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

870

(?:\?|\#!?) # the params delimiter ? or # or #!

871

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

877

vid\.plus| # or vid.plus/xxxx

878

zwearz\.com/watch| # or zwearz.com/watch/xxxx

879

%(invidious)s

880

)/

881

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

882

)

883

)? # all until now is optional -> you can pass the naked ID

884

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

885

(?(1).+)? # if we found the ID, everything can follow

886

(?:\#|$)""" % {

887

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

888

}

889

_PLAYER_INFO_RE = (

890

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

891

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

892

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

893

)

894

_formats = {

895

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

896

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

897

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

898

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

899

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

900

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

901

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

902

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

903

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

904

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

905

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

906

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

907

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

908

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

909

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

910

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

911

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

912

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

917

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

918

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

919

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

920

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

921

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

922

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

923

924

# Apple HTTP Live Streaming

925

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

926

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

927

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

928

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

929

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

930

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

931

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

932

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

933

934

# DASH mp4 video

935

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

936

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

937

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

938

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

939

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

940

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

941

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

942

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

943

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

944

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

945

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

946

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

947

948

# Dash mp4 audio

949

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

950

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

951

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

952

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

953

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

954

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

955

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

956

957

# Dash webm

958

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

959

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

960

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

961

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

962

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

963

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

964

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

965

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

966

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

967

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

968

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

969

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

970

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

971

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

972

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

974

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

975

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

976

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

977

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

978

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

979

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

980

981

# Dash webm audio

982

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

983

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

984

985

# Dash webm audio with opus inside

986

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

987

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

988

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

989

990

# RTMP (unnamed)

991

'_rtmp': {'protocol': 'rtmp'},

992

993

# av01 video only formats sometimes served with "unknown" codecs

994

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

995

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

996

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

997

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

998

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

999

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1000

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1001

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1002

}

1003

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1015

'uploader': 'Philipp Hagemeister',

1016

'uploader_id': 'phihag',

1017

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1018

'channel': 'Philipp Hagemeister',

1019

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1020

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1021

'upload_date': '20121002',

1022

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1023

'categories': ['Science & Technology'],

1024

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1029

'playable_in_embed': True,

1030

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1031

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1039

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1044

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1045

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1046

'uploader': 'SET India',

1047

'uploader_id': 'setindia',

1048

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1049

'age_limit': 18,

1050

},

1051

'skip': 'Private video',

1052

},

1053

{

1054

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1055

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1060

'uploader': 'Philipp Hagemeister',

1061

'uploader_id': 'phihag',

1062

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1063

'channel': 'Philipp Hagemeister',

1064

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1065

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1066

'upload_date': '20121002',

1067

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1068

'categories': ['Science & Technology'],

1069

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1074

'playable_in_embed': True,

1075

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1076

'live_status': 'not_live',

'age_limit': 0,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1085

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1090

'uploader_id': '8KVIDEO',

1091

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1092

'description': '',

1093

'uploader': '8KVIDEO',

1094

'title': 'UHDTV TEST 8K VIDEO.mp4'

1095

},

1096

'params': {

1097

'youtube_include_dash_manifest': True,

1098

'format': '141',

1099

},

1100

'skip': 'format 141 not served anymore',

1101

},

1102

# DASH manifest with encrypted signature

1103

{

1104

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1109

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1110

'duration': 244,

1111

'uploader': 'AfrojackVEVO',

1112

'uploader_id': 'AfrojackVEVO',

1113

'upload_date': '20131011',

1114

'abr': 129.495,

1115

'like_count': int,

1116

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1117

'playable_in_embed': True,

1118

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1119

'view_count': int,

1120

'track': 'The Spark',

1121

'live_status': 'not_live',

1122

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1123

'channel': 'Afrojack',

1124

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1125

'tags': 'count:19',

1126

'availability': 'public',

1127

'categories': ['Music'],

1128

'age_limit': 0,

1129

'alt_title': 'The Spark',

1130

},

1131

'params': {

1132

'youtube_include_dash_manifest': True,

1133

'format': '141/bestaudio[ext=m4a]',

1134

},

1135

},

1136

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1137

{

1138

'note': 'Embed allowed age-gate video',

1139

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1144

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1145

'duration': 142,

1146

'uploader': 'The Witcher',

1147

'uploader_id': 'WitcherGame',

1148

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1149

'upload_date': '20140605',

1150

'age_limit': 18,

1151

'categories': ['Gaming'],

1152

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1153

'availability': 'needs_auth',

1154

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1155

'like_count': int,

1156

'channel': 'The Witcher',

1157

'live_status': 'not_live',

1158

'tags': 'count:17',

1159

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1160

'playable_in_embed': True,

'view_count': int,

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1166

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1171

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1172

'upload_date': '20200408',

1173

'uploader_id': 'FlyingKitty900',

1174

'uploader': 'FlyingKitty',

1175

'age_limit': 18,

1176

'availability': 'needs_auth',

1177

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1178

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1179

'channel': 'FlyingKitty',

1180

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1181

'view_count': int,

1182

'categories': ['Entertainment'],

1183

'live_status': 'not_live',

1184

'tags': ['Flyingkitty', 'godzilla 2'],

1185

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1186

'like_count': int,

1187

'duration': 177,

1188

'playable_in_embed': True,

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1193

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1194

'info_dict': {

1195

'id': 'Tq92D6wQ1mg',

1196

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1197

'ext': 'mp4',

1198

'upload_date': '20191227',

1199

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1200

'uploader': 'Projekt Melody',

1201

'description': 'md5:17eccca93a786d51bc67646756894066',

1202

'age_limit': 18,

1203

'like_count': int,

1204

'availability': 'needs_auth',

1205

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1206

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1207

'view_count': int,

1208

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1209

'channel': 'Projekt Melody',

1210

'live_status': 'not_live',

1211

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1212

'playable_in_embed': True,

1213

'categories': ['Entertainment'],

1214

'duration': 106,

1215

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

},

},

{

'note': 'Non-Agegated non-embeddable video',

1220

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1225

'uploader': 'Herr Lurik',

1226

'uploader_id': 'st3in234',

1227

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1228

'upload_date': '20130730',

1229

'track': 'Such mich find mich',

1230

'age_limit': 0,

1231

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1232

'like_count': int,

1233

'playable_in_embed': False,

1234

'creator': 'OOMPH!',

1235

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1236

'view_count': int,

1237

'alt_title': 'Such mich find mich',

1238

'duration': 210,

1239

'channel': 'Herr Lurik',

1240

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1241

'categories': ['Music'],

1242

'availability': 'public',

1243

'uploader_url': 'http://www.youtube.com/user/st3in234',

1244

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1245

'live_status': 'not_live',

'artist': 'OOMPH!',

},

},

{

'note': 'Non-bypassable age-gated video',

1251

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1252

'only_matching': True,

1253

},

1254

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1255

# YouTube Red ad is not captured for creator

1256

{

1257

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1263

'uploader_id': 'deadmau5',

1264

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1265

'creator': 'deadmau5',

1266

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1267

'uploader': 'deadmau5',

1268

'title': 'Deadmau5 - Some Chords (HD)',

1269

'alt_title': 'Some Chords',

1270

'availability': 'public',

1271

'tags': 'count:14',

1272

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1273

'view_count': int,

1274

'live_status': 'not_live',

1275

'channel': 'deadmau5',

1276

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1277

'like_count': int,

1278

'track': 'Some Chords',

1279

'artist': 'deadmau5',

1280

'playable_in_embed': True,

1281

'age_limit': 0,

1282

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1283

'categories': ['Music'],

1284

'album': 'Some Chords',

1285

},

1286

'expected_warnings': [

1287

'DASH manifest missing',

1288

]

1289

},

1290

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1291

{

1292

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1298

'uploader_id': 'olympic',

1299

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1300

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1301

'uploader': 'Olympics',

1302

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1303

'like_count': int,

1304

'release_timestamp': 1343767800,

1305

'playable_in_embed': True,

1306

'categories': ['Sports'],

1307

'release_date': '20120731',

1308

'channel': 'Olympics',

1309

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1310

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1311

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1312

'age_limit': 0,

1313

'availability': 'public',

1314

'live_status': 'was_live',

1315

'view_count': int,

1316

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1317

},

1318

'params': {

1319

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1329

'duration': 85,

1330

'upload_date': '20110310',

1331

'uploader_id': 'AllenMeow',

1332

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1333

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1334

'uploader': '孫ᄋᄅ',

1335

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1336

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1341

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1342

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1343

'view_count': int,

1344

'categories': ['People & Blogs'],

1345

'like_count': int,

1346

'live_status': 'not_live',

1347

'availability': 'unlisted',

1348

},

1349

},

1350

# url_encoded_fmt_stream_map is empty string

1351

{

1352

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1357

'description': '',

1358

'upload_date': '20150404',

1359

'uploader_id': 'spbelect',

1360

'uploader': 'Наблюдатели Петербурга',

1361

},

1362

'params': {

1363

'skip_download': 'requires avconv',

1364

},

1365

'skip': 'This live event has ended.',

1366

},

1367

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1368

{

1369

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1374

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1375

'duration': 220,

1376

'upload_date': '20150625',

1377

'uploader_id': 'dorappi2000',

1378

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1379

'uploader': 'dorappi2000',

1380

'formats': 'mincount:31',

1381

},

1382

'skip': 'not actual anymore',

1383

},

1384

# DASH manifest with segment_list

1385

{

1386

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1387

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1392

'uploader': 'Airtek',

1393

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1394

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1395

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1396

},

1397

'params': {

1398

'youtube_include_dash_manifest': True,

1399

'format': '135', # bestvideo

1400

},

1401

'skip': 'This live event has ended.',

1402

},

1403

{

1404

# Multifeed videos (multiple cameras), URL is for Main Camera

1405

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1406

'info_dict': {

1407

'id': 'jvGDaLqkpTg',

1408

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1409

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1416

'description': 'md5:e03b909557865076822aa169218d6a5d',

1417

'duration': 10643,

1418

'upload_date': '20161111',

1419

'uploader': 'Team PGP',

1420

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1421

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1428

'description': 'md5:e03b909557865076822aa169218d6a5d',

1429

'duration': 10991,

1430

'upload_date': '20161111',

1431

'uploader': 'Team PGP',

1432

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1433

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1440

'description': 'md5:e03b909557865076822aa169218d6a5d',

1441

'duration': 10995,

1442

'upload_date': '20161111',

1443

'uploader': 'Team PGP',

1444

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1445

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1452

'description': 'md5:e03b909557865076822aa169218d6a5d',

1453

'duration': 10990,

1454

'upload_date': '20161111',

1455

'uploader': 'Team PGP',

1456

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1457

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1462

},

1463

'skip': 'Not multifeed anymore',

1464

},

1465

{

1466

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1467

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1468

'info_dict': {

1469

'id': 'gVfLd0zydlo',

1470

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1471

},

1472

'playlist_count': 2,

1473

'skip': 'Not multifeed anymore',

1474

},

1475

{

1476

'url': 'https://vid.plus/FlRa-iH7PGw',

1477

'only_matching': True,

1478

},

1479

{

1480

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1481

'only_matching': True,

1482

},

1483

{

1484

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1485

# Also tests cut-off URL expansion in video description (see

1486

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1487

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1488

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1493

'alt_title': 'Dark Walk',

1494

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1495

'duration': 133,

1496

'upload_date': '20151119',

1497

'uploader_id': 'IronSoulElf',

1498

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1499

'uploader': 'IronSoulElf',

1500

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1501

'track': 'Dark Walk',

1502

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1503

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1504

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1505

'categories': ['Film & Animation'],

1506

'view_count': int,

1507

'live_status': 'not_live',

1508

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1509

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1510

'tags': 'count:13',

1511

'availability': 'public',

1512

'channel': 'IronSoulElf',

1513

'playable_in_embed': True,

'like_count': int,

'age_limit': 0,

},

'params': {

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1523

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1524

'only_matching': True,

1525

},

1526

{

1527

# Video with yt:stretch=17:0

1528

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1533

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1534

'upload_date': '20151107',

1535

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1536

'uploader': 'CH GAMER DROID',

1537

},

1538

'params': {

1539

'skip_download': True,

1540

},

1541

'skip': 'This video does not exist.',

1542

},

1543

{

1544

# Video with incomplete 'yt:stretch=16:'

1545

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1546

'only_matching': True,

1547

},

1548

{

1549

# Video licensed under Creative Commons

1550

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1555

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1556

'duration': 721,

1557

'upload_date': '20150127',

1558

'uploader_id': 'BerkmanCenter',

1559

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1560

'uploader': 'The Berkman Klein Center for Internet & Society',

1561

'license': 'Creative Commons Attribution license (reuse allowed)',

1562

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1563

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1564

'like_count': int,

1565

'age_limit': 0,

1566

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1567

'channel': 'The Berkman Klein Center for Internet & Society',

1568

'availability': 'public',

1569

'view_count': int,

1570

'categories': ['Education'],

1571

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1572

'live_status': 'not_live',

1573

'playable_in_embed': True,

1574

},

1575

'params': {

1576

'skip_download': True,

},

},

{

# Channel-like uploader_url

1581

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1586

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1587

'duration': 4060,

1588

'upload_date': '20151119',

1589

'uploader': 'Bernie Sanders',

1590

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1591

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1592

'license': 'Creative Commons Attribution license (reuse allowed)',

1593

'playable_in_embed': True,

1594

'tags': 'count:12',

1595

'like_count': int,

1596

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1597

'age_limit': 0,

1598

'availability': 'public',

1599

'categories': ['News & Politics'],

1600

'channel': 'Bernie Sanders',

1601

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1602

'view_count': int,

1603

'live_status': 'not_live',

1604

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1605

},

1606

'params': {

1607

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1612

'only_matching': True,

1613

},

1614

{

1615

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1616

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1617

'only_matching': True,

1618

},

1619

{

1620

# Rental video preview

1621

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1626

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1627

'upload_date': '20150811',

1628

'uploader': 'FlixMatrix',

1629

'uploader_id': 'FlixMatrixKaravan',

1630

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1631

'license': 'Standard YouTube License',

1632

},

1633

'params': {

1634

'skip_download': True,

1635

},

1636

'skip': 'This video is not available.',

1637

},

1638

{

1639

# YouTube Red video with episode data

1640

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1645

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1646

'duration': 2085,

1647

'upload_date': '20170118',

1648

'uploader': 'Vsauce',

1649

'uploader_id': 'Vsauce',

1650

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1651

'series': 'Mind Field',

1652

'season_number': 1,

1653

'episode_number': 1,

1654

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1655

'tags': 'count:12',

1656

'view_count': int,

1657

'availability': 'public',

1658

'age_limit': 0,

1659

'channel': 'Vsauce',

1660

'episode': 'Episode 1',

1661

'categories': ['Entertainment'],

1662

'season': 'Season 1',

1663

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1664

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1665

'like_count': int,

1666

'playable_in_embed': True,

1667

'live_status': 'not_live',

1668

},

1669

'params': {

1670

'skip_download': True,

1671

},

1672

'expected_warnings': [

1673

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1678

# as inappropriate or offensive to some audiences.

1679

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1684

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1685

'duration': 965,

1686

'upload_date': '20140124',

1687

'uploader': 'New Century Foundation',

1688

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1689

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1690

},

1691

'params': {

1692

'skip_download': True,

1693

},

1694

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1699

'only_matching': True,

1700

},

1701

{

1702

# geo restricted to JP

1703

'url': 'sJL6WA-aGkQ',

1704

'only_matching': True,

1705

},

1706

{

1707

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1708

'only_matching': True,

1709

},

1710

{

1711

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1712

'only_matching': True,

1713

},

1714

{

1715

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1716

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1717

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1722

'only_matching': True,

1723

},

1724

{

1725

# Video with unsupported adaptive stream type formats

1726

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1731

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1732

'duration': 433,

1733

'upload_date': '20130923',

1734

'uploader': 'Amelia Putri Harwita',

1735

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1736

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1737

'formats': 'maxcount:10',

1738

},

1739

'params': {

1740

'skip_download': True,

1741

'youtube_include_dash_manifest': False,

1742

},

1743

'skip': 'not actual anymore',

1744

},

1745

{

1746

# Youtube Music Auto-generated description

1747

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1752

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1753

'upload_date': '20190312',

1754

'uploader': 'Stephen - Topic',

1755

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1756

'artist': 'Stephen',

1757

'track': 'Voyeur Girl',

1758

'album': 'it\'s too much love to know my dear',

1759

'release_date': '20190313',

1760

'release_year': 2019,

1761

'alt_title': 'Voyeur Girl',

1762

'view_count': int,

1763

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1764

'playable_in_embed': True,

1765

'like_count': int,

1766

'categories': ['Music'],

1767

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1768

'channel': 'Stephen',

1769

'availability': 'public',

1770

'creator': 'Stephen',

1771

'duration': 169,

1772

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1773

'age_limit': 0,

1774

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1775

'tags': 'count:11',

1776

'live_status': 'not_live',

1777

},

1778

'params': {

1779

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1784

'only_matching': True,

1785

},

1786

{

1787

# invalid -> valid video id redirection

1788

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1793

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1794

'upload_date': '20090125',

1795

'uploader': 'Prochorowka',

1796

'uploader_id': 'Prochorowka',

1797

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1798

'artist': 'Panjabi MC',

1799

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1800

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1801

},

1802

'params': {

1803

'skip_download': True,

1804

},

1805

'skip': 'Video unavailable',

1806

},

1807

{

1808

# empty description results in an empty string

1809

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1816

'uploader_id': 'ElevageOrVert',

1817

'uploader': 'ElevageOrVert',

1818

'view_count': int,

1819

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1820

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1821

'like_count': int,

1822

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1823

'tags': [],

1824

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1825

'availability': 'public',

1826

'age_limit': 0,

1827

'categories': ['Pets & Animals'],

1828

'duration': 7,

1829

'playable_in_embed': True,

1830

'live_status': 'not_live',

1831

'channel': 'ElevageOrVert',

1832

},

1833

'params': {

1834

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1839

# see [2] for an example with '};' inside ytInitialPlayerResponse

1840

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1841

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1842

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1847

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1848

'upload_date': '20130831',

1849

'uploader_id': 'kudvenkat',

1850

'uploader': 'kudvenkat',

1851

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1852

'like_count': int,

1853

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1854

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1855

'live_status': 'not_live',

1856

'categories': ['Education'],

1857

'availability': 'public',

1858

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1859

'tags': 'count:12',

1860

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1865

},

1866

'params': {

1867

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1872

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1873

'only_matching': True,

1874

},

1875

{

1876

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1877

'only_matching': True,

1878

},

1879

{

1880

# https://github.com/ytdl-org/youtube-dl/pull/28094

1881

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1887

'upload_date': '20141120',

1888

'uploader': 'The Cinematic Orchestra - Topic',

1889

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1890

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1891

'artist': 'The Cinematic Orchestra',

1892

'track': 'Burn Out',

1893

'album': 'Every Day',

1894

'like_count': int,

1895

'live_status': 'not_live',

1896

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1901

'creator': 'The Cinematic Orchestra',

1902

'channel': 'The Cinematic Orchestra',

1903

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1904

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1905

'availability': 'public',

1906

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1907

'categories': ['Music'],

1908

'playable_in_embed': True,

1909

},

1910

'params': {

1911

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1916

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1917

'only_matching': True,

1918

},

1919

{

1920

# controversial video, requires bpctr/contentCheckOk

1921

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1926

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1927

'uploader': 'CBS Mornings',

1928

'uploader_id': 'CBSThisMorning',

1929

'upload_date': '20140716',

1930

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1931

'duration': 170,

1932

'categories': ['News & Politics'],

1933

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1934

'view_count': int,

1935

'channel': 'CBS Mornings',

1936

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1937

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1938

'age_limit': 18,

1939

'availability': 'needs_auth',

1940

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1941

'like_count': int,

1942

'live_status': 'not_live',

1943

'playable_in_embed': True,

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1948

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1953

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1954

'upload_date': '20201120',

1955

'uploader': 'Walk around Japan',

1956

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1957

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1958

'duration': 1456,

1959

'categories': ['Travel & Events'],

1960

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1961

'view_count': int,

1962

'channel': 'Walk around Japan',

1963

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1964

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1965

'age_limit': 0,

1966

'availability': 'public',

1967

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1968

'live_status': 'not_live',

1969

'playable_in_embed': True,

1970

},

1971

'params': {

1972

'skip_download': True,

1973

},

1974

}, {

1975

# Has multiple audio streams

1976

'url': 'WaOKSUlf4TM',

1977

'only_matching': True

1978

}, {

1979

# Requires Premium: has format 141 when requested using YTM url

1980

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

1981

'only_matching': True

1982

}, {

1983

# multiple subtitles with same lang_code

1984

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

1985

'only_matching': True,

1986

}, {

1987

# Force use android client fallback

1988

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

1989

'info_dict': {

1990

'id': 'YOelRv7fMxY',

1991

'title': 'DIGGING A SECRET TUNNEL Part 1',

1992

'ext': '3gp',

1993

'upload_date': '20210624',

1994

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

1995

'uploader': 'colinfurze',

1996

'uploader_id': 'colinfurze',

1997

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

1998

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

1999

'duration': 596,

2000

'categories': ['Entertainment'],

2001

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2002

'view_count': int,

2003

'channel': 'colinfurze',

2004

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2005

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2006

'age_limit': 0,

2007

'availability': 'public',

2008

'like_count': int,

2009

'live_status': 'not_live',

2010

'playable_in_embed': True,

2011

},

2012

'params': {

2013

'format': '17', # 3gp format available on android

2014

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2019

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2020

'only_matching': True,

2021

'params': {

2022

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2027

'only_matching': True,

2028

}, {

2029

'note': 'Storyboards',

2030

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2036

'uploader_id': 'scishow',

2037

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2038

'upload_date': '20140324',

2039

'uploader': 'SciShow',

2040

'like_count': int,

2041

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2042

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2043

'view_count': int,

2044

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2045

'playable_in_embed': True,

2046

'tags': 'count:12',

2047

'uploader_url': 'http://www.youtube.com/user/scishow',

2048

'availability': 'public',

2049

'channel': 'SciShow',

2050

'live_status': 'not_live',

2051

'duration': 248,

2052

'categories': ['Education'],

2053

'age_limit': 0,

2054

}, 'params': {'format': 'mhtml', 'skip_download': True}

}

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor accepts ``url``.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # ``parse_qs`` is already imported from ``..utils`` at module level, so
    # the former function-level re-import on every call was redundant.
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)

2066

2067

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS source
    # _player_cache: lookup key tuple -> extracted function or decrypted value
    self._code_cache, self._player_cache = {}, {}

2071

2072

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Switch all ``is_from_start`` formats to lazily generated DASH fragments.

    Each such format gets the ``http_dash_segments_generator`` protocol and a
    ``fragments`` callable bound to ``_live_dash_fragments``. The callable is
    handed ``mpd_feed``, which re-downloads the player responses (guarded by a
    lock) once more than ``delay`` seconds have passed since the last fetch.
    """
    refresh_lock = threading.Lock()

    is_live = True
    last_fetch = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        nonlocal formats, last_fetch, is_live
        if time.time() <= last_fetch + delay:
            # The previously fetched manifest is still considered fresh
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
        last_fetch = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with refresh_lock:
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        # No format with this id in the (possibly refreshed) list
        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2114

2115

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2116

FETCH_SPAN, MAX_DURATION = 5, 432000

2117

2118

mpd_url, stream_number, is_live = None, None, True

2119

2120

begin_index = 0

2121

download_start_time = ctx.get('start') or time.time()

2122

2123

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2124

if lack_early_segments:

2125

self.report_warning(bug_reports_message(

2126

'Starting download from the last 120 hours of the live stream since '

2127

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2128

lack_early_segments = True

2129

2130

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2131

fragments, fragment_base_url = None, None

2132

2133

def _extract_sequence_from_mpd(refresh_sequence):

2134

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2135

# Obtain from MPD's maximum seq value

2136

old_mpd_url = mpd_url

2137

last_error = ctx.pop('last_error', None)

2138

expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2139

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2140

or (mpd_url, stream_number, False))

2141

if not refresh_sequence:

2142

if expire_fast and not is_live:

2143

return False, last_seq

2144

elif old_mpd_url == mpd_url:

2145

return True, last_seq

2146

try:

2147

fmts, _ = self._extract_mpd_formats_and_subtitles(

2148

mpd_url, None, note=False, errnote=False, fatal=False)

2149

except ExtractorError:

2150

fmts = None

2151

if not fmts:

2152

no_fragment_score += 1

2153

return False, last_seq

2154

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2155

fragments = fmt_info['fragments']

2156

fragment_base_url = fmt_info['fragment_base_url']

2157

assert fragment_base_url

2158

2159

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2160

return True, _last_seq

2161

2162

while is_live:

2163

fetch_time = time.time()

2164

if no_fragment_score > 30:

2165

return

2166

if last_segment_url:

2167

# Obtain from "X-Head-Seqnum" header value from each segment

2168

try:

2169

urlh = self._request_webpage(

2170

last_segment_url, None, note=False, errnote=False, fatal=False)

2171

except ExtractorError:

2172

urlh = None

2173

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2174

if last_seq is None:

2175

no_fragment_score += 1

2176

last_segment_url = None

2177

continue

2178

else:

2179

should_continue, last_seq = _extract_sequence_from_mpd(True)

2180

if not should_continue:

2181

continue

2182

2183

if known_idx > last_seq:

2184

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2190

# skip from the start when it's negative value

2191

known_idx = last_seq + begin_index

2192

if lack_early_segments:

2193

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2194

try:

2195

for idx in range(known_idx, last_seq):

2196

# do not update sequence here or you'll get skipped some part of it

2197

should_continue, _ = _extract_sequence_from_mpd(False)

2198

if not should_continue:

2199

known_idx = idx - 1

2200

raise ExtractorError('breaking out of outer loop')

2201

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2202

yield {

2203

'url': last_segment_url,

2204

}

2205

if known_idx == last_seq:

2206

no_fragment_score += 5

2207

else:

2208

no_fragment_score = 0

2209

known_idx = last_seq

2210

except ExtractorError:

2211

continue

2212

2213

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2214

2215

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL taken from the given ytcfgs, or None.

    ``webpage`` is accepted but not used by this implementation.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not found:
        return
    if found.startswith('//'):
        # Protocol-relative URL
        return 'https:' + found
    if re.match(r'https?://', found):
        return found
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin(
        'https://www.youtube.com', found)

2227

2228

def _download_player_url(self, video_id, fatal=False):
    """Build the player ``base.js`` URL from the version named in the iframe API JS.

    Returns None when the download or the version lookup yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2237

2238

def _signature_cache_id(self, example_sig):

2239

""" Return a string representation of a signature """

2240

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2241

2242

@classmethod

2243

def _extract_player_info(cls, player_url):

2244

for player_re in cls._PLAYER_INFO_RE:

2245

id_m = re.search(player_re, player_url)

if id_m:

break

else:

raise ExtractorError('Cannot identify player %r' % player_url)

2250

return id_m.group('id')

2251

2252

def _load_player(self, video_id, player_url, fatal=True):

2253

player_id = self._extract_player_info(player_url)

2254

if player_id not in self._code_cache:

2255

code = self._download_webpage(

2256

player_url, video_id, fatal=fatal,

2257

note='Downloading player ' + player_id,

2258

errnote='Download of %s failed' % player_url)

2259

if code:

2260

self._code_cache[player_id] = code

2261

return self._code_cache.get(player_id)

2262

2263

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like example_sig.

    A cached index list from the ``youtube-sigfuncs`` cache section is used
    when present; otherwise the function is parsed from the player JS and
    its index list is stored. Returns None when no player code is available.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Run the function on a string whose characters are their own positions;
    # the output then records which input index ends up at each position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, spec)
    return sig_func

2285

2286

def _print_sig_code(self, func, example_sig):

2287

if not self.get_param('youtube_print_sig_code'):

2288

return

2289

2290

def gen_sig_code(idxs):

2291

def _genslice(start, end, step):

2292

starts = '' if start == 0 else str(start)

2293

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2294

steps = '' if step == 1 else (':%d' % step)

2295

return 's[%s%s%s]' % (starts, ends, steps)

2296

2297

step = None

2298

# Quelch pyflakes warnings - start will be set when step is set

2299

start = '(Never used)'

2300

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2305

step = None

2306

continue

2307

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2317

2318

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2319

cache_res = func(test_string)

2320

cache_spec = [ord(c) for c in cache_res]

2321

expr_code = ' + '.join(gen_sig_code(cache_spec))

2322

signature_id_tuple = '(%s)' % (

2323

', '.join(compat_str(len(p)) for p in example_sig.split('.')))

2324

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2325

' return %s\n') % (signature_id_tuple, expr_code)

2326

self.to_screen('Extracted signature function:\n' + code)

2327

2328

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a callable.

    The function name is found with the first matching pattern below; the
    returned callable takes the signature string and runs the extracted JS
    function on it through JSInterpreter.
    """
    # Several patterns had been corrupted: escaped parentheses ("\(" / "\)")
    # were replaced by "$", an end-of-string anchor placed mid-pattern that
    # made them unmatchable. The escaped parentheses are restored here.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The JS function receives its arguments as a list
    return lambda s: initial_function([s])

2351

2352

def _decrypt_signature(self, s, video_id, player_url):

2353

"""Turn the encrypted s field into a working signature"""

2354

2355

if player_url is None:

2356

raise ExtractorError('Cannot decrypt signature without player_url')

2357

2358

try:

2359

player_id = (player_url, self._signature_cache_id(s))

2360

if player_id not in self._player_cache:

2361

func = self._extract_signature_function(

2362

video_id, player_url, s

2363

)

2364

self._player_cache[player_id] = func

2365

func = self._player_cache[player_id]

2366

self._print_sig_code(func, s)

2367

return func(s)

2368

except Exception as e:

2369

raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2370

2371

def _decrypt_nsig(self, s, video_id, player_url):

2372

"""Turn the encrypted n field into a working signature"""

2373

if player_url is None:

2374

raise ExtractorError('Cannot decrypt nsig without player_url')

2375

if player_url.startswith('//'):

2376

player_url = 'https:' + player_url

2377

elif not re.match(r'https?://', player_url):

2378

player_url = compat_urlparse.urljoin(

2379

'https://www.youtube.com', player_url)

2380

2381

sig_id = ('nsig_value', s)

2382

if sig_id in self._player_cache:

2383

return self._player_cache[sig_id]

2384

2385

try:

2386

player_id = ('nsig', player_url)

2387

if player_id not in self._player_cache:

2388

self._player_cache[player_id] = self._extract_n_function(video_id, player_url)

2389

func = self._player_cache[player_id]

2390

self._player_cache[sig_id] = func(s)

2391

self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')

2392

return self._player_cache[sig_id]

2393

except Exception as e:

2394

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2395

2396

def _extract_n_function_name(self, jscode):

2397

return self._search_regex(

2398

(r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]$',),

2399

jscode, 'Initial JS player n function name', group='nfunc')

2400

2401

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's "n" function to a string.

    The function code is taken from the ``youtube-nsig`` cache section when
    present; otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt

2418

2419

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of ytcfg is preferred; otherwise the value is searched
    for in the player JS. Without a player_url this raises ExtractorError when
    fatal is set, and warns and returns None otherwise.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

2442

2443

def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL of the player responses.

    Warns and returns when no such URL is present. The request itself is
    non-fatal.
    """
    tracking_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return
    parsed = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(parsed.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(parsed._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2468

2469

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embed URLs and video ids referenced in an HTML page.

    Three sources are scanned in this order: generic player embeds, lazyYT
    elements and the Wordpress "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        entries.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall() returns one tuple per match; the video id is the last group
    entries.extend(groups[-1] for groups in matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if found:
        return found[0]
    return None

2505

2506

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against url.

    Raises ExtractorError when the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2512

2513

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def plain_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=plain_title,
        duration=duration)

2527

2528

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the engagement panels of data.

    An empty list is returned when no panel yields any chapter.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Panels are tried in order and later ones are not evaluated once one
    # has produced chapters
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2543

2544

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2545

chapters = []

2546

last_chapter = {'start_time': 0}

2547

for idx, chapter in enumerate(chapter_list or []):

2548

title = chapter_title(chapter)

2549

start_time = chapter_time(chapter)

2550

if start_time is None:

2551

continue

2552

last_chapter['end_time'] = start_time

2553

if start_time < last_chapter['start_time']:

2554

if idx == 1:

2555

chapters.pop()

2556

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2557

else:

2558

self.report_warning(f'Invalid start time for chapter "{title}"')

2559

continue

2560

last_chapter = {'start_time': start_time, 'title': title}

2561

chapters.append(last_chapter)

2562

last_chapter['end_time'] = duration

2563

return chapters

2564

2565

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by regex in webpage and parse it non-fatally.

    The regex followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried before the bare
    regex; when neither matches, ``'{}'`` is parsed instead.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2569

2570

def _extract_comment(self, comment_renderer, parent=None):

2571

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2576

2577

# note: timestamp is an estimate calculated from the current time and time_text

2578

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2579

author = self._get_text(comment_renderer, 'authorText')

2580

author_id = try_get(comment_renderer,

2581

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

2582

2583

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2584

lambda x: x['likeCount']), compat_str)) or 0

2585

author_thumbnail = try_get(comment_renderer,

2586

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

2587

2588

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2589

is_favorited = 'creatorHeart' in (try_get(

2590

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2595

'time_text': time_text,

2596

'like_count': votes,

2597

'is_favorited': is_favorited,

2598

'author': author,

2599

'author_id': author_id,

2600

'author_thumbnail': author_thumbnail,

2601

'author_is_uploader': author_is_uploader,

2602

'parent': parent or 'root'

2603

}

2604

2605

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following continuations from root_continuation_data.

    The generator calls itself for reply threads, passing the comment id as
    ``parent`` and sharing the ``tracker`` dict of running counts. Limits are
    read from the ``max_comments`` extractor argument.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        _continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title')) or (
                'top comments' if comment_sort_index == 0 else 'newest first')
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the caller to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not comment_replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                comment_replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_budget = min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(replies, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable parts of the argument fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section only provides the header/sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

        page_num += 1

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Pick the first item section identified as the comment section
        renderer = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # The first part of the max_comments argument caps the number of comments
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

2752

2753

@staticmethod

2754

def _get_checkok_params():

2755

return {'contentCheckOk': True, 'racyCheckOk': True}

2756

2757

@classmethod

2758

def _generate_player_context(cls, sts=None):

2759

context = {

2760

'html5Preference': 'HTML5_PREF_WANTS',

2761

}

2762

if sts is not None:

2763

context['signatureTimestamp'] = sts

2764

return {

2765

'playbackContext': {

2766

'contentPlaybackContext': context

2767

},

2768

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Return True when the player response indicates an age gate.

    A truthy ``desktopLegacyAgeGateReason`` is sufficient. Otherwise the
    ``status`` and ``reason`` values of ``playabilityStatus`` are searched for
    the known markers below.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Compare case-insensitively: the markers are lower-case, while status
    # values are handled in upper case elsewhere in this file (e.g.
    # 'UNPLAYABLE'), so a case-sensitive search could never match a status.
    lowered = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered)

2782

2783

@staticmethod
def _is_unplayable(player_response):
    """Return True when the playability status of the response is ``UNPLAYABLE``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2786

2787

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Query the ``player`` endpoint for video_id as the given client.

    The signature timestamp is only looked up when a player_url is given.
    A falsy response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

2803

2804

def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the ``player_client`` extractor argument; ``default``
    and ``all`` expand to the default and to every allowed client, and
    unknown names are skipped with a warning. For music URLs the ``_music``
    variant of each requested client is appended when it exists.
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients whose name starts with "_" are not selectable
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list from a generator that
        # is still iterating over that same list mutates it during iteration.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2827

2828

def _extract_player_ytcfg(self, client, video_id):

2829

url = {

2830

'web_music': 'https://music.youtube.com',

2831

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())

2836

return self.extract_ytcfg(video_id, webpage) or {}

2837

2838

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all `clients` for `video_id`.

    Returns a tuple ``(player_responses, player_url)``. Clients are queried
    in the given order; age-gate/creator variants of a client are queued on
    the fly when a response turns out to be age-gated. The last extraction
    error is re-raised only when no player response at all was obtained,
    otherwise errors are reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    requested = clients
    # Work stack: reversed, so that pop() hands out clients in requested order
    pending = clients[::-1]
    responses = []

    def append_client(client_name):
        # Queue a follow-up client unless it is unknown or was already requested
        if client_name in INNERTUBE_CLIENTS and client_name not in requested:
            pending.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per video
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; older ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

2904

2905

def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield the format dicts described by `streaming_data`.

    First the direct (https) formats listed under ``formats`` and
    ``adaptiveFormats`` are yielded, deciphering the signature and the
    ``n`` parameter with the player at `player_url` where necessary.
    Afterwards the formats of the HLS and DASH manifests are yielded,
    unless they are skipped by configuration or already known.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value for audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` is None when the format declares neither a
                # quality nor an audio quality; do not call .replace on it
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False when the same itag was already seen for this protocol;
        # otherwise assigns format_id/quality to `f` and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml storyboard format per storyboard level.

    The storyboard spec is a ``|``-separated string: the first element is
    the base URL, each following element holds the ``#``-separated
    parameters of one storyboard level. Malformed levels are skipped with a
    warning.

    Nothing is yielded when there is no spec, or when `duration` is unknown,
    since the per-fragment durations cannot be computed without it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Check before splitting: ''.split('|') is [''], which is truthy
    if not spec or not duration:
        return
    spec = spec.split('|')[::-1]
    base_url = spec.pop()
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3092

3093

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns ``(webpage, master_ytcfg, player_responses, player_url)``.
    `webpage` is None when 'webpage' is listed in the ``player_skip``
    configuration argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        watch_url = webpage_url + '&bpctr=9999999999&has_verified=1'
        webpage = self._download_webpage(watch_url, video_id, fatal=False)

    # Fall back to the default config when the page provides none
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3106

3107

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine the live state and extract the formats of a video.

    Returns ``(live_broadcast_details, is_live, streaming_data, formats)``.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # Prefer the flag from the video details; fall back to the microformat
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live)
    formats = list(format_iter)

    return live_broadcast_details, is_live, streaming_data, formats

3117

3118

def _real_extract(self, url):
    """Extract the info dict of a single video.

    Returns a URL result for the trailer when the video only offers one, a
    playlist result for multifeed videos (unless --no-playlist is given or
    a single feed is forced), and otherwise the info dict of the video with
    formats, thumbnails, subtitles and metadata.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing; `None += str` would raise a TypeError
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one subtitle entry per supported subtitle format
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the literal group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Bound up front: `contents` is also needed for the comment extraction
    # below, even when no initial data could be obtained
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3567

3568

def _extract_channel_id(self, webpage):

3569

channel_id = self._html_search_meta(

3570

'channelId', webpage, 'channel id', default=None)

3571

if channel_id:

3572

return channel_id

3573

channel_url = self._html_search_meta(

3574

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3575

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3576

'twitter:app:url:googleplay'), webpage, 'channel url')

3577

return self._search_regex(

3578

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3579

channel_url, 'channel id')

3580

3581

@staticmethod

3582

def _extract_basic_item_renderer(item):

3583

# Modified from _extract_grid_item_renderer

3584

known_basic_renderers = (

3585

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3586

)

3587

for key, renderer in item.items():

3588

if not isinstance(renderer, dict):

3589

continue

3590

elif key in known_basic_renderers:

3591

return renderer

3592

elif key.startswith('grid') and key.endswith('Renderer'):

3593

return renderer

3594

3595

def _grid_entries(self, grid_renderer):
    """Yield one entry per supported item of `grid_renderer`.

    Each item is resolved, in this order of precedence, as a playlist, a
    video, a channel, or a generic navigation endpoint URL that one of the
    YouTube extractors can handle. Other items are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first suitable extractor gets the URL
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break

3634

3635

def _shelf_entries_from_content(self, shelf_renderer):

3636

content = shelf_renderer.get('content')

3637

if not isinstance(content, dict):

3638

return

3639

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3640

if renderer:

3641

# TODO: add support for nested playlists so each shelf is processed

3642

# as separate playlist

3643

# TODO: this includes only first N items

3644

for entry in self._grid_entries(renderer):

3645

yield entry

3646

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's endpoint URL (when it has one),
    followed by the entries extracted from the shelf content.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The content is extracted as well, which also covers shelves that
    # carry no shelf URL
    yield from self._shelf_entries_from_content(shelf_renderer)

3667

3668

def _playlist_entries(self, video_list_renderer):

3669

for content in video_list_renderer['contents']:

3670

if not isinstance(content, dict):

3671

continue

3672

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3673

if not isinstance(renderer, dict):

3674

continue

3675

video_id = renderer.get('videoId')

3676

if not video_id:

3677

continue

3678

yield self._extract_video(renderer)

3679

3680

def _rich_entries(self, rich_grid_renderer):
    """Yield the video found under ``content.videoRenderer`` of the given
    renderer, provided it has a ``videoId``; otherwise yield nothing.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3687

3688

def _video_entry(self, video_renderer):

3689

video_id = video_renderer.get('videoId')

3690

if video_id:

3691

return self._extract_video(video_renderer)

3692

3693

def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single post thread.

    In order: the attached video, the attached playlist, and every video
    URL linked from the post text that YoutubeIE accepts, except a link to
    the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        linked_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not linked_url or not YoutubeIE.suitable(linked_url):
            continue
        linked_id = YoutubeIE._match_id(linked_url)
        # Same video as the post's attachment
        if linked_id == video_id:
            continue
        yield self.url_result(linked_url, ie=YoutubeIE.ie_key(), video_id=linked_id)

3728

3729

def _post_thread_continuation_entries(self, post_thread_continuation):

3730

contents = post_thread_continuation.get('contents')

3731

if not isinstance(contents, list):

3732

return

3733

for content in contents:

3734

renderer = content.get('backstagePostThreadRenderer')

3735

if not isinstance(renderer, dict):

3736

continue

3737

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3742

for content in contents:

3743

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3744

if video_renderer:

3745

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries of ``parent_renderer['contents']`` and record the
    continuation in ``continuation_list``.

    ``continuation_list`` is reset in place to ``[None]``; its single slot
    is then filled with the continuation that was found, so the caller can
    read it after consuming the generator.
    """
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        item_section = try_get(section, lambda x: x['itemSectionRenderer'], dict)
        if not item_section:
            # No item section: the only other shape handled is a rich item
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(item_section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                if key not in entry_extractors:
                    continue
                for entry in entry_extractors[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the continuation of the item section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(item_section)

    # ... and finally to the continuation of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3792

3793

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``: first those embedded in the tab
    content, then those of each continuation page requested from the API.

    Paging stops when there is no continuation left, when a request
    returns nothing, or when a response contains no known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so the fresh lists
        # bound further down are the ones that get filled
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Key inside response['continuationContents'] -> entry extractor
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first continuation item -> (entry extractor, key under
    # which the continuation items are passed to that extractor)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response shape: continuationContents
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: appendContinuationItemsAction
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            extractor, items_key = known_renderers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            yield from extractor(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break

@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` tabs are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')

3878

3879

@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields read from the secondary playlist sidebar.

    The result holds ``uploader``, ``uploader_id`` and ``uploader_url``;
    fields whose value is None are left out. An empty dict is returned
    when the sidebar has no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

3893

3894

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of ``tabs``.

    Metadata comes from the channel metadata renderer when present, else
    from the playlist metadata renderer, and is completed with the primary
    sidebar. The entries are produced lazily by ``_entries``.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Stays None for playlist metadata, in which case item_id is used below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    thumbnails = (
        self._extract_thumbnails(metadata_renderer, 'avatar')
        or self._extract_thumbnails(
            primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab name (and its expanded text) to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0)
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

3957

3958

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting page after page.

    Starting from ``playlist``, each further page is requested from the
    ``next`` endpoint using the watch endpoint of the last video seen. The
    generator ends when a page has no playlist contents, yields no video,
    holds nothing after the last video already yielded, or when the first
    video of the Mix shows up again.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        # A follow-up response may lack the playlist (or its contents);
        # end the Mix instead of failing inside _playlist_entries
        if not isinstance(playlist, dict) or not playlist.get('contents'):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page, or from
        # the beginning when that video is not on this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer; fall back
        # to an empty mapping so the defaults in the query below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

3992

3993

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist embedded in a watch page.

    When the playlist has its own endpoint URL that differs from ``url``,
    a URL result delegating to YoutubeTabIE is returned; otherwise the
    playlist is extracted as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4010

4011

def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if not selected:
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            # The selected dropdown entry is treated like a badge
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)

4042

4043

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value of type
    ``expected_type`` among the playlist sidebar items of ``data``, or None.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in candidates if found), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    Returns None when ``data`` has no primary playlist sidebar. Otherwise
    the browse endpoint of the 'show unavailable videos' menu item is used
    for the request; default ``params`` / ``browseId`` values are used for
    whatever that menu item does not provide.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4087

4088

def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and parse its initial data, retrying on failure.

    Up to ``extractor_retries`` (default 3) further attempts are made when
    a network error other than HTTP 403/429 occurs, or when the initial
    data has neither ``contents`` nor ``currentVideoEndpoint``. With
    ``fatal`` unset, errors are reported as warnings instead of raised.

    Returns the tuple ``(webpage, data)``.
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            retry_note = ' (retry #%d)' % attempt if attempt else ''
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_note,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            retryable = isinstance(e.cause, network_exceptions) and (
                not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(e.cause or e.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Reached only when the download and parsing succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``.

    The data is taken from the webpage unless ``webpage`` is listed in the
    ``skip`` configuration argument; when that yields no data, the tab
    endpoint API is queried instead. For authenticated sessions without a
    ytcfg, an ExtractorError is raised first (if ``fatal`` and ``authcheck``
    is not skipped), otherwise a warning is reported.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4150

4151

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and return the response of the
    endpoint it resolves to.

    A ``browseEndpoint`` is tried before a ``watchEndpoint``. When neither
    is found, an ExtractorError is raised if ``fatal``; otherwise a warning
    is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API path to query with its params)
    for endpoint_key, api_path in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if endpoint_params:
            return self._extract_response(
                item_id=item_id, query=endpoint_params, ep=api_path, headers=headers,
                ytcfg=ytcfg, fatal=fatal, default_client=default_client,
                check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4168

4169

@staticmethod

4170

def _smuggle_data(entries, data):

4171

for entry in entries:

4172

if data:

4173

entry['url'] = smuggle_url(entry['url'], data)

4174

yield entry

4175

4176

# Default `params` value for search requests: _search_results uses it when
# no explicit params are passed, and leaves `params` out of the query while
# the value is falsy
_SEARCH_PARAMS = None

4177

4178

def _search_results(self, query, params=NO_DEFAULT):
    """Yield the entries of every result page of a search for ``query``.

    ``params`` defaults to ``self._SEARCH_PARAMS`` and is only sent when
    truthy. Pages are requested until no continuation is found.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    continuation_list = [None]
    for page_num in itertools.count(1):
        # Merge in the continuation recorded while extracting the previous page
        request_data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request_data,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # The first getter matches the initial page, the second a continuation page
        slr_contents = try_get(
            search,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            break

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4201

IE_DESC = 'YouTube Tabs'

4202

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4211

(?P<not_channel>

4212

feed/|hashtag/|

4213

(?:playlist|watch)\?.*?\blist=

4214

)|

4215

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4220

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4221

}

4222

IE_NAME = 'youtube:tab'

4223

4224

_TESTS = [{

4225

'note': 'playlists, multipage',

4226

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4227

'playlist_mincount': 94,

4228

'info_dict': {

4229

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4230

'title': 'Igor Kleiner - Playlists',

4231

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4232

'uploader': 'Igor Kleiner',

4233

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4234

'channel': 'Igor Kleiner',

4235

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4236

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4237

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4238

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4239

},

4240

}, {

4241

'note': 'playlists, multipage, different order',

4242

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4243

'playlist_mincount': 94,

4244

'info_dict': {

4245

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4246

'title': 'Igor Kleiner - Playlists',

4247

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4248

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4249

'uploader': 'Igor Kleiner',

4250

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4251

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4252

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4253

'channel': 'Igor Kleiner',

4254

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4255

},

4256

}, {

4257

'note': 'playlists, series',

4258

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4259

'playlist_mincount': 5,

4260

'info_dict': {

4261

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4262

'title': '3Blue1Brown - Playlists',

4263

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4264

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4265

'uploader': '3Blue1Brown',

4266

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4267

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4268

'channel': '3Blue1Brown',

4269

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4270

'tags': ['Mathematics'],

4271

},

4272

}, {

4273

'note': 'playlists, singlepage',

4274

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4275

'playlist_mincount': 4,

4276

'info_dict': {

4277

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4278

'title': 'ThirstForScience - Playlists',

4279

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4280

'uploader': 'ThirstForScience',

4281

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4282

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4283

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4284

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4285

'tags': 'count:13',

4286

'channel': 'ThirstForScience',

4287

}

4288

}, {

4289

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4290

'only_matching': True,

4291

}, {

4292

'note': 'basic, single video playlist',

4293

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4294

'info_dict': {

4295

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4296

'uploader': 'Sergey M.',

4297

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4298

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4303

'channel': 'Sergey M.',

4304

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4305

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4306

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4311

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4312

'info_dict': {

4313

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4314

'uploader': 'Sergey M.',

4315

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4316

'title': 'youtube-dl empty playlist',

4317

'tags': [],

4318

'channel': 'Sergey M.',

4319

'description': '',

4320

'modified_date': '20160902',

4321

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4322

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4323

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4329

'info_dict': {

4330

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4331

'title': 'lex will - Home',

4332

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4333

'uploader': 'lex will',

4334

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4335

'channel': 'lex will',

4336

'tags': ['bible', 'history', 'prophesy'],

4337

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4338

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4339

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4340

},

4341

'playlist_mincount': 2,

4342

}, {

4343

'note': 'Videos tab',

4344

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4345

'info_dict': {

4346

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4347

'title': 'lex will - Videos',

4348

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4349

'uploader': 'lex will',

4350

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4351

'tags': ['bible', 'history', 'prophesy'],

4352

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4353

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4354

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4355

'channel': 'lex will',

4356

},

4357

'playlist_mincount': 975,

4358

}, {

4359

'note': 'Videos tab, sorted by popular',

4360

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4361

'info_dict': {

4362

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4363

'title': 'lex will - Videos',

4364

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4365

'uploader': 'lex will',

4366

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4367

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4368

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4369

'channel': 'lex will',

4370

'tags': ['bible', 'history', 'prophesy'],

4371

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4372

},

4373

'playlist_mincount': 199,

4374

}, {

4375

'note': 'Playlists tab',

4376

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4377

'info_dict': {

4378

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4379

'title': 'lex will - Playlists',

4380

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4381

'uploader': 'lex will',

4382

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4383

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4384

'channel': 'lex will',

4385

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4386

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4387

'tags': ['bible', 'history', 'prophesy'],

4388

},

4389

'playlist_mincount': 17,

4390

}, {

4391

'note': 'Community tab',

4392

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4393

'info_dict': {

4394

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4395

'title': 'lex will - Community',

4396

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4397

'uploader': 'lex will',

4398

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4399

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4400

'channel': 'lex will',

4401

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4402

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4403

'tags': ['bible', 'history', 'prophesy'],

4404

},

4405

'playlist_mincount': 18,

4406

}, {

4407

'note': 'Channels tab',

4408

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4409

'info_dict': {

4410

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4411

'title': 'lex will - Channels',

4412

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4413

'uploader': 'lex will',

4414

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4415

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4416

'channel': 'lex will',

4417

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4418

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4419

'tags': ['bible', 'history', 'prophesy'],

4420

},

4421

'playlist_mincount': 12,

4422

}, {

4423

'note': 'Search tab',

4424

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4425

'playlist_mincount': 40,

4426

'info_dict': {

4427

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4428

'title': '3Blue1Brown - Search - linear algebra',

4429

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4430

'uploader': '3Blue1Brown',

4431

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4432

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4433

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4434

'tags': ['Mathematics'],

4435

'channel': '3Blue1Brown',

4436

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4437

},

4438

}, {

4439

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4440

'only_matching': True,

4441

}, {

4442

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4443

'only_matching': True,

4444

}, {

4445

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4446

'only_matching': True,

4447

}, {

4448

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4449

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4450

'info_dict': {

4451

'title': '29C3: Not my department',

4452

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4453

'uploader': 'Christiaan008',

4454

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4455

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4456

'tags': [],

4457

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4458

'view_count': int,

4459

'modified_date': '20150605',

4460

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4461

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4462

'channel': 'Christiaan008',

4463

},

4464

'playlist_count': 96,

4465

}, {

4466

'note': 'Large playlist',

4467

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4468

'info_dict': {

4469

'title': 'Uploads from Cauchemar',

4470

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4471

'uploader': 'Cauchemar',

4472

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4473

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4474

'tags': [],

4475

'modified_date': r're:\d{8}',

4476

'channel': 'Cauchemar',

4477

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4478

'view_count': int,

4479

'description': '',

4480

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4481

},

4482

'playlist_mincount': 1123,

4483

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4484

}, {

4485

'note': 'even larger playlist, 8832 videos',

4486

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4487

'only_matching': True,

4488

}, {

4489

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4490

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4491

'info_dict': {

4492

'title': 'Uploads from Interstellar Movie',

4493

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4494

'uploader': 'Interstellar Movie',

4495

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4496

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4497

'tags': [],

4498

'view_count': int,

4499

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4500

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4501

'channel': 'Interstellar Movie',

4502

'description': '',

4503

'modified_date': r're:\d{8}',

4504

},

4505

'playlist_mincount': 21,

4506

}, {

4507

'note': 'Playlist with "show unavailable videos" button',

4508

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4509

'info_dict': {

4510

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4511

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4512

'uploader': 'Phim Siêu Nhân Nhật Bản',

4513

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4514

'view_count': int,

4515

'channel': 'Phim Siêu Nhân Nhật Bản',

4516

'tags': [],

4517

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4518

'description': '',

4519

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4520

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4521

'modified_date': r're:\d{8}',

4522

},

4523

'playlist_mincount': 200,

4524

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4525

}, {

4526

'note': 'Playlist with unavailable videos in page 7',

4527

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4528

'info_dict': {

4529

'title': 'Uploads from BlankTV',

4530

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4531

'uploader': 'BlankTV',

4532

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4533

'channel': 'BlankTV',

4534

'channel_url': 'https://www.youtube.com/c/blanktv',

4535

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4536

'view_count': int,

4537

'tags': [],

4538

'uploader_url': 'https://www.youtube.com/c/blanktv',

4539

'modified_date': r're:\d{8}',

4540

'description': '',

4541

},

4542

'playlist_mincount': 1000,

4543

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4544

}, {

4545

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4546

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4547

'info_dict': {

4548

'title': 'Data Analysis with Dr Mike Pound',

4549

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4550

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4551

'uploader': 'Computerphile',

4552

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4553

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4554

'tags': [],

4555

'view_count': int,

4556

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4557

'channel_url': 'https://www.youtube.com/user/Computerphile',

4558

'channel': 'Computerphile',

4559

},

4560

'playlist_mincount': 11,

4561

}, {

4562

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4563

'only_matching': True,

4564

}, {

4565

'note': 'Playlist URL that does not actually serve a playlist',

4566

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4571

'uploader': 'STREEM',

4572

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4573

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4574

'upload_date': '20150526',

4575

'license': 'Standard YouTube License',

4576

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4577

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4584

},

4585

'skip': 'This video is not available.',

4586

'add_ie': [YoutubeIE.ie_key()],

4587

}, {

4588

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4589

'only_matching': True,

4590

}, {

4591

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4592

'only_matching': True,

4593

}, {

4594

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4595

'info_dict': {

4596

'id': 'zpsbVPFwsqk', # This will keep changing

4597

'ext': 'mp4',

4598

'title': str,

4599

'uploader': 'Sky News',

4600

'uploader_id': 'skynews',

4601

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4602

'upload_date': r're:\d{8}',

4603

'description': str,

4604

'categories': ['News & Politics'],

4605

'tags': list,

4606

'like_count': int,

4607

'release_timestamp': 1640164857,

4608

'channel': 'Sky News',

4609

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4610

'age_limit': 0,

4611

'view_count': int,

4612

'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg',

4613

'playable_in_embed': True,

4614

'release_date': '20211222',

4615

'availability': 'public',

4616

'live_status': 'is_live',

4617

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4618

},

4619

'params': {

4620

'skip_download': True,

4621

},

4622

'expected_warnings': ['Ignoring subtitle tracks found in '],

4623

}, {

4624

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4629

'uploader': 'The Young Turks',

4630

'uploader_id': 'TheYoungTurks',

4631

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4632

'upload_date': '20150715',

4633

'license': 'Standard YouTube License',

4634

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4635

'categories': ['News & Politics'],

4636

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4641

},

4642

'only_matching': True,

4643

}, {

4644

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4645

'only_matching': True,

4646

}, {

4647

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4648

'only_matching': True,

4649

}, {

4650

'note': 'A channel that is not live. Should raise error',

4651

'url': 'https://www.youtube.com/user/numberphile/live',

4652

'only_matching': True,

4653

}, {

4654

'url': 'https://www.youtube.com/feed/trending',

4655

'only_matching': True,

4656

}, {

4657

'url': 'https://www.youtube.com/feed/library',

4658

'only_matching': True,

4659

}, {

4660

'url': 'https://www.youtube.com/feed/history',

4661

'only_matching': True,

4662

}, {

4663

'url': 'https://www.youtube.com/feed/subscriptions',

4664

'only_matching': True,

4665

}, {

4666

'url': 'https://www.youtube.com/feed/watch_later',

4667

'only_matching': True,

4668

}, {

4669

'note': 'Recommended - redirects to home page.',

4670

'url': 'https://www.youtube.com/feed/recommended',

4671

'only_matching': True,

4672

}, {

4673

'note': 'inline playlist with not always working continuations',

4674

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4675

'only_matching': True,

4676

}, {

4677

'url': 'https://www.youtube.com/course',

4678

'only_matching': True,

4679

}, {

4680

'url': 'https://www.youtube.com/zsecurity',

4681

'only_matching': True,

4682

}, {

4683

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4684

'only_matching': True,

4685

}, {

4686

'url': 'https://www.youtube.com/TheYoungTurks/live',

4687

'only_matching': True,

4688

}, {

4689

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4696

}, {

4697

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4698

'only_matching': True,

4699

}, {

4700

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4701

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4702

'only_matching': True

4703

}, {

4704

'note': '/browse/ should redirect to /channel/',

4705

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4706

'only_matching': True

4707

}, {

4708

'note': 'VLPL, should redirect to playlist?list=PL...',

4709

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4710

'info_dict': {

4711

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4712

'uploader': 'NoCopyrightSounds',

4713

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4714

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4715

'title': 'NCS Releases',

4716

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4717

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4718

'modified_date': r're:\d{8}',

4719

'view_count': int,

4720

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4721

'tags': [],

4722

'channel': 'NoCopyrightSounds',

4723

},

4724

'playlist_mincount': 166,

4725

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4726

}, {

4727

'note': 'Topic, should redirect to playlist?list=UU...',

4728

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4729

'info_dict': {

4730

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4731

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4732

'title': 'Uploads from Royalty Free Music - Topic',

4733

'uploader': 'Royalty Free Music - Topic',

4734

'tags': [],

4735

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4736

'channel': 'Royalty Free Music - Topic',

4737

'view_count': int,

4738

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4739

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4740

'modified_date': r're:\d{8}',

4741

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4742

'description': '',

4743

},

4744

'expected_warnings': [

4745

'The URL does not have a videos tab',

4746

r'[Uu]navailable videos (are|will be) hidden',

4747

],

4748

'playlist_mincount': 101,

4749

}, {

4750

'note': 'Topic without a UU playlist',

4751

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

4752

'info_dict': {

4753

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

4754

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

4755

'tags': [],

4756

},

4757

'expected_warnings': [

4758

'the playlist redirect gave error',

4759

],

4760

'playlist_mincount': 9,

4761

}, {

4762

'note': 'Youtube music Album',

4763

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

4764

'info_dict': {

4765

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

4766

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

4771

'modified_date': r're:\d{8}',

4772

},

4773

'playlist_count': 50,

4774

}, {

4775

'note': 'unlisted single video playlist',

4776

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4777

'info_dict': {

4778

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4779

'uploader': 'colethedj',

4780

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4781

'title': 'yt-dlp unlisted playlist test',

4782

'availability': 'unlisted',

4783

'tags': [],

4784

'modified_date': '20211208',

4785

'channel': 'colethedj',

4786

'view_count': int,

4787

'description': '',

4788

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

4789

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4790

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

4795

'url': 'https://www.youtube.com/feed/recommended',

4796

'info_dict': {

4797

'id': 'recommended',

4798

'title': 'recommended',

4799

},

4800

'playlist_mincount': 50,

4801

'params': {

4802

'skip_download': True,

4803

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4804

},

4805

}, {

4806

'note': 'API Fallback: /videos tab, sorted by oldest first',

4807

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

4808

'info_dict': {

4809

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4810

'title': 'Cody\'sLab - Videos',

4811

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

4812

'uploader': 'Cody\'sLab',

4813

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4814

'channel': 'Cody\'sLab',

4815

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4816

'tags': [],

4817

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4818

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4819

},

4820

'playlist_mincount': 650,

4821

'params': {

4822

'skip_download': True,

4823

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4824

},

4825

}, {

4826

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

4827

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4828

'info_dict': {

4829

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4830

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4831

'title': 'Uploads from Royalty Free Music - Topic',

4832

'uploader': 'Royalty Free Music - Topic',

4833

'modified_date': r're:\d{8}',

4834

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4835

'description': '',

4836

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4837

'tags': [],

4838

'channel': 'Royalty Free Music - Topic',

4839

'view_count': int,

4840

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4841

},

4842

'expected_warnings': [

4843

'does not have a videos tab',

4844

r'[Uu]navailable videos (are|will be) hidden',

4845

],

4846

'playlist_mincount': 101,

4847

'params': {

4848

'skip_download': True,

4849

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

},

}]

@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs that ``YoutubeIE`` already accepts are always rejected here;
    every other URL is checked by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

4857

4858

def _real_extract(self, url):
    """Extract ``url`` and pass the smuggled data on to the resulting entries.

    The URL is unsmuggled first (defaulting to an empty dict), music URLs are
    flagged in the smuggled data, and the actual work is delegated to
    ``__real_extract``. When the returned info dict has a truthy ``entries``
    value, it is replaced by the result of ``_smuggle_data`` so that the
    smuggled data accompanies the entries.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    info_dict = self.__real_extract(url, smuggled_data)
    if info_dict.get('entries'):
        info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
    return info_dict

4866

4867

# Splits a URL into three named parts: `pre` (the portion matched by _VALID_URL),
# an optional `tab` path segment such as "/videos" (only attempted when the
# `channel_type` group of _VALID_URL participated in the match) and `post`
# (everything that remains up to the end of the string).
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')

4868

4869

def __real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/watch URL and dispatch to the matching extractor path.

    Steps, in order:
      1. Force the host to ``www.youtube.com`` and split the URL with ``_URL_RE``.
      2. Rewrite YouTube Music channel ids (``VL...``, ``MP...``, ``/browse/``).
      3. Redirect bare channel URLs to ``/videos`` unless the
         ``no-youtube-channel-redirect`` compat option is set.
      4. Handle watch URLs carrying a playlist and/or a video id.
      5. Download the page data and return the tab, playlist or single-video result.

    Args:
        url: the (already unsmuggled) URL to extract.
        smuggled_data: dict of smuggled data; only ``is_music_url`` is read here.

    Returns:
        The result of ``url_result``, ``_extract_from_tabs`` or
        ``_extract_from_playlist``, depending on what the page contains.

    Raises:
        ExtractorError: when a music album cannot be resolved to a playlist,
            when a watch URL has neither video nor playlist id, when a ``/live``
            URL still shows tabs (channel not live), or when the page is
            neither a tab page, a playlist nor a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not take part in the match are reported as '' rather
        # than None, so callers can use str methods on every value
        return self._URL_RE.match(url).groupdict('')

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Re-assemble the (possibly rewritten) URL and re-split it so that mobj
    # reflects the final pre/tab/post values
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                # Still not on the requested tab (no playlist redirect happened):
                # tell the user which tab is actually being downloaded
                if tab_name.lower() != mobj['tab'][1:]:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # data may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video, preferring the
    # id reported by the page over the one parsed from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

4980

4981

4982

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``...?list=<id>`` URLs and hand them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Verbose regex: an optional youtube/youtubekids/invidious URL prefix ending
    # in "list=", followed by the playlist id itself
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` query value.

        Anything else is checked against ``_VALID_URL`` by the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is already imported at module level
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a canonical ``/playlist`` URL and delegate to YoutubeTabIE.

        The original query string is kept when there is one; otherwise a
        ``list=<playlist_id>`` query is built. Music URLs are flagged through
        smuggled data.
        """
        playlist_id = self._match_id(url)
        # Must be computed before `url` is rebound to the www.youtube.com form
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5091

5092

5093

class YoutubeYtBeIE(InfoExtractor):
    """Turn ``youtu.be/<video>?list=<playlist>`` short links into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a ``/watch?v=...&list=...&feature=youtu.be`` URL and delegate to YoutubeTabIE."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5140

5141

5142

class YoutubeYtUserIE(InfoExtractor):
    """Map the ``ytuser:<name>`` shorthand to that user's ``/videos`` page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate ``https://www.youtube.com/user/<id>/videos`` to YoutubeTabIE."""
        user_id = self._match_id(url)
        return self.url_result(
            'https://www.youtube.com/user/%s/videos' % user_id,
            ie=YoutubeTabIE.ie_key(), video_id=user_id)

5155

5156

5157

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``playlist?list=LL`` URL to YoutubeTabIE."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())

5174

5175

5176

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` search key."""

    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""

    IE_DESC = 'YouTube search, newest videos first'
    # Derived from the plain search extractor's name so the two stay in sync.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Search filter parameter: videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'

5189

5190

5191

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``youtube.com/results`` search URLs."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of search results for the query in ``url``.

        The query is read from ``search_query`` (or ``q`` as a fallback);
        the optional ``sp`` parameter is passed through to
        ``_search_results`` and is ``None`` when absent. The query string
        serves as both the playlist ID and its title.
        """
        params = parse_qs(url)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5219

5220

5221

class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide a _FEED_NAME attribute.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate the ``/feed/<_FEED_NAME>`` page to ``YoutubeTabIE``."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5237

5238

5239

class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate the fixed ``list=WL`` playlist URL to ``YoutubeTabIE``."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5251

5252

5253

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``.

    Matches the ``:ytrec`` / ``:ytrecommended`` keywords as well as a bare
    ``youtube.com`` URL, and sets ``_LOGIN_REQUIRED`` to ``False``.
    """

    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` keyword family."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``; matches ``:ythis`` and ``:ythistory``."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video ID.

    ``_VALID_URL`` only matches URLs that end right after a single
    non-``v`` query parameter (or after ``watch?`` itself), which is what
    remains when an unquoted ``&`` makes the shell cut the URL short.
    Extraction always fails with an explanatory, expected error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a truncated URL has nothing to extract.

        Raises:
            ExtractorError: with ``expected=True``, explaining how to quote
                the URL on the command line.
        """
        # The hint names this project's own executable (yt-dlp); pointing
        # users at `youtube-dl` would suggest a command they may not have.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Placeholder for ``youtube.com/clip/`` URLs: warns, then defers to ``Generic``."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the unsupported-clip warning and pass ``url`` on unchanged."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')

5350

5351

5352

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` naming the short ID and URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)