jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	get_first,
	43	int_or_none,
	44	is_html,
	45	join_nonempty,
	46	js_to_json,
	47	mimetype2ext,
	48	network_exceptions,
	49	NO_DEFAULT,
	50	orderedSet,
	51	parse_codecs,
	52	parse_count,
	53	parse_duration,
	54	parse_iso8601,
	55	parse_qs,
	56	qualities,
	57	remove_end,
	58	remove_start,
	59	smuggle_url,
	60	str_or_none,
	61	str_to_int,
	62	strftime_or_none,
	63	traverse_obj,
	64	try_get,
	65	unescapeHTML,
	66	unified_strdate,
	67	unified_timestamp,
	68	unsmuggle_url,
	69	update_url_query,
	70	url_or_none,
	71	urljoin,
	72	variadic,
	73	)
	74
	75
	76	# any clients starting with _ cannot be explicitly requested by the user
	77	INNERTUBE_CLIENTS = {
	78	'web': {
	79	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	80	'INNERTUBE_CONTEXT': {
	81	'client': {
	82	'clientName': 'WEB',
	83	'clientVersion': '2.20211221.00.00',
	84	}
	85	},
	86	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	87	},
	88	'web_embedded': {
	89	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	90	'INNERTUBE_CONTEXT': {
	91	'client': {
	92	'clientName': 'WEB_EMBEDDED_PLAYER',
	93	'clientVersion': '1.20211215.00.01',
	94	},
	95	},
	96	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	97	},
	98	'web_music': {
	99	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	100	'INNERTUBE_HOST': 'music.youtube.com',
	101	'INNERTUBE_CONTEXT': {
	102	'client': {
	103	'clientName': 'WEB_REMIX',
	104	'clientVersion': '1.20211213.00.00',
	105	}
	106	},
	107	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	108	},
	109	'web_creator': {
	110	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	111	'INNERTUBE_CONTEXT': {
	112	'client': {
	113	'clientName': 'WEB_CREATOR',
	114	'clientVersion': '1.20211220.02.00',
	115	}
	116	},
	117	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	118	},
	119	'android': {
	120	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	121	'INNERTUBE_CONTEXT': {
	122	'client': {
	123	'clientName': 'ANDROID',
	124	'clientVersion': '16.49',
	125	}
	126	},
	127	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	128	'REQUIRE_JS_PLAYER': False
	129	},
	130	'android_embedded': {
	131	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	132	'INNERTUBE_CONTEXT': {
	133	'client': {
	134	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	135	'clientVersion': '16.49',
	136	},
	137	},
	138	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	139	'REQUIRE_JS_PLAYER': False
	140	},
	141	'android_music': {
	142	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	143	'INNERTUBE_CONTEXT': {
	144	'client': {
	145	'clientName': 'ANDROID_MUSIC',
	146	'clientVersion': '4.57',
	147	}
	148	},
	149	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	150	'REQUIRE_JS_PLAYER': False
	151	},
	152	'android_creator': {
	153	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	154	'INNERTUBE_CONTEXT': {
	155	'client': {
	156	'clientName': 'ANDROID_CREATOR',
	157	'clientVersion': '21.47',
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '16.46',
	171	'deviceModel': 'iPhone14,3',
	172	}
	173	},
	174	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	175	'REQUIRE_JS_PLAYER': False
	176	},
	177	'ios_embedded': {
	178	'INNERTUBE_CONTEXT': {
	179	'client': {
	180	'clientName': 'IOS_MESSAGES_EXTENSION',
	181	'clientVersion': '16.46',
	182	'deviceModel': 'iPhone14,3',
	183	},
	184	},
	185	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	186	'REQUIRE_JS_PLAYER': False
	187	},
	188	'ios_music': {
	189	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	190	'INNERTUBE_CONTEXT': {
	191	'client': {
	192	'clientName': 'IOS_MUSIC',
	193	'clientVersion': '4.57',
	194	},
	195	},
	196	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	197	'REQUIRE_JS_PLAYER': False
	198	},
	199	'ios_creator': {
	200	'INNERTUBE_CONTEXT': {
	201	'client': {
	202	'clientName': 'IOS_CREATOR',
	203	'clientVersion': '21.47',
	204	},
	205	},
	206	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	207	'REQUIRE_JS_PLAYER': False
	208	},
	209	# mweb has 'ultralow' formats
	210	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	211	'mweb': {
	212	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	213	'INNERTUBE_CONTEXT': {
	214	'client': {
	215	'clientName': 'MWEB',
	216	'clientVersion': '2.20211221.01.00',
	217	}
	218	},
	219	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	220	}
	221	}
	222
	223
	224	def build_innertube_clients():
	225	THIRD_PARTY = {
	226	'embedUrl': 'https://google.com', # Can be any valid URL
	227	}
	228	BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
	229	priority = qualities(BASE_CLIENTS[::-1])
	230
	231	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	232	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	233	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	234	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	235	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	236
	237	base_client, *variant = client.split('_')
	238	ytcfg['priority'] = 10 * priority(base_client)
	239
	240	if not variant:
	241	INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
	242	agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	243	agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	244	agegate_ytcfg['priority'] -= 1
	245	elif variant == ['embedded']:
	246	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	247	ytcfg['priority'] -= 2
	248	else:
	249	ytcfg['priority'] -= 3
	250
	251
	252	build_innertube_clients()
	253
	254
	255	class YoutubeBaseInfoExtractor(InfoExtractor):
	256	"""Provide base functions for Youtube extractors"""
	257
	258	_RESERVED_NAMES = (
	259	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	260	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	261	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	262	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	263
	264	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	265
	266	# _NETRC_MACHINE = 'youtube'
	267
	268	# If True it will raise an error if no login info is provided
	269	_LOGIN_REQUIRED = False
	270
	271	_INVIDIOUS_SITES = (
	272	# invidious-redirect websites
	273	r'(?:www\.)?redirect\.invidious\.io',
	274	r'(?:(?:www\|dev)\.)?invidio\.us',
	275	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	276	r'(?:www\.)?invidious\.pussthecat\.org',
	277	r'(?:www\.)?invidious\.zee\.li',
	278	r'(?:www\.)?invidious\.ethibox\.fr',
	279	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	280	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	281	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	282	# youtube-dl invidious instances list
	283	r'(?:(?:www\|no)\.)?invidiou\.sh',
	284	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	285	r'(?:www\.)?invidious\.kabi\.tk',
	286	r'(?:www\.)?invidious\.mastodon\.host',
	287	r'(?:www\.)?invidious\.zapashcanon\.fr',
	288	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	289	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	290	r'(?:www\.)?invidious\.himiko\.cloud',
	291	r'(?:www\.)?invidious\.reallyancient\.tech',
	292	r'(?:www\.)?invidious\.tube',
	293	r'(?:www\.)?invidiou\.site',
	294	r'(?:www\.)?invidious\.site',
	295	r'(?:www\.)?invidious\.xyz',
	296	r'(?:www\.)?invidious\.nixnet\.xyz',
	297	r'(?:www\.)?invidious\.048596\.xyz',
	298	r'(?:www\.)?invidious\.drycat\.fr',
	299	r'(?:www\.)?inv\.skyn3t\.in',
	300	r'(?:www\.)?tube\.poal\.co',
	301	r'(?:www\.)?tube\.connect\.cafe',
	302	r'(?:www\.)?vid\.wxzm\.sx',
	303	r'(?:www\.)?vid\.mint\.lgbt',
	304	r'(?:www\.)?vid\.puffyan\.us',
	305	r'(?:www\.)?yewtu\.be',
	306	r'(?:www\.)?yt\.elukerio\.org',
	307	r'(?:www\.)?yt\.lelux\.fi',
	308	r'(?:www\.)?invidious\.ggc-project\.de',
	309	r'(?:www\.)?yt\.maisputain\.ovh',
	310	r'(?:www\.)?ytprivate\.com',
	311	r'(?:www\.)?invidious\.13ad\.de',
	312	r'(?:www\.)?invidious\.toot\.koeln',
	313	r'(?:www\.)?invidious\.fdn\.fr',
	314	r'(?:www\.)?watch\.nettohikari\.com',
	315	r'(?:www\.)?invidious\.namazso\.eu',
	316	r'(?:www\.)?invidious\.silkky\.cloud',
	317	r'(?:www\.)?invidious\.exonip\.de',
	318	r'(?:www\.)?invidious\.riverside\.rocks',
	319	r'(?:www\.)?invidious\.blamefran\.net',
	320	r'(?:www\.)?invidious\.moomoo\.de',
	321	r'(?:www\.)?ytb\.trom\.tf',
	322	r'(?:www\.)?yt\.cyberhost\.uk',
	323	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	324	r'(?:www\.)?qklhadlycap4cnod\.onion',
	325	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	326	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	327	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	328	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	329	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	330	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	331	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	332	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	333	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	334	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	335	)
	336
	337	def _initialize_consent(self):
	338	cookies = self._get_cookies('https://www.youtube.com/')
	339	if cookies.get('__Secure-3PSID'):
	340	return
	341	consent_id = None
	342	consent = cookies.get('CONSENT')
	343	if consent:
	344	if 'YES' in consent.value:
	345	return
	346	consent_id = self._search_regex(
	347	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	348	if not consent_id:
	349	consent_id = random.randint(100, 999)
	350	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	351
	352	def _initialize_pref(self):
	353	cookies = self._get_cookies('https://www.youtube.com/')
	354	pref_cookie = cookies.get('PREF')
	355	pref = {}
	356	if pref_cookie:
	357	try:
	358	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	359	except ValueError:
	360	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	361	pref.update({'hl': 'en', 'tz': 'UTC'})
	362	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	363
	364	def _real_initialize(self):
	365	self._initialize_pref()
	366	self._initialize_consent()
	367	if (self._LOGIN_REQUIRED
	368	and self.get_param('cookiefile') is None
	369	and self.get_param('cookiesfrombrowser') is None):
	370	self.raise_login_required('Login details are needed to download this content', method='cookies')
	371
	372	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	373	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	374	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	375
	376	def _get_default_ytcfg(self, client='web'):
	377	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	378
	379	def _get_innertube_host(self, client='web'):
	380	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	381
	382	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	383	# try_get but with fallback to default ytcfg client values when present
	384	_func = lambda y: try_get(y, getter, expected_type)
	385	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	386
	387	def _extract_client_name(self, ytcfg, default_client='web'):
	388	return self._ytcfg_get_safe(
	389	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	390	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	391
	392	def _extract_client_version(self, ytcfg, default_client='web'):
	393	return self._ytcfg_get_safe(
	394	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	395	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	396
	397	def _extract_api_key(self, ytcfg=None, default_client='web'):
	398	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	399
	400	def _extract_context(self, ytcfg=None, default_client='web'):
	401	context = get_first(
	402	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	403	# Enforce language and tz for extraction
	404	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	405	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	406	return context
	407
	408	_SAPISID = None
	409
	410	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	411	time_now = round(time.time())
	412	if self._SAPISID is None:
	413	yt_cookies = self._get_cookies('https://www.youtube.com')
	414	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	415	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	416	sapisid_cookie = dict_get(
	417	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	418	if sapisid_cookie and sapisid_cookie.value:
	419	self._SAPISID = sapisid_cookie.value
	420	self.write_debug('Extracted SAPISID cookie')
	421	# SAPISID cookie is required if not already present
	422	if not yt_cookies.get('SAPISID'):
	423	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	424	self._set_cookie(
	425	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	426	else:
	427	self._SAPISID = False
	428	if not self._SAPISID:
	429	return None
	430	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	431	sapisidhash = hashlib.sha1(
	432	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	433	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	434
	435	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	436	note='Downloading API JSON', errnote='Unable to download API page',
	437	context=None, api_key=None, api_hostname=None, default_client='web'):
	438
	439	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	440	data.update(query)
	441	real_headers = self.generate_api_headers(default_client=default_client)
	442	real_headers.update({'content-type': 'application/json'})
	443	if headers:
	444	real_headers.update(headers)
	445	return self._download_json(
	446	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	447	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	448	data=json.dumps(data).encode('utf8'), headers=real_headers,
	449	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	450
	451	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	452	data = self._search_regex(
	453	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	454	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	455	if data:
	456	return self._parse_json(data, item_id, fatal=fatal)
	457
	458	@staticmethod
	459	def _extract_session_index(*data):
	460	"""
	461	Index of current account in account list.
	462	See: https://github.com/yt-dlp/yt-dlp/pull/519
	463	"""
	464	for ytcfg in data:
	465	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	466	if session_index is not None:
	467	return session_index
	468
	469	# Deprecated?
	470	def _extract_identity_token(self, ytcfg=None, webpage=None):
	471	if ytcfg:
	472	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	473	if token:
	474	return token
	475	if webpage:
	476	return self._search_regex(
	477	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	478	'identity token', default=None, fatal=False)
	479
	480	@staticmethod
	481	def _extract_account_syncid(*args):
	482	"""
	483	Extract syncId required to download private playlists of secondary channels
	484	@params response and/or ytcfg
	485	"""
	486	for data in args:
	487	# ytcfg includes channel_syncid if on secondary channel
	488	delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
	489	if delegated_sid:
	490	return delegated_sid
	491	sync_ids = (try_get(
	492	data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
	493	lambda x: x['DATASYNC_ID']), compat_str) or '').split('\|\|')
	494	if len(sync_ids) >= 2 and sync_ids[1]:
	495	# datasyncid is of the form "channel_syncid\|\|user_syncid" for secondary channel
	496	# and just "user_syncid\|\|" for primary channel. We only want the channel_syncid
	497	return sync_ids[0]
	498
	499	@staticmethod
	500	def _extract_visitor_data(*args):

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

77

INNERTUBE_CLIENTS = {

78

'web': {

79

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

80

'INNERTUBE_CONTEXT': {

81

'client': {

82

'clientName': 'WEB',

83

'clientVersion': '2.20211221.00.00',

84

}

85

},

86

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

87

},

88

'web_embedded': {

89

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

90

'INNERTUBE_CONTEXT': {

91

'client': {

92

'clientName': 'WEB_EMBEDDED_PLAYER',

93

'clientVersion': '1.20211215.00.01',

94

},

95

},

96

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

97

},

98

'web_music': {

99

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

100

'INNERTUBE_HOST': 'music.youtube.com',

101

'INNERTUBE_CONTEXT': {

102

'client': {

103

'clientName': 'WEB_REMIX',

104

'clientVersion': '1.20211213.00.00',

105

}

106

},

107

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

108

},

109

'web_creator': {

110

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

111

'INNERTUBE_CONTEXT': {

112

'client': {

113

'clientName': 'WEB_CREATOR',

114

'clientVersion': '1.20211220.02.00',

115

}

116

},

117

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

118

},

119

'android': {

120

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID',

124

'clientVersion': '16.49',

125

}

126

},

127

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

128

'REQUIRE_JS_PLAYER': False

129

},

130

'android_embedded': {

131

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

132

'INNERTUBE_CONTEXT': {

133

'client': {

134

'clientName': 'ANDROID_EMBEDDED_PLAYER',

135

'clientVersion': '16.49',

136

},

137

},

138

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

139

'REQUIRE_JS_PLAYER': False

140

},

141

'android_music': {

142

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

143

'INNERTUBE_CONTEXT': {

144

'client': {

145

'clientName': 'ANDROID_MUSIC',

146

'clientVersion': '4.57',

147

}

148

},

149

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

150

'REQUIRE_JS_PLAYER': False

151

},

152

'android_creator': {

153

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

154

'INNERTUBE_CONTEXT': {

155

'client': {

156

'clientName': 'ANDROID_CREATOR',

157

'clientVersion': '21.47',

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '16.46',

171

'deviceModel': 'iPhone14,3',

172

}

173

},

174

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

175

'REQUIRE_JS_PLAYER': False

176

},

177

'ios_embedded': {

178

'INNERTUBE_CONTEXT': {

179

'client': {

180

'clientName': 'IOS_MESSAGES_EXTENSION',

181

'clientVersion': '16.46',

182

'deviceModel': 'iPhone14,3',

183

},

184

},

185

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

186

'REQUIRE_JS_PLAYER': False

187

},

188

'ios_music': {

189

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

190

'INNERTUBE_CONTEXT': {

191

'client': {

192

'clientName': 'IOS_MUSIC',

193

'clientVersion': '4.57',

194

},

195

},

196

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

197

'REQUIRE_JS_PLAYER': False

198

},

199

'ios_creator': {

200

'INNERTUBE_CONTEXT': {

201

'client': {

202

'clientName': 'IOS_CREATOR',

203

'clientVersion': '21.47',

204

},

205

},

206

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

207

'REQUIRE_JS_PLAYER': False

208

},

209

# mweb has 'ultralow' formats

210

# See: https://github.com/yt-dlp/yt-dlp/pull/557

211

'mweb': {

212

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

213

'INNERTUBE_CONTEXT': {

214

'client': {

215

'clientName': 'MWEB',

216

'clientVersion': '2.20211221.01.00',

217

}

218

},

219

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

}

}

def build_innertube_clients():

225

THIRD_PARTY = {

226

'embedUrl': 'https://google.com', # Can be any valid URL

227

}

228

BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')

229

priority = qualities(BASE_CLIENTS[::-1])

230

231

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

232

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

233

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

234

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

235

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

236

237

base_client, *variant = client.split('_')

238

ytcfg['priority'] = 10 * priority(base_client)

239

240

if not variant:

241

INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)

242

agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

243

agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

244

agegate_ytcfg['priority'] -= 1

245

elif variant == ['embedded']:

246

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

247

ytcfg['priority'] -= 2

248

else:

249

ytcfg['priority'] -= 3

250

251

252

build_innertube_clients()

253

254

255

class YoutubeBaseInfoExtractor(InfoExtractor):

256

"""Provide base functions for Youtube extractors"""

257

258

_RESERVED_NAMES = (

259

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

265

266

# _NETRC_MACHINE = 'youtube'

267

268

# If True it will raise an error if no login info is provided

269

_LOGIN_REQUIRED = False

270

271

_INVIDIOUS_SITES = (

272

# invidious-redirect websites

273

r'(?:www\.)?redirect\.invidious\.io',

274

r'(?:(?:www|dev)\.)?invidio\.us',

275

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

276

r'(?:www\.)?invidious\.pussthecat\.org',

277

r'(?:www\.)?invidious\.zee\.li',

278

r'(?:www\.)?invidious\.ethibox\.fr',

279

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

280

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

281

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

282

# youtube-dl invidious instances list

283

r'(?:(?:www|no)\.)?invidiou\.sh',

284

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

285

r'(?:www\.)?invidious\.kabi\.tk',

286

r'(?:www\.)?invidious\.mastodon\.host',

287

r'(?:www\.)?invidious\.zapashcanon\.fr',

288

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

289

r'(?:www\.)?invidious\.tinfoil-hat\.net',

290

r'(?:www\.)?invidious\.himiko\.cloud',

291

r'(?:www\.)?invidious\.reallyancient\.tech',

292

r'(?:www\.)?invidious\.tube',

293

r'(?:www\.)?invidiou\.site',

294

r'(?:www\.)?invidious\.site',

295

r'(?:www\.)?invidious\.xyz',

296

r'(?:www\.)?invidious\.nixnet\.xyz',

297

r'(?:www\.)?invidious\.048596\.xyz',

298

r'(?:www\.)?invidious\.drycat\.fr',

299

r'(?:www\.)?inv\.skyn3t\.in',

300

r'(?:www\.)?tube\.poal\.co',

301

r'(?:www\.)?tube\.connect\.cafe',

302

r'(?:www\.)?vid\.wxzm\.sx',

303

r'(?:www\.)?vid\.mint\.lgbt',

304

r'(?:www\.)?vid\.puffyan\.us',

305

r'(?:www\.)?yewtu\.be',

306

r'(?:www\.)?yt\.elukerio\.org',

307

r'(?:www\.)?yt\.lelux\.fi',

308

r'(?:www\.)?invidious\.ggc-project\.de',

309

r'(?:www\.)?yt\.maisputain\.ovh',

310

r'(?:www\.)?ytprivate\.com',

311

r'(?:www\.)?invidious\.13ad\.de',

312

r'(?:www\.)?invidious\.toot\.koeln',

313

r'(?:www\.)?invidious\.fdn\.fr',

314

r'(?:www\.)?watch\.nettohikari\.com',

315

r'(?:www\.)?invidious\.namazso\.eu',

316

r'(?:www\.)?invidious\.silkky\.cloud',

317

r'(?:www\.)?invidious\.exonip\.de',

318

r'(?:www\.)?invidious\.riverside\.rocks',

319

r'(?:www\.)?invidious\.blamefran\.net',

320

r'(?:www\.)?invidious\.moomoo\.de',

321

r'(?:www\.)?ytb\.trom\.tf',

322

r'(?:www\.)?yt\.cyberhost\.uk',

323

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

324

r'(?:www\.)?qklhadlycap4cnod\.onion',

325

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

326

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

327

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

328

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

329

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

330

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

331

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

332

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

333

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

334

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

335

)

336

337

def _initialize_consent(self):

338

cookies = self._get_cookies('https://www.youtube.com/')

339

if cookies.get('__Secure-3PSID'):

340

return

341

consent_id = None

342

consent = cookies.get('CONSENT')

343

if consent:

344

if 'YES' in consent.value:

345

return

346

consent_id = self._search_regex(

347

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

348

if not consent_id:

349

consent_id = random.randint(100, 999)

350

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

351

352

def _initialize_pref(self):

353

cookies = self._get_cookies('https://www.youtube.com/')

354

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

359

except ValueError:

360

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

361

pref.update({'hl': 'en', 'tz': 'UTC'})

362

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

363

364

def _real_initialize(self):

365

self._initialize_pref()

366

self._initialize_consent()

367

if (self._LOGIN_REQUIRED

368

and self.get_param('cookiefile') is None

369

and self.get_param('cookiesfrombrowser') is None):

370

self.raise_login_required('Login details are needed to download this content', method='cookies')

371

372

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

373

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

374

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

375

376

def _get_default_ytcfg(self, client='web'):

377

return copy.deepcopy(INNERTUBE_CLIENTS[client])

378

379

def _get_innertube_host(self, client='web'):

380

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

381

382

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

383

# try_get but with fallback to default ytcfg client values when present

384

_func = lambda y: try_get(y, getter, expected_type)

385

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

386

387

def _extract_client_name(self, ytcfg, default_client='web'):

388

return self._ytcfg_get_safe(

389

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

390

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

391

392

def _extract_client_version(self, ytcfg, default_client='web'):

393

return self._ytcfg_get_safe(

394

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

395

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

396

397

def _extract_api_key(self, ytcfg=None, default_client='web'):

398

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

399

400

def _extract_context(self, ytcfg=None, default_client='web'):

401

context = get_first(

402

(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)

403

# Enforce language and tz for extraction

404

client_context = traverse_obj(context, 'client', expected_type=dict, default={})

405

client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})

return context

_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

411

time_now = round(time.time())

412

if self._SAPISID is None:

413

yt_cookies = self._get_cookies('https://www.youtube.com')

414

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

415

# See: https://github.com/yt-dlp/yt-dlp/issues/393

416

sapisid_cookie = dict_get(

417

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

418

if sapisid_cookie and sapisid_cookie.value:

419

self._SAPISID = sapisid_cookie.value

420

self.write_debug('Extracted SAPISID cookie')

421

# SAPISID cookie is required if not already present

422

if not yt_cookies.get('SAPISID'):

423

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

424

self._set_cookie(

425

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

426

else:

427

self._SAPISID = False

428

if not self._SAPISID:

429

return None

430

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

431

sapisidhash = hashlib.sha1(

432

f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()

433

return f'SAPISIDHASH {time_now}_{sapisidhash}'

434

435

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` as JSON to the innertube endpoint `ep` and return the result of _download_json

    @param ep              endpoint name appended to https://<host>/youtubei/v1/
    @param query           dict merged over {'context': ...} to form the request body
    @param video_id        id forwarded to _download_json
    @param fatal           forwarded to _download_json
    @param headers         extra headers applied over the generated API headers
    @param note, errnote   forwarded to _download_json
    @param context         innertube context; when falsy, the context of `default_client` is used
    @param api_key         API key; when falsy, the key of `default_client` is used
    @param api_hostname    API host; when falsy, the host of `default_client` is used
    @param default_client  INNERTUBE_CLIENTS name that supplies the fallbacks above
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={
            # Use the key of the same client that supplies the context, headers and host
            'key': api_key or self._extract_api_key(default_client=default_client),
            'prettyPrint': 'false',
        })

450

451

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData assignment in `webpage` and return it parsed as JSON

    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried before the bare one.
    Returns None when nothing was found (only possible with fatal=False).
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not data:
        return None
    return self._parse_json(data, item_id, fatal=fatal)

457

458

@staticmethod

459

def _extract_session_index(*data):

460

"""

461

Index of current account in account list.

462

See: https://github.com/yt-dlp/yt-dlp/pull/519

463

"""

464

for ytcfg in data:

465

session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

466

if session_index is not None:

return session_index

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):

471

if ytcfg:

472

token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)

if token:

return token

if webpage:

return self._search_regex(

477

r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,

478

'identity token', default=None, fatal=False)

479

480

@staticmethod

481

def _extract_account_syncid(*args):

482

"""

483

Extract syncId required to download private playlists of secondary channels

484

@params response and/or ytcfg

485

"""

486

for data in args:

487

# ytcfg includes channel_syncid if on secondary channel

488

delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)

if delegated_sid:

return delegated_sid

sync_ids = (try_get(

data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],

493

lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')

494

if len(sync_ids) >= 2 and sync_ids[1]:

495

# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel

496

# and just "user_syncid||" for primary channel. We only want the channel_syncid

return sync_ids[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

@property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated (i.e. a SAPISID is available)"""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

512

513

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to `ytcfg.set({...});` in `webpage` and parse it as JSON

    Returns {} when `webpage` is empty, no such call is found,
    or parsing yields nothing.
    """
    if not webpage:
        return {}
    # The call's parentheses must be escaped to match literally; group 1 is the JSON argument
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

520

521

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request

    Explicitly passed values take precedence over values extracted from `ytcfg`;
    client name/version fall back to `default_client`. Headers whose value is None
    are left out. 'Authorization' and 'X-Origin' are added only when a
    SAPISIDHASH header can be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # An account syncid without a known session index implies the first account
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}

545

546

@staticmethod

547

def _build_api_continuation_query(continuation, ctp=None):

548

query = {

549

'continuation': continuation

550

}

551

# TODO: Inconsistency with clickTrackingParams.

552

# Currently we have a fixed ctp contained within context (from ytcfg)

553

# and a ctp in root query for continuation.

554

if ctp:

555

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData/reloadContinuationData
    of `renderer`; returns None when there is no such data or it has no token
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))

570

571

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict
    (token taken from its continuationCommand); returns None when
    `continuation_ep` is not a dict or carries no token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

580

581

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None if it has none

    The renderer-level continuation data is preferred; otherwise the entries of
    'contents' and 'items' are scanned for a continuationItemRenderer.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    contents = [
        content
        for key in ('contents', 'items')
        for content in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for content in contents:
        if not isinstance(content, dict):
            continue
        continuation_ep = try_get(content, endpoint_getters, dict)
        continuation = cls._extract_continuation_ep_data(continuation_ep)
        if continuation:
            return continuation

@classmethod
def _extract_alerts(cls, data):
    """
    Yield an (alert_type, message) tuple for every alert in data['alerts']
    that has both a 'type' and a non-empty text

    Malformed entries (non-dict alert containers or non-dict alerts) are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard against unexpected value types; `.get` below requires a dict
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

614

615

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

616

errors = []

617

warnings = []

618

for alert_type, alert_message in alerts:

619

if alert_type.lower() == 'error' and fatal:

620

errors.append([alert_type, alert_message])

621

else:

622

warnings.append([alert_type, alert_message])

623

624

for alert_type, alert_message in (warnings + errors[:-1]):

625

self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)

626

if errors:

627

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

628

629

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them to _report_alerts with the given options"""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

631

632

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']"""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths of `data`

    For each candidate object the 'simpleText' is preferred; otherwise the 'text'
    of its 'runs' (or of the object itself if it is a list) are joined, using at
    most `max_runs` runs when given. With no path, `data` itself is inspected.
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Paths without branching (... or a list/tuple key) give a single object
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            joined_text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined_text:
                return joined_text

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at `path_list` of `data`

    parse_count is tried first; if it gives None, the leading run of digits and
    commas (after removing all whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of dicts with 'url', 'height' and 'width'; entries without
    a valid URL are skipped.
    """
    thumbnails = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            thumbnail_url = url_or_none(entry.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                # Keep only the part before the query string
                thumbnail_url = thumbnail_url.partition('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return thumbnails

@staticmethod

def extract_relative_time(relative_time_text):

696

"""

697

Extracts a relative time from string and converts to dt object

698

e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

"""

if mobj:

start = mobj.group('start')

703

if start:

704

return datetime_from_str(start)

705

try:

706

return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))

except ValueError:

return None

def _extract_time_text(self, renderer, *path_list):
    """
    Return (timestamp, text) for the time text found at `path_list` of `renderer`

    The text is first interpreted as a relative time, then as a date (whole text,
    then a date-like fragment of it). A warning is reported once if a non-empty
    text could not be parsed; timestamp is None in that case.
    """
    text = self._get_text(renderer, *path_list) or ''

    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

727

728

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the innertube endpoint `ep`, retrying up to 'extractor_retries' times

    Retries happen on network errors (except HTTP 403/429), on 'unknown error'
    alerts in the response, and when none of `check_get_keys` is present in the
    response. With fatal=False, failures are reported as warnings and None is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response, last_error = None, None
    count = -1
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        retry_suffix = ' (retry #%d)' % count if count else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, network_exceptions):
                is_http_error = isinstance(cause, compat_HTTPError)
                if is_http_error:
                    first_bytes = cause.read(512)
                    if not is_html(first_bytes):
                        # Surface the error message of a JSON error body as a warning
                        error_body = self._webpage_read_content(
                            cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not is_http_error or cause.code not in (403, 429):
                    last_error = error_to_compat_str(cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Every path through the handler above leaves this iteration,
        # so from here on the API call has succeeded
        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if count >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response

@staticmethod

def is_music_url(url):

802

return re.match(r'https?://music\.youtube\.com/', url) is not None

803

804

def _extract_video(self, renderer):
    """
    Build a 'url' type result (handled by YoutubeIE) from a video renderer dict

    The URL points to /shorts/ when the overlay style or the navigation URL marks
    the video as a short, otherwise to /watch. 'upload_date' is only filled in when
    the 'approximate_date' extractor argument of youtubetab is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration contained in the accessibility label of the title
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))

    is_short = overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url)
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

856

IE_DESC = 'YouTube'

857

_VALID_URL = r"""(?x)^

858

(

859

(?:https?://|//) # http(s):// or protocol-independent URL

860

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

861

(?:www\.)?deturl\.com/www\.youtube\.com|

862

(?:www\.)?pwnyoutube\.com|

863

(?:www\.)?hooktube\.com|

864

(?:www\.)?yourepeat\.com|

865

tube\.majestyc\.net|

866

%(invidious)s|

867

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

868

(?:.*?\#/)? # handle anchor (#/) redirect urls

869

(?: # the various things that can precede the ID:

870

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

871

|(?: # or the v= param in all its forms

872

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

873

(?:\?|\#!?) # the params delimiter ? or # or #!

874

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

880

vid\.plus| # or vid.plus/xxxx

881

zwearz\.com/watch| # or zwearz.com/watch/xxxx

882

%(invidious)s

883

)/

884

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

885

)

886

)? # all until now is optional -> you can pass the naked ID

887

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

888

(?(1).+)? # if we found the ID, everything can follow

889

(?:\#|$)""" % {

890

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

891

}

892

_PLAYER_INFO_RE = (

893

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

894

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

895

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

896

)

897

_formats = {

898

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

899

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

900

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

901

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

902

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

903

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

904

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

905

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

906

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

907

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

908

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

909

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

910

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

911

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

912

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

913

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

914

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

915

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

920

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

921

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

922

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

923

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

924

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

925

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

926

927

# Apple HTTP Live Streaming

928

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

929

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

930

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

931

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

932

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

933

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

934

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

935

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

936

937

# DASH mp4 video

938

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

939

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

940

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

941

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

942

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

943

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

944

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

945

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

946

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

947

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

948

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

949

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

950

951

# Dash mp4 audio

952

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

953

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

954

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

955

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

956

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

957

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

958

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

959

960

# Dash webm

961

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

962

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

963

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

964

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

965

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

966

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

967

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

968

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

969

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

970

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

971

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

972

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

974

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

975

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

976

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

977

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

978

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

979

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

980

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

981

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

982

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

983

984

# Dash webm audio

985

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

986

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

987

988

# Dash webm audio with opus inside

989

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

990

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

991

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

992

993

# RTMP (unnamed)

994

'_rtmp': {'protocol': 'rtmp'},

995

996

# av01 video only formats sometimes served with "unknown" codecs

997

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

998

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

999

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1000

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1001

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1002

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1003

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1004

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1005

}

1006

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1018

'uploader': 'Philipp Hagemeister',

1019

'uploader_id': 'phihag',

1020

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1021

'channel': 'Philipp Hagemeister',

1022

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1023

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1024

'upload_date': '20121002',

1025

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1026

'categories': ['Science & Technology'],

1027

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1032

'playable_in_embed': True,

1033

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1034

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1043

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1048

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1049

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1050

'uploader': 'SET India',

1051

'uploader_id': 'setindia',

1052

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1053

'age_limit': 18,

1054

},

1055

'skip': 'Private video',

1056

},

1057

{

1058

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1059

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1064

'uploader': 'Philipp Hagemeister',

1065

'uploader_id': 'phihag',

1066

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1067

'channel': 'Philipp Hagemeister',

1068

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1069

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1070

'upload_date': '20121002',

1071

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1072

'categories': ['Science & Technology'],

1073

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1078

'playable_in_embed': True,

1079

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1080

'live_status': 'not_live',

1081

'age_limit': 0,

1082

'channel_follower_count': int

1083

},

1084

'params': {

1085

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1090

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1095

'uploader_id': '8KVIDEO',

1096

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1097

'description': '',

1098

'uploader': '8KVIDEO',

1099

'title': 'UHDTV TEST 8K VIDEO.mp4'

1100

},

1101

'params': {

1102

'youtube_include_dash_manifest': True,

1103

'format': '141',

1104

},

1105

'skip': 'format 141 not served anymore',

1106

},

1107

# DASH manifest with encrypted signature

1108

{

1109

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1114

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1115

'duration': 244,

1116

'uploader': 'AfrojackVEVO',

1117

'uploader_id': 'AfrojackVEVO',

1118

'upload_date': '20131011',

1119

'abr': 129.495,

1120

'like_count': int,

1121

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1122

'playable_in_embed': True,

1123

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1124

'view_count': int,

1125

'track': 'The Spark',

1126

'live_status': 'not_live',

1127

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1128

'channel': 'Afrojack',

1129

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1130

'tags': 'count:19',

1131

'availability': 'public',

1132

'categories': ['Music'],

1133

'age_limit': 0,

1134

'alt_title': 'The Spark',

1135

'channel_follower_count': int

1136

},

1137

'params': {

1138

'youtube_include_dash_manifest': True,

1139

'format': '141/bestaudio[ext=m4a]',

1140

},

1141

},

1142

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1143

{

1144

'note': 'Embed allowed age-gate video',

1145

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1150

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1151

'duration': 142,

1152

'uploader': 'The Witcher',

1153

'uploader_id': 'WitcherGame',

1154

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1155

'upload_date': '20140605',

1156

'age_limit': 18,

1157

'categories': ['Gaming'],

1158

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1159

'availability': 'needs_auth',

1160

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1161

'like_count': int,

1162

'channel': 'The Witcher',

1163

'live_status': 'not_live',

1164

'tags': 'count:17',

1165

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1166

'playable_in_embed': True,

1167

'view_count': int,

1168

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1173

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1178

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1179

'upload_date': '20200408',

1180

'uploader_id': 'FlyingKitty900',

1181

'uploader': 'FlyingKitty',

1182

'age_limit': 18,

1183

'availability': 'needs_auth',

1184

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1185

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1186

'channel': 'FlyingKitty',

1187

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1188

'view_count': int,

1189

'categories': ['Entertainment'],

1190

'live_status': 'not_live',

1191

'tags': ['Flyingkitty', 'godzilla 2'],

1192

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1193

'like_count': int,

1194

'duration': 177,

1195

'playable_in_embed': True,

1196

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1201

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1202

'info_dict': {

1203

'id': 'Tq92D6wQ1mg',

1204

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1205

'ext': 'mp4',

1206

'upload_date': '20191228',

1207

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1208

'uploader': 'Projekt Melody',

1209

'description': 'md5:17eccca93a786d51bc67646756894066',

1210

'age_limit': 18,

1211

'like_count': int,

1212

'availability': 'needs_auth',

1213

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1214

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1215

'view_count': int,

1216

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1217

'channel': 'Projekt Melody',

1218

'live_status': 'not_live',

1219

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1220

'playable_in_embed': True,

1221

'categories': ['Entertainment'],

1222

'duration': 106,

1223

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1224

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1229

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1234

'uploader': 'Herr Lurik',

1235

'uploader_id': 'st3in234',

1236

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1237

'upload_date': '20130730',

1238

'track': 'Such mich find mich',

1239

'age_limit': 0,

1240

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1241

'like_count': int,

1242

'playable_in_embed': False,

1243

'creator': 'OOMPH!',

1244

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1245

'view_count': int,

1246

'alt_title': 'Such mich find mich',

1247

'duration': 210,

1248

'channel': 'Herr Lurik',

1249

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1250

'categories': ['Music'],

1251

'availability': 'public',

1252

'uploader_url': 'http://www.youtube.com/user/st3in234',

1253

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1254

'live_status': 'not_live',

1255

'artist': 'OOMPH!',

1256

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1261

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1262

'only_matching': True,

1263

},

1264

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1265

# YouTube Red ad is not captured for creator

1266

{

1267

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1273

'uploader_id': 'deadmau5',

1274

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1275

'creator': 'deadmau5',

1276

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1277

'uploader': 'deadmau5',

1278

'title': 'Deadmau5 - Some Chords (HD)',

1279

'alt_title': 'Some Chords',

1280

'availability': 'public',

1281

'tags': 'count:14',

1282

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1283

'view_count': int,

1284

'live_status': 'not_live',

1285

'channel': 'deadmau5',

1286

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1287

'like_count': int,

1288

'track': 'Some Chords',

1289

'artist': 'deadmau5',

1290

'playable_in_embed': True,

1291

'age_limit': 0,

1292

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1293

'categories': ['Music'],

1294

'album': 'Some Chords',

1295

'channel_follower_count': int

1296

},

1297

'expected_warnings': [

1298

'DASH manifest missing',

1299

]

1300

},

1301

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1302

{

1303

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1309

'uploader_id': 'olympic',

1310

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1311

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1312

'uploader': 'Olympics',

1313

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1314

'like_count': int,

1315

'release_timestamp': 1343767800,

1316

'playable_in_embed': True,

1317

'categories': ['Sports'],

1318

'release_date': '20120731',

1319

'channel': 'Olympics',

1320

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1321

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1322

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1323

'age_limit': 0,

1324

'availability': 'public',

1325

'live_status': 'was_live',

1326

'view_count': int,

1327

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1328

'channel_follower_count': int

1329

},

1330

'params': {

1331

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1341

'duration': 85,

1342

'upload_date': '20110310',

1343

'uploader_id': 'AllenMeow',

1344

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1345

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1346

'uploader': '孫ᄋᄅ',

1347

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1348

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1353

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1354

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1355

'view_count': int,

1356

'categories': ['People & Blogs'],

1357

'like_count': int,

1358

'live_status': 'not_live',

1359

'availability': 'unlisted',

1360

'channel_follower_count': int

1361

},

1362

},

1363

# url_encoded_fmt_stream_map is empty string

1364

{

1365

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1370

'description': '',

1371

'upload_date': '20150404',

1372

'uploader_id': 'spbelect',

1373

'uploader': 'Наблюдатели Петербурга',

1374

},

1375

'params': {

1376

'skip_download': 'requires avconv',

1377

},

1378

'skip': 'This live event has ended.',

1379

},

1380

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1381

{

1382

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1387

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1388

'duration': 220,

1389

'upload_date': '20150625',

1390

'uploader_id': 'dorappi2000',

1391

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1392

'uploader': 'dorappi2000',

1393

'formats': 'mincount:31',

1394

},

1395

'skip': 'not actual anymore',

1396

},

1397

# DASH manifest with segment_list

1398

{

1399

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1400

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1405

'uploader': 'Airtek',

1406

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1407

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1408

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1409

},

1410

'params': {

1411

'youtube_include_dash_manifest': True,

1412

'format': '135', # bestvideo

1413

},

1414

'skip': 'This live event has ended.',

1415

},

1416

{

1417

# Multifeed videos (multiple cameras), URL is for Main Camera

1418

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1419

'info_dict': {

1420

'id': 'jvGDaLqkpTg',

1421

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1422

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1429

'description': 'md5:e03b909557865076822aa169218d6a5d',

1430

'duration': 10643,

1431

'upload_date': '20161111',

1432

'uploader': 'Team PGP',

1433

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1434

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1441

'description': 'md5:e03b909557865076822aa169218d6a5d',

1442

'duration': 10991,

1443

'upload_date': '20161111',

1444

'uploader': 'Team PGP',

1445

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1446

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1453

'description': 'md5:e03b909557865076822aa169218d6a5d',

1454

'duration': 10995,

1455

'upload_date': '20161111',

1456

'uploader': 'Team PGP',

1457

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1458

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1465

'description': 'md5:e03b909557865076822aa169218d6a5d',

1466

'duration': 10990,

1467

'upload_date': '20161111',

1468

'uploader': 'Team PGP',

1469

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1470

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1475

},

1476

'skip': 'Not multifeed anymore',

1477

},

1478

{

1479

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1480

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1481

'info_dict': {

1482

'id': 'gVfLd0zydlo',

1483

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1484

},

1485

'playlist_count': 2,

1486

'skip': 'Not multifeed anymore',

1487

},

1488

{

1489

'url': 'https://vid.plus/FlRa-iH7PGw',

1490

'only_matching': True,

1491

},

1492

{

1493

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1494

'only_matching': True,

1495

},

1496

{

1497

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1498

# Also tests cut-off URL expansion in video description (see

1499

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1500

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1501

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1506

'alt_title': 'Dark Walk',

1507

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1508

'duration': 133,

1509

'upload_date': '20151119',

1510

'uploader_id': 'IronSoulElf',

1511

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1512

'uploader': 'IronSoulElf',

1513

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1514

'track': 'Dark Walk',

1515

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1516

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1517

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1518

'categories': ['Film & Animation'],

1519

'view_count': int,

1520

'live_status': 'not_live',

1521

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1522

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1523

'tags': 'count:13',

1524

'availability': 'public',

1525

'channel': 'IronSoulElf',

1526

'playable_in_embed': True,

1527

'like_count': int,

1528

'age_limit': 0,

1529

'channel_follower_count': int

1530

},

1531

'params': {

1532

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1537

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1538

'only_matching': True,

1539

},

1540

{

1541

# Video with yt:stretch=17:0

1542

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1547

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1548

'upload_date': '20151107',

1549

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1550

'uploader': 'CH GAMER DROID',

1551

},

1552

'params': {

1553

'skip_download': True,

1554

},

1555

'skip': 'This video does not exist.',

1556

},

1557

{

1558

# Video with incomplete 'yt:stretch=16:'

1559

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1560

'only_matching': True,

1561

},

1562

{

1563

# Video licensed under Creative Commons

1564

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1569

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1570

'duration': 721,

1571

'upload_date': '20150128',

1572

'uploader_id': 'BerkmanCenter',

1573

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1574

'uploader': 'The Berkman Klein Center for Internet & Society',

1575

'license': 'Creative Commons Attribution license (reuse allowed)',

1576

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1577

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1578

'like_count': int,

1579

'age_limit': 0,

1580

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1581

'channel': 'The Berkman Klein Center for Internet & Society',

1582

'availability': 'public',

1583

'view_count': int,

1584

'categories': ['Education'],

1585

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1586

'live_status': 'not_live',

1587

'playable_in_embed': True,

1588

'channel_follower_count': int

1589

},

1590

'params': {

1591

'skip_download': True,

},

},

{

# Channel-like uploader_url

1596

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1601

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1602

'duration': 4060,

1603

'upload_date': '20151120',

1604

'uploader': 'Bernie Sanders',

1605

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1606

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1607

'license': 'Creative Commons Attribution license (reuse allowed)',

1608

'playable_in_embed': True,

1609

'tags': 'count:12',

1610

'like_count': int,

1611

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1612

'age_limit': 0,

1613

'availability': 'public',

1614

'categories': ['News & Politics'],

1615

'channel': 'Bernie Sanders',

1616

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1617

'view_count': int,

1618

'live_status': 'not_live',

1619

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1620

'channel_follower_count': int

1621

},

1622

'params': {

1623

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1628

'only_matching': True,

1629

},

1630

{

1631

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1632

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1633

'only_matching': True,

1634

},

1635

{

1636

# Rental video preview

1637

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1642

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1643

'upload_date': '20150811',

1644

'uploader': 'FlixMatrix',

1645

'uploader_id': 'FlixMatrixKaravan',

1646

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1647

'license': 'Standard YouTube License',

1648

},

1649

'params': {

1650

'skip_download': True,

1651

},

1652

'skip': 'This video is not available.',

1653

},

1654

{

1655

# YouTube Red video with episode data

1656

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1661

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1662

'duration': 2085,

1663

'upload_date': '20170118',

1664

'uploader': 'Vsauce',

1665

'uploader_id': 'Vsauce',

1666

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1667

'series': 'Mind Field',

1668

'season_number': 1,

1669

'episode_number': 1,

1670

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1671

'tags': 'count:12',

1672

'view_count': int,

1673

'availability': 'public',

1674

'age_limit': 0,

1675

'channel': 'Vsauce',

1676

'episode': 'Episode 1',

1677

'categories': ['Entertainment'],

1678

'season': 'Season 1',

1679

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1680

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1681

'like_count': int,

1682

'playable_in_embed': True,

1683

'live_status': 'not_live',

1684

'channel_follower_count': int

1685

},

1686

'params': {

1687

'skip_download': True,

1688

},

1689

'expected_warnings': [

1690

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1695

# as inappropriate or offensive to some audiences.

1696

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1701

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1702

'duration': 965,

1703

'upload_date': '20140124',

1704

'uploader': 'New Century Foundation',

1705

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1706

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1707

},

1708

'params': {

1709

'skip_download': True,

1710

},

1711

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1716

'only_matching': True,

1717

},

1718

{

1719

# geo restricted to JP

1720

'url': 'sJL6WA-aGkQ',

1721

'only_matching': True,

1722

},

1723

{

1724

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1725

'only_matching': True,

1726

},

1727

{

1728

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1729

'only_matching': True,

1730

},

1731

{

1732

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1733

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1734

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1739

'only_matching': True,

1740

},

1741

{

1742

# Video with unsupported adaptive stream type formats

1743

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1748

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1749

'duration': 433,

1750

'upload_date': '20130923',

1751

'uploader': 'Amelia Putri Harwita',

1752

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1753

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1754

'formats': 'maxcount:10',

1755

},

1756

'params': {

1757

'skip_download': True,

1758

'youtube_include_dash_manifest': False,

1759

},

1760

'skip': 'not actual anymore',

1761

},

1762

{

1763

# YouTube Music auto-generated description

1764

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1769

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1770

'upload_date': '20190312',

1771

'uploader': 'Stephen - Topic',

1772

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1773

'artist': 'Stephen',

1774

'track': 'Voyeur Girl',

1775

'album': 'it\'s too much love to know my dear',

1776

'release_date': '20190313',

1777

'release_year': 2019,

1778

'alt_title': 'Voyeur Girl',

1779

'view_count': int,

1780

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1781

'playable_in_embed': True,

1782

'like_count': int,

1783

'categories': ['Music'],

1784

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1785

'channel': 'Stephen',

1786

'availability': 'public',

1787

'creator': 'Stephen',

1788

'duration': 169,

1789

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1790

'age_limit': 0,

1791

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1792

'tags': 'count:11',

1793

'live_status': 'not_live',

1794

'channel_follower_count': int

1795

},

1796

'params': {

1797

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1802

'only_matching': True,

1803

},

1804

{

1805

# invalid -> valid video id redirection

1806

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1811

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1812

'upload_date': '20090125',

1813

'uploader': 'Prochorowka',

1814

'uploader_id': 'Prochorowka',

1815

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1816

'artist': 'Panjabi MC',

1817

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1818

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1819

},

1820

'params': {

1821

'skip_download': True,

1822

},

1823

'skip': 'Video unavailable',

1824

},

1825

{

1826

# empty description results in an empty string

1827

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1834

'uploader_id': 'ElevageOrVert',

1835

'uploader': 'ElevageOrVert',

1836

'view_count': int,

1837

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1838

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1839

'like_count': int,

1840

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1841

'tags': [],

1842

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1843

'availability': 'public',

1844

'age_limit': 0,

1845

'categories': ['Pets & Animals'],

1846

'duration': 7,

1847

'playable_in_embed': True,

1848

'live_status': 'not_live',

1849

'channel': 'ElevageOrVert',

1850

'channel_follower_count': int

1851

},

1852

'params': {

1853

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1858

# see [2] for an example with '};' inside ytInitialPlayerResponse

1859

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1860

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1861

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1866

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1867

'upload_date': '20130831',

1868

'uploader_id': 'kudvenkat',

1869

'uploader': 'kudvenkat',

1870

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1871

'like_count': int,

1872

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1873

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1874

'live_status': 'not_live',

1875

'categories': ['Education'],

1876

'availability': 'public',

1877

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1878

'tags': 'count:12',

1879

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1884

'channel_follower_count': int

1885

},

1886

'params': {

1887

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1892

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1893

'only_matching': True,

1894

},

1895

{

1896

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1897

'only_matching': True,

1898

},

1899

{

1900

# https://github.com/ytdl-org/youtube-dl/pull/28094

1901

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1907

'upload_date': '20141120',

1908

'uploader': 'The Cinematic Orchestra - Topic',

1909

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1910

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1911

'artist': 'The Cinematic Orchestra',

1912

'track': 'Burn Out',

1913

'album': 'Every Day',

1914

'like_count': int,

1915

'live_status': 'not_live',

1916

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1921

'creator': 'The Cinematic Orchestra',

1922

'channel': 'The Cinematic Orchestra',

1923

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1924

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1925

'availability': 'public',

1926

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1927

'categories': ['Music'],

1928

'playable_in_embed': True,

1929

'channel_follower_count': int

1930

},

1931

'params': {

1932

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1937

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1938

'only_matching': True,

1939

},

1940

{

1941

# controversial video, requires bpctr/contentCheckOk

1942

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1947

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1948

'uploader': 'CBS Mornings',

1949

'uploader_id': 'CBSThisMorning',

1950

'upload_date': '20140716',

1951

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1952

'duration': 170,

1953

'categories': ['News & Politics'],

1954

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1955

'view_count': int,

1956

'channel': 'CBS Mornings',

1957

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1958

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1959

'age_limit': 18,

1960

'availability': 'needs_auth',

1961

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1962

'like_count': int,

1963

'live_status': 'not_live',

1964

'playable_in_embed': True,

1965

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1970

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1975

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1976

'upload_date': '20201120',

1977

'uploader': 'Walk around Japan',

1978

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1979

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1980

'duration': 1456,

1981

'categories': ['Travel & Events'],

1982

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1983

'view_count': int,

1984

'channel': 'Walk around Japan',

1985

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1986

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1987

'age_limit': 0,

1988

'availability': 'public',

1989

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1990

'live_status': 'not_live',

1991

'playable_in_embed': True,

1992

'channel_follower_count': int

1993

},

1994

'params': {

1995

'skip_download': True,

1996

},

1997

}, {

1998

# Has multiple audio streams

1999

'url': 'WaOKSUlf4TM',

2000

'only_matching': True

2001

}, {

2002

# Requires Premium: has format 141 when requested using YTM url

2003

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2004

'only_matching': True

2005

}, {

2006

# multiple subtitles with same lang_code

2007

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2008

'only_matching': True,

2009

}, {

2010

# Force use android client fallback

2011

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2012

'info_dict': {

2013

'id': 'YOelRv7fMxY',

2014

'title': 'DIGGING A SECRET TUNNEL Part 1',

2015

'ext': '3gp',

2016

'upload_date': '20210624',

2017

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2018

'uploader': 'colinfurze',

2019

'uploader_id': 'colinfurze',

2020

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2021

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2022

'duration': 596,

2023

'categories': ['Entertainment'],

2024

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2025

'view_count': int,

2026

'channel': 'colinfurze',

2027

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2028

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2029

'age_limit': 0,

2030

'availability': 'public',

2031

'like_count': int,

2032

'live_status': 'not_live',

2033

'playable_in_embed': True,

2034

'channel_follower_count': int

2035

},

2036

'params': {

2037

'format': '17', # 3gp format available on android

2038

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2043

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2044

'only_matching': True,

2045

'params': {

2046

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2051

'only_matching': True,

2052

}, {

2053

'note': 'Storyboards',

2054

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2060

'uploader_id': 'scishow',

2061

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2062

'upload_date': '20140324',

2063

'uploader': 'SciShow',

2064

'like_count': int,

2065

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2066

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2067

'view_count': int,

2068

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2069

'playable_in_embed': True,

2070

'tags': 'count:12',

2071

'uploader_url': 'http://www.youtube.com/user/scishow',

2072

'availability': 'public',

2073

'channel': 'SciShow',

2074

'live_status': 'not_live',

2075

'duration': 248,

2076

'categories': ['Education'],

2077

'age_limit': 0,

2078

'channel_follower_count': int

2079

}, 'params': {'format': 'mhtml', 'skip_download': True}

2080

}, {

2081

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2082

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2087

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2088

'uploader': 'Leon Nguyen',

2089

'uploader_id': 'VNSXIII',

2090

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2091

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2092

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2097

'tags': 'count:23',

2098

'playable_in_embed': True,

2099

'live_status': 'not_live',

2100

'upload_date': '20220103',

2101

'like_count': int,

2102

'availability': 'public',

2103

'channel': 'Leon Nguyen',

2104

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2105

'channel_follower_count': int

2106

}

2107

}, {

2108

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2109

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2114

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2115

'uploader': 'Quackity',

2116

'uploader_id': 'QuackityHQ',

2117

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2118

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2119

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2124

'tags': 'count:26',

2125

'playable_in_embed': True,

2126

'live_status': 'not_live',

2127

'release_timestamp': 1641172509,

2128

'release_date': '20220103',

2129

'upload_date': '20220103',

2130

'like_count': int,

2131

'availability': 'public',

2132

'channel': 'Quackity',

2133

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2134

'channel_follower_count': int

2135

}

2136

},

2137

{ # continuous livestream. Microformat upload date should be preferred.

2138

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2139

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2140

'info_dict': {

2141

'id': 'kgx4WGK0oNU',

2142

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2143

'ext': 'mp4',

2144

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2145

'availability': 'public',

2146

'age_limit': 0,

2147

'release_timestamp': 1637975704,

2148

'upload_date': '20210619',

2149

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2150

'live_status': 'is_live',

2151

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2152

'uploader': '阿鲍Abao',

2153

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2154

'channel': 'Abao in Tokyo',

2155

'channel_follower_count': int,

2156

'release_date': '20211127',

2157

'tags': 'count:39',

2158

'categories': ['People & Blogs'],

2159

'like_count': int,

2160

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2161

'view_count': int,

2162

'playable_in_embed': True,

2163

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2164

},

2165

'params': {'skip_download': True}

},

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    A URL whose query string carries a non-empty `list` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)

2177

2178

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS
    # _player_cache: assorted keys -> extracted signature/nsig functions and values
    self._code_cache, self._player_cache = {}, {}

2182

2183

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the `is_from_start` formats into lazily generated live DASH downloads.

    Every format dict flagged with `is_from_start` is mutated in place: it is
    marked as live, switched to the 'http_dash_segments_generator' protocol
    and given a `fragments` callable bound to `_live_dash_fragments`.
    The nested helpers share one view of the format list, which is
    re-downloaded (under a lock) once it is older than the delay requested
    by the caller of `mpd_feed`.
    """
    manifest_lock = threading.Lock()

    still_live = True
    fetched_at = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Refresh the shared state only when the last fetch is older than `delay` seconds
        nonlocal formats, fetched_at, still_live
        if time.time() <= fetched_at + delay:
            return

        _, _, player_responses, player_url = self._download_player_responses(
            url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, still_live, _, formats = self._list_formats(
            video_id, microformats, video_details, player_responses, player_url)
        fetched_at = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with manifest_lock:
            refetch_manifest(format_id, delay)

        matching = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
        if matching:
            return matching['manifest_url'], matching['manifest_stream_number'], still_live

        # The requested format disappeared from the refreshed list
        if still_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2226

2227

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment dicts (`{'url': ...}`) for a live DASH format.

    @param format_id        id of the format to follow
    @param live_start_time  start time of the stream, or a falsy value if unknown
    @param mpd_feed         callable `(format_id, delay)` returning
                            `(manifest_url, manifest_stream_number, is_live)` or None
    @param ctx              dict; `start` and `last_error` are read from it
                            (`last_error` is popped)

    The newest sequence number is taken from the "X-Head-Seqnum" header of
    the last yielded segment when there is one, and from the MPD otherwise.
    The generator stops once the stream is reported as no longer live or
    once `no_fragment_score` exceeds 30.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must exist before the first manifest fetch: the helper below hands it
    # back unchanged on failure, and an unbound name would raise NameError.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        # A bare next() would leak StopIteration into this generator
        # (turned into RuntimeError); a missing stream is handled like a failed fetch.
        fmt_info = next((x for x in fmts if x['manifest_stream_number'] == stream_number), None)
        if fmt_info is None:
            no_fragment_score += 2
            return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught below; used to restart the outer while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Poll at most once every FETCH_SPAN seconds
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2327

2328

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None.

    The first string found under `PLAYER_JS_URL`, or failing that under
    `WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl`, is resolved against
    https://www.youtube.com. `webpage` is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs,
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None

2335

2336

def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version referenced by the iframe API script.

    Returns None when the script cannot be downloaded or no player version
    is found in it (both only possible with `fatal=False`).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2345

2346

def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature"""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)

2349

2350

@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern matching `player_url`.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2359

2360

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for `player_url`, downloading it at most once per player id.

    Successfully downloaded code is kept in `self._code_cache`; None is
    returned when the download yields nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        return None
    self._code_cache[player_id] = code
    return code

2370

2371

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like `example_sig`.

    The transformation is looked up in the 'youtube-sigfuncs' cache first, as
    a list of source indices. On a miss the player JS is loaded and parsed,
    the resulting function is probed with a string of distinct characters to
    derive that index list, and the list is stored in the cache.
    Returns None if the player code could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Cache key: player id plus the length layout of the signature
    sig_layout = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_layout)
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    sig_func = self._parse_sig_js(code)

    # Character N of the probe has code point N, so the output spells out the permutation
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, spec)
    return sig_func

2393

2394

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    `func` is probed with a string of distinct characters as long as
    `example_sig`; the resulting index list is rendered as a sum of
    `s[...]` index and slice expressions and written with `to_screen`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs and
        # its loop variable would be unbound afterwards
        if not idxs:
            return
        if len(idxs) == 1:
            yield 's[%d]' % idxs[0]
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2435

2436

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python callable for it.

    The function name is taken from the `sig` group of the first pattern
    that matches `jscode`; the function is then extracted with
    JSInterpreter. The returned callable takes the signature string and
    passes it to the JS function as its single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2459

2460

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One extracted function per (player, signature length layout)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the traceback text
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2478

2479

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    # Results are memoised per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The extracted function itself is memoised per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2499

2500

def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function referenced in the player JS.

    The reference is either a plain name, or `name[idx]`. In the latter case
    `name` is looked up as a `var name = [...]` array literal and its
    element at `idx` is returned.
    """
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2509

2510

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is read from the 'youtube-nsig' cache when available;
    otherwise it is extracted from the player JS and stored there.
    With the 'youtube_print_sig_code' param set, the code is also printed.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        player_js = self._load_player(video_id, player_url)
        n_name = self._extract_n_function_name(player_js)
        jsi = JSInterpreter(player_js)
        func_code = jsi.extract_function_code(n_name)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function

2527

2528

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The `STS` entry of `ytcfg` is used when it yields a truthy integer;
    otherwise a five-digit value is searched for in the player JS.
    Without a `player_url` this raises ExtractorError if `fatal`, and
    otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts

2551

2552

def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL from the player responses.

    The URL is taken from `playbackTracking/videostatsPlaybackUrl/baseUrl`;
    `ver` and a freshly generated `cpn` are set in its query before the
    request. A missing URL or a failed request only produces a warning.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    parsed = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(parsed.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(
        parsed._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2577

2578

@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in `webpage`, in discovery order.

    Three sources are scanned: generic player embeds (iframe, embed, object,
    SWF and `data-video-url`), lazyYT `data-youtube-id` attributes and the
    `data-video_id` attribute of "YouTube Video Importer" players.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        entries.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The last group is the attribute value itself
        entries.append(groups[-1])

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by `_extract_urls`, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2614

2615

@classmethod
def extract_id(cls, url):
    """Return the `id` group of `_VALID_URL` (matched in verbose mode) for `url`.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2621

2622

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar in `data`.

    Start times are read from `chapterRenderer/timeRangeStartMillis`
    (divided by 1000) and titles from `chapterRenderer/title/simpleText`.
    """
    player_bar_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, player_bar_path, expected_type=list)

    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)

2636

2637

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-marker lists of the engagement panels.

    The panels are tried in order and the first non-empty chapter list is
    returned; an empty list is returned when none of them yields chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def start_of(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def title_of(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            start_of, title_of, duration)
        if chapters:
            return chapters
    return []

2652

2653

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
    """Convert raw chapter entries into a list of chapter dicts.

    @param chapter_list   iterable of raw entries (None is treated as empty)
    @param chapter_time   callable returning an entry's start time, or None
    @param chapter_title  callable returning an entry's title
    @param duration       used as the end time of the last chapter
    @returns list of dicts with `start_time`, `title` and `end_time`

    Entries without a start time are skipped. An entry starting before the
    previous chapter is dropped with a warning, except at index 1, where the
    previously stored chapter is dropped instead.
    """
    chapters = []
    last_chapter = {'start_time': 0}
    for idx, chapter in enumerate(chapter_list or []):
        title = chapter_title(chapter)
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        last_chapter['end_time'] = start_time
        if start_time < last_chapter['start_time']:
            # Only blame the first chapter if one was actually stored;
            # entry 0 may have been skipped, leaving nothing to pop
            if idx == 1 and chapters:
                chapters.pop()
                self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
            else:
                self.report_warning(f'Invalid start time for chapter "{title}"')
                continue
        last_chapter = {'start_time': start_time, 'title': title}
        chapters.append(last_chapter)
    last_chapter['end_time'] = duration
    return chapters

2673

2674

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by `regex` in `webpage` and parse it.

    `regex` followed by `_YT_INITIAL_BOUNDARY_RE` is tried first, then
    `regex` alone; '{}' is parsed when neither matches. Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2678

2679

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no `commentId`. `parent` is the id
    of the parent comment; 'root' is used when it is not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer,
        lambda r: r['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    vote_getters = (
        lambda r: r['voteCount']['simpleText'],
        lambda r: r['likeCount'],
    )
    votes = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer,
        lambda r: r['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda r: r['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda r: r['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2713

2714

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comments (and their replies) by following continuations.

    Starts from the continuation found in `root_continuation_data` and keeps
    requesting the 'next' endpoint until no further continuation is found.
    For every comment thread that has replies, this method calls itself
    recursively with `parent` set to the id of the parent comment.

    @param root_continuation_data  Renderer to read the first continuation from
    @param ytcfg                   Passed through to the API request helpers
    @param video_id                Only used when reporting warnings here
    @param parent                  Id of the parent comment; None at top level
    @param tracker                 Dict of counters shared across the recursive
                                   calls; created here when not given
    """

    def config_value(key):
        # First value of the given extractor argument ('' when unset)
        return self._configuration_arg(key, [''])[0]

    def extract_header(contents):
        """Return the continuation for the requested sort order, if any"""
        found = None
        for item in contents:
            header = traverse_obj(item, 'commentsHeaderRenderer')
            estimated = self._get_count(header, 'countText', 'commentsCount')
            if estimated:
                tracker['est_total'] = estimated
                self.to_screen(f'Downloading ~{estimated} comments')

            # 1 = new, 0 = top
            sort_index = int(config_value('comment_sort') != 'top')
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index],
                dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            found = (
                self._extract_continuation_ep_data(endpoint)
                or self._extract_continuation(menu_item))
            if found:
                label = str_or_none(menu_item.get('title'))
                if not label:
                    label = 'newest first' if sort_index else 'top comments'
                self.to_screen('Sorting comments by %s' % label.lower())
                break
        return found

    def extract_thread(contents):
        """Yield the comments of one page, descending into reply threads"""
        top_level = not parent
        if top_level:
            tracker['current_page_thread'] = 0
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        for item in contents:
            if top_level and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) signals the consumer below to stop
                yield
            thread_renderer = try_get(item, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, item), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(replies, min(max_replies_per_thread, remaining))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_value('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
        if max_depth == 1 and parent:
            return

    # Up to four limits are read; missing ones fall back to sys.maxsize.
    # The first (overall) limit is not used inside this method.
    raw_limits = self._configuration_arg('max_comments', ) + [''] * 4
    _overall_limit, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(raw, default=sys.maxsize) for raw in raw_limits)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None
    page_num = 0

    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        progress = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, progress)
        elif is_first_continuation:
            note = 'Downloading comment section API JSON'
        else:
            note = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], progress)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note,
            check_get_keys='onResponseReceivedEndpoints')

        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The very first response is only used for its header
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break

        page_num += 1

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments, truncated to the first value
    of the 'max_comments' extractor argument (unlimited when that value does
    not parse as an integer). `webpage` is accepted but not used here.
    """
    def iter_comments(page_contents):
        # Pick the first item section that is marked as the comment section
        section = None
        for candidate in traverse_obj(page_contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(contents), 0, limit)

2861

2862

@staticmethod

2863

def _get_checkok_params():

2864

return {'contentCheckOk': True, 'racyCheckOk': True}

2865

2866

@classmethod

2867

def _generate_player_context(cls, sts=None):

2868

context = {

2869

'html5Preference': 'HTML5_PREF_WANTS',

2870

}

2871

if sts is not None:

2872

context['signatureTimestamp'] = sts

2873

return {

2874

'playbackContext': {

2875

'contentPlaybackContext': context

2876

},

2877

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age-gated video

    The response counts as age-gated when 'desktopLegacyAgeGateReason' is
    set, or when the playability 'status' or 'reason' contains one of the
    known markers.

    The markers are lowercase, while status values are written in uppercase
    elsewhere in this file (e.g. 'UNPLAYABLE'), so the comparison is done
    case-insensitively; a case-sensitive check could never match the status
    markers. Non-string values are ignored instead of raising TypeError.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

2891

2892

@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2895

2896

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for one client

    The signature timestamp is only looked up when `player_url` is given.
    The request is made with fatal=True; a falsy response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

2912

2913

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into client names

    Each requested value is either an allowed client name (any key of
    INNERTUBE_CLIENTS that does not start with '_'), 'default' or 'all'
    (every allowed client, highest 'priority' first). Anything else is
    skipped with a warning. When nothing usable was requested, the default
    clients are used. For music URLs, the '<client>_music' variant of every
    selected client is appended when such a client exists.

    @returns  The selected clients, de-duplicated via orderedSet
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that extending the result below never touches `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates over that same list would also visit the new elements
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2936

2937

def _extract_player_ytcfg(self, client, video_id):
    """Download the client-specific page and extract its ytcfg

    Only 'web_music' and 'web_embedded' have a page to download; an empty
    dict is returned for any other client, or when nothing is extracted.
    """
    config_pages = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    note = 'Downloading %s config' % client.replace('_', ' ').strip()
    webpage = self._download_webpage(page_url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}

2946

2947

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of all requested clients

    Clients are tried in the given order. Additional clients may be queued
    while iterating, depending on the age-gate/unplayable checks made on
    each response. An ExtractorError raised for a client is remembered; when
    no response at all was collected the last such error is re-raised,
    otherwise it is only reported as a warning.

    @returns  (list of player responses, player_url)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    seen_clients = set(clients)
    # Reversed so that pop() takes the clients in their original order
    pending = clients[::-1]
    responses = []

    def append_client(*client_names):
        """ Append the first client name that exists """
        for name in client_names:
            if name not in INNERTUBE_CLIENTS:
                continue
            if name not in seen_clients:
                pending.append(name)
                seen_clients.add(name)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped = dict(initial_pr)
        stripped['streamingData'] = None
        responses.append(stripped)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client = pending.pop()

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        if not player_url:
            player_url = self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                # The response embedded in the webpage is reused as-is
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_embedded', f'{client.replace("_embedded", "")}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

3018

3019

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Generate format dicts from the given streaming data

    First yields the formats listed under 'formats'/'adaptiveFormats'
    (decrypting the signature and the 'n' parameter where present), then the
    formats of the HLS and DASH manifests unless those are skipped.

    @param streaming_data  Iterable of 'streamingData' dicts
    @param player_url      Player JS URL; formats that need signature
                           decryption are dropped when it is not available
    @param is_live         Affects which manifests are requested
    @param duration        Used to detect formats that look damaged
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an 'audioQuality' key that is present but None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` may still be None here when the format carries no
                # quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format

        Returns False when the same itag was already seen for this
        protocol, in which case the format should be dropped.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag from another protocol: disambiguate the format id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Generate storyboard formats from the storyboard spec

    The spec is a '|'-separated string: the first field is the URL template,
    each following field describes one storyboard level as 8 '#'-separated
    values. Levels are emitted last-to-first ('sb0' is the last level of
    the spec). Malformed levels are reported and skipped.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    template, *levels = raw_spec.split('|')
    base_url = url_or_none(urljoin('https://i.ytimg.com/', template or None))
    if not base_url:
        return

    last_level = len(levels) - 1
    for i, level in enumerate(reversed(levels)):
        fields = level.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = fields[6:]

        # $L is the position of the level within the spec, $N its name
        url = base_url.replace('$L', str(last_level - i)).replace('$N', name) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }

3220

3221

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page and the player responses

    The watch page is skipped when 'webpage' is listed in the 'player_skip'
    extractor argument; its download is non-fatal either way.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the default config when none could be extracted
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3234

3235

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract all formats

    'isLive' from the video details takes precedence; 'isLiveNow' from the
    live broadcast details is only consulted when the former is None.

    @returns  (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live, duration)
    formats = list(format_iter)

    return live_broadcast_details, is_live, streaming_data, formats

3245

3246

def _real_extract(self, url):

3247

url, smuggled_data = unsmuggle_url(url, {})

3248

video_id = self._match_id(url)

3249

3250

base_url = self.http_scheme() + '//www.youtube.com/'

3251

webpage_url = base_url + 'watch?v=' + video_id

3252

3253

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3254

3255

playability_statuses = traverse_obj(

3256

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3257

3258

trailer_video_id = get_first(

3259

playability_statuses,

3260

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3261

expected_type=str)

3262

if trailer_video_id:

3263

return self.url_result(

3264

trailer_video_id, self.ie_key(), trailer_video_id)

3265

3266

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3267

if webpage else (lambda x: None))

3268

3269

video_details = traverse_obj(

3270

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3271

microformats = traverse_obj(

3272

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3273

expected_type=dict, default=[])

3274

video_title = (

3275

get_first(video_details, 'title')

3276

or self._get_text(microformats, (..., 'title'))

3277

or search_meta(['og:title', 'twitter:title', 'title']))

3278

video_description = get_first(video_details, 'shortDescription')

3279

3280

multifeed_metadata_list = get_first(

3281

player_responses,

3282

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3283

expected_type=str)

3284

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3285

if self.get_param('noplaylist'):

3286

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3291

# Unquote should take place before split on comma (,) since textual

3292

# fields may contain comma as well (see

3293

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3294

feed_data = compat_parse_qs(

3295

compat_urllib_parse_unquote_plus(feed))

3296

3297

def feed_entry(name):

3298

return try_get(

3299

feed_data, lambda x: x[name][0], compat_str)

3300

3301

feed_id = feed_entry('id')

3302

if not feed_id:

3303

continue

3304

feed_title = feed_entry('title')

3305

title = video_title

3306

if feed_title:

3307

title += ' (%s)' % feed_title

3308

entries.append({

3309

'_type': 'url_transparent',

3310

'ie_key': 'Youtube',

3311

'url': smuggle_url(

3312

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3313

{'force_singlefeed': True}),

3314

'title': title,

3315

})

3316

feed_ids.append(feed_id)

3317

self.to_screen(

3318

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3319

% (', '.join(feed_ids), video_id))

3320

return self.playlist_result(

3321

entries, video_id, video_title, video_description)

3322

3323

duration = int_or_none(

3324

get_first(video_details, 'lengthSeconds')

3325

or get_first(microformats, 'lengthSeconds')

3326

or parse_duration(search_meta('duration'))) or None

3327

3328

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3329

video_id, microformats, video_details, player_responses, player_url, duration)

3330

3331

if not formats:

3332

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3333

self.report_drm(video_id)

3334

pemr = get_first(

3335

playability_statuses,

3336

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3337

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3338

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3339

if subreason:

3340

if subreason == 'The uploader has not made this video available in your country.':

3341

countries = get_first(microformats, 'availableCountries')

3342

if not countries:

3343

regions_allowed = search_meta('regionsAllowed')

3344

countries = regions_allowed.split(',') if regions_allowed else None

3345

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3346

reason += f'. {subreason}'

3347

if reason:

3348

self.raise_no_formats(reason, expected=True)

3349

3350

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3351

if not keywords and webpage:

3352

keywords = [

3353

unescapeHTML(m.group('content'))

3354

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3355

for keyword in keywords:

3356

if keyword.startswith('yt:stretch='):

3357

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3358

if mobj:

3359

# NB: float is intentional for forcing float division

3360

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3365

f['stretched_ratio'] = ratio

3366

break

3367

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3368

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3369

if thumbnail_url:

3370

thumbnails.append({

3371

'url': thumbnail_url,

3372

})

3373

original_thumbnails = thumbnails.copy()

3374

3375

# The best resolution thumbnails sometimes does not appear in the webpage

3376

# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340

3377

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3378

thumbnail_names = [

3379

'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',

3380

'hqdefault', 'hq1', 'hq2', 'hq3', '0',

3381

'mqdefault', 'mq1', 'mq2', 'mq3',

3382

'default', '1', '2', '3'

3383

]

3384

n_thumbnail_names = len(thumbnail_names)

3385

thumbnails.extend({

3386

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3387

video_id=video_id, name=name, ext=ext,

3388

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3389

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3390

for thumb in thumbnails:

3391

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3392

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3393

self._remove_duplicate_formats(thumbnails)

3394

self._downloader._sort_thumbnails(original_thumbnails)

3395

3396

category = get_first(microformats, 'category') or search_meta('genre')

3397

channel_id = str_or_none(

3398

get_first(video_details, 'channelId')

3399

or get_first(microformats, 'externalChannelId')

3400

or search_meta('channelId'))

3401

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3402

3403

live_content = get_first(video_details, 'isLiveContent')

3404

is_upcoming = get_first(video_details, 'isUpcoming')

3405

if is_live is None:

3406

if is_upcoming or live_content is False:

3407

is_live = False

3408

if is_upcoming is None and (live_content or is_live):

3409

is_upcoming = False

3410

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3411

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3412

if not duration and live_end_time and live_start_time:

3413

duration = live_end_time - live_start_time

3414

3415

if is_live and self.get_param('live_from_start'):

3416

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3417

3418

formats.extend(self._extract_storyboard(player_responses, duration))

3419

3420

# Source is given priority since formats that throttle are given lower source_preference

3421

# When throttling issue is fully fixed, remove this

3422

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3427

'formats': formats,

3428

'thumbnails': thumbnails,

3429

# The best thumbnail that we are sure exists. Prevents unnecessary

3430

# URL checking if user don't care about getting the best possible thumbnail

3431

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3432

'description': video_description,

3433

'uploader': get_first(video_details, 'author'),

3434

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3435

'uploader_url': owner_profile_url,

3436

'channel_id': channel_id,

3437

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3438

'duration': duration,

3439

'view_count': int_or_none(

3440

get_first((video_details, microformats), (..., 'viewCount'))

3441

or search_meta('interactionCount')),

3442

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3443

'age_limit': 18 if (

3444

get_first(microformats, 'isFamilySafe') is False

3445

or search_meta('isFamilyFriendly') == 'false'

3446

or search_meta('og:restrictions:age') == '18+') else 0,

3447

'webpage_url': webpage_url,

3448

'categories': [category] if category else None,

3449

'tags': keywords,

3450

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3451

'is_live': is_live,

3452

'was_live': (False if is_live or is_upcoming or live_content is False

3453

else None if is_live is None or is_upcoming is None

3454

else live_content),

3455

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3456

'release_timestamp': live_start_time,

3457

}

3458

3459

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3460

if pctr:

3461

def get_lang_code(track):

3462

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3463

or track.get('languageCode'))

3464

3465

# Converted into dicts to remove duplicates

3466

captions = {

3467

get_lang_code(sub): sub

3468

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3469

translation_languages = {

3470

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3471

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3472

3473

def process_language(container, base_url, lang_code, sub_name, query):

3474

lang_subs = container.setdefault(lang_code, [])

3475

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3486

for lang_code, caption_track in captions.items():

3487

base_url = caption_track.get('baseUrl')

3488

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3489

if not base_url:

3490

continue

3491

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3492

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3497

if not caption_track.get('isTranslatable'):

3498

continue

3499

for trans_code, trans_name in translation_languages.items():

3500

if not trans_code:

3501

continue

3502

orig_trans_code = trans_code

3503

if caption_track.get('kind') != 'asr':

3504

if 'translated_subs' in self._configuration_arg('skip'):

3505

continue

3506

trans_code += f'-{lang_code}'

3507

trans_name += format_field(lang_name, template=' from %s')

3508

# Add an "-orig" label to the original language so that it can be distinguished.

3509

# The subs are returned without "-orig" as well for compatibility

3510

if lang_code == f'a-{orig_trans_code}':

3511

process_language(

3512

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3513

# Setting tlang=lang returns damaged subtitles.

3514

process_language(automatic_captions, base_url, trans_code, trans_name,

3515

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3516

info['automatic_captions'] = automatic_captions

3517

info['subtitles'] = subtitles

3518

3519

parsed_url = compat_urllib_parse_urlparse(url)

3520

for component in [parsed_url.fragment, parsed_url.query]:

3521

query = compat_parse_qs(component)

3522

for k, v in query.items():

3523

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3524

d_k += '_time'

3525

if d_k not in info and k in s_ks:

3526

info[d_k] = parse_duration(query[k][0])

3527

3528

# Youtube Music Auto-generated description

3529

if video_description:

3530

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3531

if mobj:

3532

release_year = mobj.group('release_year')

3533

release_date = mobj.group('release_date')

3534

if release_date:

3535

release_date = release_date.replace('-', '')

3536

if not release_year:

3537

release_year = release_date[:4]

3538

info.update({

3539

'album': mobj.group('album'.strip()),

3540

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3541

'track': mobj.group('track').strip(),

3542

'release_date': release_date,

3543

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3549

webpage, self._YT_INITIAL_DATA_RE, video_id,

3550

'yt initial data')

3551

if not initial_data:

3552

query = {'videoId': video_id}

3553

query.update(self._get_checkok_params())

3554

initial_data = self._extract_response(

3555

item_id=video_id, ep='next', fatal=False,

3556

ytcfg=master_ytcfg, query=query,

3557

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3558

note='Downloading initial data API JSON')

3559

3560

try:

3561

# This will error if there is no livechat

3562

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3563

info.setdefault('subtitles', {})['live_chat'] = [{

3564

'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies

3565

'video_id': video_id,

3566

'ext': 'json',

3567

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

3568

}]

3569

except (KeyError, IndexError, TypeError):

pass

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3575

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3576

or None)

3577

3578

contents = traverse_obj(

3579

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3580

expected_type=list, default=[])

3581

3582

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3583

if vpir:

3584

stl = vpir.get('superTitleLink')

3585

if stl:

3586

stl = self._get_text(stl)

3587

if try_get(

3588

vpir,

3589

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3590

info['location'] = stl

3591

else:

3592

mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)

3593

if mobj:

3594

info.update({

3595

'series': mobj.group(1),

3596

'season_number': int(mobj.group(2)),

3597

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3602

list) or []):

3603

tbr = tlb.get('toggleButtonRenderer') or {}

3604

for getter, regex in [(

3605

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3606

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3607

lambda x: x['accessibility'],

3608

lambda x: x['accessibilityData']['accessibilityData'],

3609

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3610

label = (try_get(tbr, getter, dict) or {}).get('label')

3611

if label:

3612

mobj = re.match(regex, label)

3613

if mobj:

3614

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3615

break

3616

sbr_tooltip = try_get(

3617

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3618

if sbr_tooltip:

3619

like_count, dislike_count = sbr_tooltip.split(' / ')

3620

info.update({

3621

'like_count': str_to_int(like_count),

3622

'dislike_count': str_to_int(dislike_count),

3623

})

3624

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3625

if vsir:

3626

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3627

info.update({

3628

'channel': self._get_text(vor, 'title'),

3629

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3634

list) or []

3635

multiple_songs = False

3636

for row in rows:

3637

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3638

multiple_songs = True

3639

break

3640

for row in rows:

3641

mrr = row.get('metadataRowRenderer') or {}

3642

mrr_title = mrr.get('title')

3643

if not mrr_title:

3644

continue

3645

mrr_title = self._get_text(mrr, 'title')

3646

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3647

if mrr_title == 'License':

3648

info['license'] = mrr_contents_text

3649

elif not multiple_songs:

3650

if mrr_title == 'Album':

3651

info['album'] = mrr_contents_text

3652

elif mrr_title == 'Artist':

3653

info['artist'] = mrr_contents_text

3654

elif mrr_title == 'Song':

3655

info['track'] = mrr_contents_text

3656

3657

fallbacks = {

3658

'channel': 'uploader',

3659

'channel_id': 'uploader_id',

3660

'channel_url': 'uploader_url',

3661

}

3662

3663

# The upload date for scheduled, live and past live streams / premieres in microformats

3664

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3665

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3666

upload_date = (

3667

unified_strdate(get_first(microformats, 'uploadDate'))

3668

or unified_strdate(search_meta('uploadDate')))

3669

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3670

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')

3671

info['upload_date'] = upload_date

3672

3673

for to, frm in fallbacks.items():

3674

if not info.get(to):

3675

info[to] = info.get(frm)

3676

3677

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3683

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3684

is_membersonly = None

3685

is_premium = None

3686

if initial_data and is_private is not None:

3687

is_membersonly = False

3688

is_premium = False

3689

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3690

badge_labels = set()

3691

for content in contents:

3692

if not isinstance(content, dict):

3693

continue

3694

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3695

for badge_label in badge_labels:

3696

if badge_label.lower() == 'members only':

3697

is_membersonly = True

3698

elif badge_label.lower() == 'premium':

3699

is_premium = True

3700

elif badge_label.lower() == 'unlisted':

3701

is_unlisted = True

3702

3703

info['availability'] = self._availability(

3704

is_private=is_private,

3705

needs_premium=is_premium,

3706

needs_subscription=is_membersonly,

3707

needs_auth=info['age_limit'] >= 18,

3708

is_unlisted=None if is_private is None else is_unlisted)

3709

3710

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3711

3712

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3718

3719

@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data reaches its entries.

    The returned wrapper takes ``(self, url)``, unsmuggles ``url`` and calls
    ``func(self, url, smuggled_data)``. ``smuggled_data['is_music_url']`` is
    set to True when ``self.is_music_url(url)`` is truthy. When there is any
    smuggled data and the result has truthy ``'entries'``, the entries are
    replaced with a generator that smuggles the data into each entry's
    ``'url'``.
    """

    def _resmuggle(entries, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        entries = result.get('entries')
        if entries:
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """Return the channel ID found in ``webpage``.

    The ``channelId`` meta tag is preferred. When it is absent, the channel
    URL is read from one of several ``og:``/``al:``/``twitter:`` meta tags
    and the ID is taken from its ``/channel/<id>`` path component.

    @param webpage: HTML of the page to search
    @returns: the channel ID string
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must sit inside the group: with `([^/?#&])+` the group
    # is repeated and only its last iteration (a single character) is kept.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3751

3752

@staticmethod

3753

def _extract_basic_item_renderer(item):

3754

# Modified from _extract_grid_item_renderer

3755

known_basic_renderers = (

3756

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3757

)

3758

for key, renderer in item.items():

3759

if not isinstance(renderer, dict):

3760

continue

3761

elif key in known_basic_renderers:

3762

return renderer

3763

elif key.startswith('grid') and key.endswith('Renderer'):

3764

return renderer

3765

3766

def _grid_entries(self, grid_renderer):

3767

for item in grid_renderer['items']:

3768

if not isinstance(item, dict):

3769

continue

3770

renderer = self._extract_basic_item_renderer(item)

3771

if not isinstance(renderer, dict):

3772

continue

3773

title = self._get_text(renderer, 'title')

3774

3775

# playlist

3776

playlist_id = renderer.get('playlistId')

3777

if playlist_id:

3778

yield self.url_result(

3779

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3780

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3785

if video_id:

3786

yield self._extract_video(renderer)

3787

continue

3788

# channel

3789

channel_id = renderer.get('channelId')

3790

if channel_id:

3791

yield self.url_result(

3792

'https://www.youtube.com/channel/%s' % channel_id,

3793

ie=YoutubeTabIE.ie_key(), video_title=title)

3794

continue

3795

# generic endpoint URL support

3796

ep_url = urljoin('https://www.youtube.com/', try_get(

3797

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3798

compat_str))

3799

if ep_url:

3800

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3801

if ie.suitable(ep_url):

3802

yield self.url_result(

3803

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3804

break

3805

3806

def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url_result for a music list item renderer.

    Checked in order: the item's own video ID, a watch endpoint carrying a
    playlist ID (optionally with a video ID), then a browse endpoint.
    Returns None when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        if watch_video_id:
            target = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3823

3824

def _shelf_entries_from_content(self, shelf_renderer):

3825

content = shelf_renderer.get('content')

3826

if not isinstance(content, dict):

3827

return

3828

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3829

if renderer:

3830

# TODO: add support for nested playlists so each shelf is processed

3831

# as separate playlist

3832

# TODO: this includes only first N items

3833

for entry in self._grid_entries(renderer):

3834

yield entry

3835

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its endpoint URL (if any), then its content.

    @param skip_channels: when true, a shelf whose URL contains
                          '/channels?' yields nothing at all
    """
    endpoint = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Links to other channels are skipped by URL substring; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3856

3857

def _playlist_entries(self, video_list_renderer):

3858

for content in video_list_renderer['contents']:

3859

if not isinstance(content, dict):

3860

continue

3861

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3862

if not isinstance(renderer, dict):

3863

continue

3864

video_id = renderer.get('videoId')

3865

if not video_id:

3866

continue

3867

yield self._extract_video(renderer)

3868

3869

def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of a rich item, if it has a video ID.

    Looks at ``rich_grid_renderer['content']['videoRenderer']``; yields
    nothing when that is missing or has no truthy ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3876

3877

def _video_entry(self, video_renderer):

3878

video_id = video_renderer.get('videoId')

3879

if video_id:

3880

return self._extract_video(video_renderer)

3881

3882

def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a community post.

    In order: the attached video, the attached playlist, and every YouTube
    video linked inline in the post text. An inline link that matches the
    attached video's ID is not yielded again.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

3917

3918

def _post_thread_continuation_entries(self, post_thread_continuation):

3919

contents = post_thread_continuation.get('contents')

3920

if not isinstance(contents, list):

3921

return

3922

for content in contents:

3923

renderer = content.get('backstagePostThreadRenderer')

3924

if not isinstance(renderer, dict):

3925

continue

3926

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3931

for content in contents:

3932

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3933

if video_renderer:

3934

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries contained in ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    on entry and its single slot is overwritten in place with whatever
    ``self._extract_continuation`` returns for the renderers visited.
    When the slot is still falsy, the section renderer and finally
    ``parent_renderer`` itself are tried.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    entry_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: the only other supported shape is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is processed
            for key, renderer in section_item.items():
                if key not in entry_handlers:
                    continue
                for entry in entry_handlers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3985

3986

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The entries of the tab's own content are yielded first. While a
    continuation is available, the next page is requested and handled
    either through its ``continuationContents`` or through the
    ``appendContinuationItemsAction`` items of its first response action.
    Iteration stops when there is no continuation, no response, or no
    recognised renderer in the response.
    """
    continuation_list = [None]
    # The lambda resolves `continuation_list` at call time, so it always
    # writes into the list most recently bound to that name below.
    extract_entries = lambda x: self._extract_entries(x, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the 'continuationContents' response shape
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (extractor, key under
    # which the extractor expects to find the list of items)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from continuation_handlers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(
            response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received,
            lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first item decides how the whole list of items is handled
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key not in item_handlers:
                continue
            extractor, items_key = item_handlers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            yield from extractor(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4066

for tab in tabs:

4067

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4068

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4073

4074

@classmethod
def _extract_uploader(cls, data):
    """Return uploader fields read from the playlist's secondary sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value is None are left out. An empty dict
    is returned when the sidebar has no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}

4088

4089

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is taken from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer, and is complemented with
    thumbnails (playlist thumbnail, avatar, banners) and sidebar statistics.
    The entries come from ``self._entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # None unless the channel metadata renderer supplied an ID
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derives the uncropped URL from the first thumbnail and appends it
        if not thumbnails:
            return
        uncropped_url = _uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    # Set before appending the uncropped banner, which keeps its own preference
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Channel fields mirror the (possibly just updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4180

4181

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):

4182

first_id = last_id = response = None

4183

for page_num in itertools.count(1):

4184

videos = list(self._playlist_entries(playlist))

4185

if not videos:

4186

return

4187

start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1

4188

if start >= len(videos):

4189

return

4190

for video in videos[start:]:

4191

if video['id'] == first_id:

4192

self.to_screen('First video %s found again; Assuming end of Mix' % first_id)

4193

return

4194

yield video

4195

first_id = first_id or videos[0]['id']

4196

last_id = videos[-1]['id']

4197

watch_endpoint = try_get(

4198

playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])

4199

headers = self.generate_api_headers(

4200

ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),

4201

visitor_data=self._extract_visitor_data(response, data, ytcfg))

4202

query = {

4203

'playlistId': playlist_id,

4204

'videoId': watch_endpoint.get('videoId') or last_id,

4205

'index': watch_endpoint.get('index') or len(videos),

4206

'params': watch_endpoint.get('params') or 'OAE%3D'

4207

}

4208

response = self._extract_response(

4209

item_id='%s page %d' % (playlist_id, page_num),

4210

query=query, ep='next', headers=headers, ytcfg=ytcfg,

4211

check_get_keys='contents'

4212

)

4213

playlist = try_get(

4214

response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4215

4216

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist renderer found on a watch page.

    When the playlist's endpoint resolves to a URL different from ``url``,
    extraction is delegated to YoutubeTabIE through a url_result. Otherwise
    the playlist is extracted here as a Mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4233

4234

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Privacy is derived from the badges of the primary sidebar renderer and,
    if present, from the selected entry of its privacy dropdown.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
                break

    is_private = None
    is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)

4265

4266

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value among the sidebar items.

    The items are read from
    ``data['sidebar']['playlistSidebarRenderer']['items']``; each one is
    probed with ``try_get(item, ..., expected_type)``. Returns ``None`` when
    the list is missing or no item yields a truthy value.
    """
    def _sidebar_items(x):
        return x['sidebar']['playlistSidebarRenderer']['items']

    def _pick(x):
        return x[info_renderer]

    candidates = (
        try_get(entry, _pick, expected_type)
        for entry in try_get(data, _sidebar_items, list) or [])
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """Re-request the playlist so that unavailable videos are included.

    Searches the primary sidebar renderer's menu for a 'show unavailable
    videos' navigation item and reuses its browse endpoint (``browseId`` and
    ``params``). If no such item is found, the fallback values
    ``'VL%s' % item_id`` and ``'wgYCCAA='`` are sent instead.

    Returns ``None`` when ``data`` has no primary sidebar renderer; otherwise
    the result of a non-fatal ``_extract_response`` call.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    def _menu_items(x):
        return x['menu']['menuRenderer']['items']

    def _label(x):
        return x['text']['simpleText']

    def _browse_endpoint(x):
        return x['navigationEndpoint']['browseEndpoint']

    browse_endpoint = {}
    for menu_item in try_get(sidebar, _menu_items, list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, _label, compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(nav_item, _browse_endpoint, dict) or {}
            break

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': browse_endpoint.get('params') or 'wgYCCAA=',
            'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id,
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4310

4311

def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and parse its initial data, retrying when needed.

    Up to ``extractor_retries`` (default 3) extra attempts are made when
    - the download fails with a cause that is one of ``network_exceptions``,
      except for HTTP errors with status 403 or 429, or
    - the parsed data has none of the keys 'contents',
      'currentVideoEndpoint', 'onResponseReceivedActions'.

    With ``fatal=True`` the final failure is raised as ``ExtractorError``;
    otherwise it is reported as a warning and the loop stops.

    @returns: ``(webpage, data)`` as last assigned; either may be ``None``
    """
    retries = self.get_param('extractor_retries', 3)
    attempt = -1
    webpage = data = last_error = None
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            retriable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if retriable:
                last_error = error_to_compat_str(cause or err.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        # Only reached when the download and parse succeeded: every path
        # through the handler above leaves this iteration
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Get the tab data for ``url`` from the webpage, falling back to the API.

    Unless 'webpage' is listed in the ``skip`` extractor argument, the
    webpage is downloaded first (with ``fatal=webpage_fatal``) and ``ytcfg``
    is filled in from it when not supplied. If that yields no data,
    ``_extract_tab_endpoint`` is used instead.

    @returns: ``(data, ytcfg)``
    @raises ExtractorError: if ``fatal`` and the URL landed on the home page,
        or if ``fatal`` and the authentication check fails (no ytcfg while
        authenticated, and 'authcheck' is not skipped)
    """
    data = None
    skip_webpage = 'webpage' in self._configuration_arg('skip')
    if not skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        # NOTE(review): in non-fatal mode this only warns; `data` is kept as is
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'),
            expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(
        url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4383

4384

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and fetch the endpoint it maps to.

    ``navigation/resolve_url`` is queried first. If the answer contains a
    'browseEndpoint' the 'browse' API is called with it, else if it contains
    a 'watchEndpoint' the 'next' API is called.

    @returns: the API response, or ``None`` after a warning when nothing
        could be resolved and ``fatal`` is false
    @raises ExtractorError: when nothing could be resolved and ``fatal``
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers,
        ytcfg=ytcfg, fatal=fatal, ep='navigation/resolve_url',
        note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to call), tried in this order
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4401

4402

# Default `params` value that _search_results sends when the caller does not pass one
_SEARCH_PARAMS = None

4403

4404

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search result entries for ``query``, one API page at a time.

    @param query: the search string sent as 'query'
    @param params: value for the 'params' request field; when omitted,
        ``self._SEARCH_PARAMS`` is used. A falsy value leaves the field out.
    @param default_client: forwarded to ``_extract_response``

    Paging stops as soon as ``_extract_entries`` leaves no continuation in
    the shared one-element list.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Candidate locations of the result list inside a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    check_get_keys = tuple({path[0] for path in content_keys})

    # One-element list so that _extract_entries can hand back the next continuation
    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4433

IE_DESC = 'YouTube Tabs'

4434

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4443

(?P<not_channel>

4444

feed/|hashtag/|

4445

(?:playlist|watch)\?.*?\blist=

4446

)|

4447

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4452

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4453

}

4454

IE_NAME = 'youtube:tab'

4455

4456

_TESTS = [{

4457

'note': 'playlists, multipage',

4458

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4459

'playlist_mincount': 94,

4460

'info_dict': {

4461

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4462

'title': 'Igor Kleiner - Playlists',

4463

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4464

'uploader': 'Igor Kleiner',

4465

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4466

'channel': 'Igor Kleiner',

4467

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4468

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4469

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4470

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4471

'channel_follower_count': int

4472

},

4473

}, {

4474

'note': 'playlists, multipage, different order',

4475

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4476

'playlist_mincount': 94,

4477

'info_dict': {

4478

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4479

'title': 'Igor Kleiner - Playlists',

4480

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4481

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4482

'uploader': 'Igor Kleiner',

4483

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4484

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4485

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4486

'channel': 'Igor Kleiner',

4487

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4488

'channel_follower_count': int

4489

},

4490

}, {

4491

'note': 'playlists, series',

4492

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4493

'playlist_mincount': 5,

4494

'info_dict': {

4495

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4496

'title': '3Blue1Brown - Playlists',

4497

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4498

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4499

'uploader': '3Blue1Brown',

4500

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4501

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4502

'channel': '3Blue1Brown',

4503

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4504

'tags': ['Mathematics'],

4505

'channel_follower_count': int

4506

},

4507

}, {

4508

'note': 'playlists, singlepage',

4509

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4510

'playlist_mincount': 4,

4511

'info_dict': {

4512

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4513

'title': 'ThirstForScience - Playlists',

4514

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4515

'uploader': 'ThirstForScience',

4516

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4517

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4518

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4519

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4520

'tags': 'count:13',

4521

'channel': 'ThirstForScience',

4522

'channel_follower_count': int

4523

}

4524

}, {

4525

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4526

'only_matching': True,

4527

}, {

4528

'note': 'basic, single video playlist',

4529

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4530

'info_dict': {

4531

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4532

'uploader': 'Sergey M.',

4533

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4534

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4539

'channel': 'Sergey M.',

4540

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4541

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4542

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4547

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4548

'info_dict': {

4549

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4550

'uploader': 'Sergey M.',

4551

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4552

'title': 'youtube-dl empty playlist',

4553

'tags': [],

4554

'channel': 'Sergey M.',

4555

'description': '',

4556

'modified_date': '20160902',

4557

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4558

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4559

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4565

'info_dict': {

4566

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4567

'title': 'lex will - Home',

4568

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4569

'uploader': 'lex will',

4570

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4571

'channel': 'lex will',

4572

'tags': ['bible', 'history', 'prophesy'],

4573

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4574

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4575

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4576

'channel_follower_count': int

4577

},

4578

'playlist_mincount': 2,

4579

}, {

4580

'note': 'Videos tab',

4581

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4582

'info_dict': {

4583

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4584

'title': 'lex will - Videos',

4585

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4586

'uploader': 'lex will',

4587

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4588

'tags': ['bible', 'history', 'prophesy'],

4589

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4590

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4591

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4592

'channel': 'lex will',

4593

'channel_follower_count': int

4594

},

4595

'playlist_mincount': 975,

4596

}, {

4597

'note': 'Videos tab, sorted by popular',

4598

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4599

'info_dict': {

4600

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4601

'title': 'lex will - Videos',

4602

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4603

'uploader': 'lex will',

4604

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4605

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4606

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4607

'channel': 'lex will',

4608

'tags': ['bible', 'history', 'prophesy'],

4609

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4610

'channel_follower_count': int

4611

},

4612

'playlist_mincount': 199,

4613

}, {

4614

'note': 'Playlists tab',

4615

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4616

'info_dict': {

4617

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4618

'title': 'lex will - Playlists',

4619

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4620

'uploader': 'lex will',

4621

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4622

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4623

'channel': 'lex will',

4624

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4625

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4626

'tags': ['bible', 'history', 'prophesy'],

4627

'channel_follower_count': int

4628

},

4629

'playlist_mincount': 17,

4630

}, {

4631

'note': 'Community tab',

4632

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4633

'info_dict': {

4634

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4635

'title': 'lex will - Community',

4636

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4637

'uploader': 'lex will',

4638

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4639

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4640

'channel': 'lex will',

4641

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4642

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4643

'tags': ['bible', 'history', 'prophesy'],

4644

'channel_follower_count': int

4645

},

4646

'playlist_mincount': 18,

4647

}, {

4648

'note': 'Channels tab',

4649

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4650

'info_dict': {

4651

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4652

'title': 'lex will - Channels',

4653

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4654

'uploader': 'lex will',

4655

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4656

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4657

'channel': 'lex will',

4658

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4659

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4660

'tags': ['bible', 'history', 'prophesy'],

4661

'channel_follower_count': int

4662

},

4663

'playlist_mincount': 12,

4664

}, {

4665

'note': 'Search tab',

4666

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4667

'playlist_mincount': 40,

4668

'info_dict': {

4669

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4670

'title': '3Blue1Brown - Search - linear algebra',

4671

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4672

'uploader': '3Blue1Brown',

4673

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4674

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4675

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4676

'tags': ['Mathematics'],

4677

'channel': '3Blue1Brown',

4678

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4679

'channel_follower_count': int

4680

},

4681

}, {

4682

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4683

'only_matching': True,

4684

}, {

4685

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4686

'only_matching': True,

4687

}, {

4688

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4689

'only_matching': True,

4690

}, {

4691

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4692

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4693

'info_dict': {

4694

'title': '29C3: Not my department',

4695

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4696

'uploader': 'Christiaan008',

4697

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4698

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4699

'tags': [],

4700

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4701

'view_count': int,

4702

'modified_date': '20150605',

4703

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4704

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4705

'channel': 'Christiaan008',

4706

},

4707

'playlist_count': 96,

4708

}, {

4709

'note': 'Large playlist',

4710

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4711

'info_dict': {

4712

'title': 'Uploads from Cauchemar',

4713

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4714

'uploader': 'Cauchemar',

4715

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4716

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4717

'tags': [],

4718

'modified_date': r're:\d{8}',

4719

'channel': 'Cauchemar',

4720

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4721

'view_count': int,

4722

'description': '',

4723

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4724

},

4725

'playlist_mincount': 1123,

4726

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4727

}, {

4728

'note': 'even larger playlist, 8832 videos',

4729

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4730

'only_matching': True,

4731

}, {

4732

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4733

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4734

'info_dict': {

4735

'title': 'Uploads from Interstellar Movie',

4736

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4737

'uploader': 'Interstellar Movie',

4738

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4739

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4740

'tags': [],

4741

'view_count': int,

4742

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4743

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4744

'channel': 'Interstellar Movie',

4745

'description': '',

4746

'modified_date': r're:\d{8}',

4747

},

4748

'playlist_mincount': 21,

4749

}, {

4750

'note': 'Playlist with "show unavailable videos" button',

4751

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4752

'info_dict': {

4753

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4754

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4755

'uploader': 'Phim Siêu Nhân Nhật Bản',

4756

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4757

'view_count': int,

4758

'channel': 'Phim Siêu Nhân Nhật Bản',

4759

'tags': [],

4760

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4761

'description': '',

4762

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4763

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4764

'modified_date': r're:\d{8}',

4765

},

4766

'playlist_mincount': 200,

4767

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4768

}, {

4769

'note': 'Playlist with unavailable videos in page 7',

4770

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4771

'info_dict': {

4772

'title': 'Uploads from BlankTV',

4773

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4774

'uploader': 'BlankTV',

4775

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4776

'channel': 'BlankTV',

4777

'channel_url': 'https://www.youtube.com/c/blanktv',

4778

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4779

'view_count': int,

4780

'tags': [],

4781

'uploader_url': 'https://www.youtube.com/c/blanktv',

4782

'modified_date': r're:\d{8}',

4783

'description': '',

4784

},

4785

'playlist_mincount': 1000,

4786

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4787

}, {

4788

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4789

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4790

'info_dict': {

4791

'title': 'Data Analysis with Dr Mike Pound',

4792

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4793

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4794

'uploader': 'Computerphile',

4795

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4796

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4797

'tags': [],

4798

'view_count': int,

4799

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4800

'channel_url': 'https://www.youtube.com/user/Computerphile',

4801

'channel': 'Computerphile',

4802

},

4803

'playlist_mincount': 11,

4804

}, {

4805

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4806

'only_matching': True,

4807

}, {

4808

'note': 'Playlist URL that does not actually serve a playlist',

4809

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4814

'uploader': 'STREEM',

4815

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4816

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4817

'upload_date': '20150526',

4818

'license': 'Standard YouTube License',

4819

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4820

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4827

},

4828

'skip': 'This video is not available.',

4829

'add_ie': [YoutubeIE.ie_key()],

4830

}, {

4831

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4832

'only_matching': True,

4833

}, {

4834

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4835

'only_matching': True,

4836

}, {

4837

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4838

'info_dict': {

4839

'id': 'GgL890LIznQ', # This will keep changing

4840

'ext': 'mp4',

4841

'title': str,

4842

'uploader': 'Sky News',

4843

'uploader_id': 'skynews',

4844

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4845

'upload_date': r're:\d{8}',

4846

'description': str,

4847

'categories': ['News & Politics'],

4848

'tags': list,

4849

'like_count': int,

4850

'release_timestamp': 1642502819,

4851

'channel': 'Sky News',

4852

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4853

'age_limit': 0,

4854

'view_count': int,

4855

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4856

'playable_in_embed': True,

4857

'release_date': '20220118',

4858

'availability': 'public',

4859

'live_status': 'is_live',

4860

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4861

'channel_follower_count': int

4862

},

4863

'params': {

4864

'skip_download': True,

4865

},

4866

'expected_warnings': ['Ignoring subtitle tracks found in '],

4867

}, {

4868

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4873

'uploader': 'The Young Turks',

4874

'uploader_id': 'TheYoungTurks',

4875

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4876

'upload_date': '20150715',

4877

'license': 'Standard YouTube License',

4878

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4879

'categories': ['News & Politics'],

4880

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4885

},

4886

'only_matching': True,

4887

}, {

4888

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4889

'only_matching': True,

4890

}, {

4891

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4892

'only_matching': True,

4893

}, {

4894

'note': 'A channel that is not live. Should raise error',

4895

'url': 'https://www.youtube.com/user/numberphile/live',

4896

'only_matching': True,

4897

}, {

4898

'url': 'https://www.youtube.com/feed/trending',

4899

'only_matching': True,

4900

}, {

4901

'url': 'https://www.youtube.com/feed/library',

4902

'only_matching': True,

4903

}, {

4904

'url': 'https://www.youtube.com/feed/history',

4905

'only_matching': True,

4906

}, {

4907

'url': 'https://www.youtube.com/feed/subscriptions',

4908

'only_matching': True,

4909

}, {

4910

'url': 'https://www.youtube.com/feed/watch_later',

4911

'only_matching': True,

4912

}, {

4913

'note': 'Recommended - redirects to home page.',

4914

'url': 'https://www.youtube.com/feed/recommended',

4915

'only_matching': True,

4916

}, {

4917

'note': 'inline playlist with not always working continuations',

4918

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4919

'only_matching': True,

4920

}, {

4921

'url': 'https://www.youtube.com/course',

4922

'only_matching': True,

4923

}, {

4924

'url': 'https://www.youtube.com/zsecurity',

4925

'only_matching': True,

4926

}, {

4927

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4928

'only_matching': True,

4929

}, {

4930

'url': 'https://www.youtube.com/TheYoungTurks/live',

4931

'only_matching': True,

4932

}, {

4933

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4940

}, {

4941

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4942

'only_matching': True,

4943

}, {

4944

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4945

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4946

'only_matching': True

4947

}, {

4948

'note': '/browse/ should redirect to /channel/',

4949

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4950

'only_matching': True

4951

}, {

4952

'note': 'VLPL, should redirect to playlist?list=PL...',

4953

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4954

'info_dict': {

4955

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4956

'uploader': 'NoCopyrightSounds',

4957

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4958

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4959

'title': 'NCS Releases',

4960

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4961

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4962

'modified_date': r're:\d{8}',

4963

'view_count': int,

4964

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4965

'tags': [],

4966

'channel': 'NoCopyrightSounds',

4967

},

4968

'playlist_mincount': 166,

4969

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4970

}, {

4971

'note': 'Topic, should redirect to playlist?list=UU...',

4972

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4973

'info_dict': {

4974

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4975

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4976

'title': 'Uploads from Royalty Free Music - Topic',

4977

'uploader': 'Royalty Free Music - Topic',

4978

'tags': [],

4979

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4980

'channel': 'Royalty Free Music - Topic',

4981

'view_count': int,

4982

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4983

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4984

'modified_date': r're:\d{8}',

4985

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4986

'description': '',

4987

},

4988

'expected_warnings': [

4989

'The URL does not have a videos tab',

4990

r'[Uu]navailable videos (are|will be) hidden',

4991

],

4992

'playlist_mincount': 101,

4993

}, {

4994

'note': 'Topic without a UU playlist',

4995

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

4996

'info_dict': {

4997

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

4998

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

4999

'tags': [],

5000

},

5001

'expected_warnings': [

5002

'the playlist redirect gave error',

5003

],

5004

'playlist_mincount': 9,

5005

}, {

5006

'note': 'Youtube music Album',

5007

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5008

'info_dict': {

5009

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5010

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5015

'modified_date': r're:\d{8}',

5016

},

5017

'playlist_count': 50,

5018

}, {

5019

'note': 'unlisted single video playlist',

5020

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5021

'info_dict': {

5022

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5023

'uploader': 'colethedj',

5024

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5025

'title': 'yt-dlp unlisted playlist test',

5026

'availability': 'unlisted',

5027

'tags': [],

5028

'modified_date': '20211208',

5029

'channel': 'colethedj',

5030

'view_count': int,

5031

'description': '',

5032

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5033

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5034

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5039

'url': 'https://www.youtube.com/feed/recommended',

5040

'info_dict': {

5041

'id': 'recommended',

5042

'title': 'recommended',

5043

'tags': [],

5044

},

5045

'playlist_mincount': 50,

5046

'params': {

5047

'skip_download': True,

5048

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5049

},

5050

}, {

5051

'note': 'API Fallback: /videos tab, sorted by oldest first',

5052

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5053

'info_dict': {

5054

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5055

'title': 'Cody\'sLab - Videos',

5056

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5057

'uploader': 'Cody\'sLab',

5058

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5059

'channel': 'Cody\'sLab',

5060

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5061

'tags': [],

5062

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5063

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5064

'channel_follower_count': int

5065

},

5066

'playlist_mincount': 650,

5067

'params': {

5068

'skip_download': True,

5069

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5070

},

5071

}, {

5072

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5073

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5074

'info_dict': {

5075

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5076

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5077

'title': 'Uploads from Royalty Free Music - Topic',

5078

'uploader': 'Royalty Free Music - Topic',

5079

'modified_date': r're:\d{8}',

5080

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5081

'description': '',

5082

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5083

'tags': [],

5084

'channel': 'Royalty Free Music - Topic',

5085

'view_count': int,

5086

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5087

},

5088

'expected_warnings': [

5089

'does not have a videos tab',

5090

r'[Uu]navailable videos (are|will be) hidden',

5091

],

5092

'playlist_mincount': 101,

5093

'params': {

5094

'skip_download': True,

5095

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5096

},

5097

}, {

5098

'note': 'non-standard redirect to regional channel',

5099

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5100

'only_matching': True

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE accepts are always rejected here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

5107

5108

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional '/<word>' segment; the conditional group only tries to
#          match it when the 'not_channel' group did NOT participate
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5109

5110

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel style URL and extract it.

    The URL is normalised to the www.youtube.com host, split with
    ``_URL_RE`` and, depending on what it points at, either redirected to
    another extractor via ``url_result`` or extracted through
    ``_extract_from_tabs`` / ``_extract_from_playlist``.

    :param url: the URL to extract.
    :param smuggled_data: mapping read for the ``is_music_url`` flag
        (presumably supplied by the ``passthrough_smuggled_data`` decorator).
    :raises ExtractorError: when the page cannot be recognised, an album
        cannot be resolved, the channel is not live, or an explicitly
        requested tab does not exist.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def split_url(target):
        # groupdict() with every unmatched (None) group replaced by ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = split_url(url)
    redirect_warning = None
    pre = mobj['pre']
    # The tab name is lower-cased because YouTube returns incomplete data otherwise
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # YouTube Music "VL" channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Albums (/[channel/browse]/MP...) are resolved to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # YouTube Music /browse/ URLs are rewritten to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A bare channel/user home URL is redirected to its /videos tab
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = split_url(url)

    # The URL may carry a video id, a playlist id, or both
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # With neither id YouTube would redirect to the home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}',
                ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may answer with a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # A live tab is expected to have redirected to the video already
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name and requested_tab_name != selected_tab_name:
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was asked for explicitly, so its absence is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes offers a button to reload the playlist with unavailable videos
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # a live tab is expected to redirect to a video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}',
        ie=YoutubeIE.ie_key(), video_id=video_id)

5239

5240

5241

class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and ``list=`` URLs, then hands them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether *url* should be handled by this extractor.

        Anything YoutubeTabIE accepts, and anything carrying a non-empty
        ``v`` query parameter, is rejected; the rest is decided by the
        inherited ``suitable``.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-scope import kept exactly as in the original even
        # though the name is also imported at module level - confirm before hoisting
        from ..utils import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the incoming URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; otherwise fall back to the id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5350

5351

5352

class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that carry both a video id and a ``list=`` playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5399

5400

5401

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps ``/embed/live_stream?channel=...`` URLs onto the channel's ``/live`` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel id taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5414

5415

5416

class YoutubeYtUserIE(InfoExtractor):
    """Expands the ``ytuser:<name>`` shorthand into the user's ``/videos`` URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos page."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5430

5431

5432

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor (``:ytfav`` and variants) that points at the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5449

5450

5451

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configured for the ``ytsearch`` key; all logic comes from the bases."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Search filter token: videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configured for the ``ytsearchdate`` key; all logic comes from the bases."""

    # Name is derived from the plain search extractor's name
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Search filter token: videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extracts results for ``/results`` and ``/search`` URLs on youtube.com."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results, using the query as both id and title."""
        qs = parse_qs(url)
        # 'search_query' takes precedence over 'q' when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Optional 'sp' value is forwarded unchanged; None when absent
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5509

5510

5511

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extracts results for music.youtube.com search URLs, optionally limited to a section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name -> 'sp' parameter value used to request that section
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results.

        The section comes from the ``sp`` query parameter when present,
        otherwise from the URL fragment. The playlist id and title are the
        query, joined with the section name by `` - `` when a section applies.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup of the section name; an unknown value is used as the name itself
            section = next(
                (name for name, token in self._SECTIONS.items() if token == params), params)
        else:
            # Text after the first '#', or '' when the URL has no fragment
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5562

5563

5564

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Every subclass has to provide a _FEED_NAME attribute; it is used both
    for the extractor name and for the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name, built from the subclass's feed name."""
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<name>`` URL; *url* itself is not used."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5579

5580

5581

class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor (``:ytwatchlater``) that points at the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5593

5594

5595

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; also matches the bare youtube.com URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the base-class value of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed, selected by the ``:ytsubs`` keyword family."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed, selected by ``:ythis`` / ``:ythistory``."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that lack a video id and explains the likely cause.

    Such URLs typically result from an unquoted ``&`` on the command line
    cutting the URL short, so extraction always fails with a hint instead
    of attempting a download.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing how to quote the URL.

        :raises ExtractorError: unconditionally.
        """
        # The hint names this project's own executable (yt-dlp), not youtube-dl,
        # so that the suggested command line actually works for the user.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc.',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Matches ``/clip/`` URLs, warns that clips are unsupported and defers to 'Generic'."""

    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit a warning, then pass the unchanged URL on to the 'Generic' extractor."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')

5692

5693

5694

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose ``v=`` value is 1-10 characters long and reports them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL.

        :raises ExtractorError: unconditionally.
        """
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)