jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	get_first,
	43	int_or_none,
	44	is_html,
	45	join_nonempty,
	46	js_to_json,
	47	mimetype2ext,
	48	network_exceptions,
	49	NO_DEFAULT,
	50	orderedSet,
	51	parse_codecs,
	52	parse_count,
	53	parse_duration,
	54	parse_iso8601,
	55	parse_qs,
	56	qualities,
	57	remove_end,
	58	remove_start,
	59	smuggle_url,
	60	str_or_none,
	61	str_to_int,
	62	strftime_or_none,
	63	traverse_obj,
	64	try_get,
	65	unescapeHTML,
	66	unified_strdate,
	67	unified_timestamp,
	68	unsmuggle_url,
	69	update_url_query,
	70	url_or_none,
	71	urljoin,
	72	variadic,
	73	)
	74
	75
	76	# any clients starting with _ cannot be explicitly requested by the user
	77	INNERTUBE_CLIENTS = {
	78	'web': {
	79	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	80	'INNERTUBE_CONTEXT': {
	81	'client': {
	82	'clientName': 'WEB',
	83	'clientVersion': '2.20211221.00.00',
	84	}
	85	},
	86	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	87	},
	88	'web_embedded': {
	89	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	90	'INNERTUBE_CONTEXT': {
	91	'client': {
	92	'clientName': 'WEB_EMBEDDED_PLAYER',
	93	'clientVersion': '1.20211215.00.01',
	94	},
	95	},
	96	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	97	},
	98	'web_music': {
	99	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	100	'INNERTUBE_HOST': 'music.youtube.com',
	101	'INNERTUBE_CONTEXT': {
	102	'client': {
	103	'clientName': 'WEB_REMIX',
	104	'clientVersion': '1.20211213.00.00',
	105	}
	106	},
	107	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	108	},
	109	'web_creator': {
	110	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	111	'INNERTUBE_CONTEXT': {
	112	'client': {
	113	'clientName': 'WEB_CREATOR',
	114	'clientVersion': '1.20211220.02.00',
	115	}
	116	},
	117	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	118	},
	119	'android': {
	120	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	121	'INNERTUBE_CONTEXT': {
	122	'client': {
	123	'clientName': 'ANDROID',
	124	'clientVersion': '16.49',
	125	}
	126	},
	127	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	128	'REQUIRE_JS_PLAYER': False
	129	},
	130	'android_embedded': {
	131	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	132	'INNERTUBE_CONTEXT': {
	133	'client': {
	134	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	135	'clientVersion': '16.49',
	136	},
	137	},
	138	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	139	'REQUIRE_JS_PLAYER': False
	140	},
	141	'android_music': {
	142	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	143	'INNERTUBE_CONTEXT': {
	144	'client': {
	145	'clientName': 'ANDROID_MUSIC',
	146	'clientVersion': '4.57',
	147	}
	148	},
	149	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	150	'REQUIRE_JS_PLAYER': False
	151	},
	152	'android_creator': {
	153	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	154	'INNERTUBE_CONTEXT': {
	155	'client': {
	156	'clientName': 'ANDROID_CREATOR',
	157	'clientVersion': '21.47',
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '16.46',
	171	'deviceModel': 'iPhone14,3',
	172	}
	173	},
	174	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	175	'REQUIRE_JS_PLAYER': False
	176	},
	177	'ios_embedded': {
	178	'INNERTUBE_CONTEXT': {
	179	'client': {
	180	'clientName': 'IOS_MESSAGES_EXTENSION',
	181	'clientVersion': '16.46',
	182	'deviceModel': 'iPhone14,3',
	183	},
	184	},
	185	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	186	'REQUIRE_JS_PLAYER': False
	187	},
	188	'ios_music': {
	189	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	190	'INNERTUBE_CONTEXT': {
	191	'client': {
	192	'clientName': 'IOS_MUSIC',
	193	'clientVersion': '4.57',
	194	},
	195	},
	196	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	197	'REQUIRE_JS_PLAYER': False
	198	},
	199	'ios_creator': {
	200	'INNERTUBE_CONTEXT': {
	201	'client': {
	202	'clientName': 'IOS_CREATOR',
	203	'clientVersion': '21.47',
	204	},
	205	},
	206	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	207	'REQUIRE_JS_PLAYER': False
	208	},
	209	# mweb has 'ultralow' formats
	210	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	211	'mweb': {
	212	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	213	'INNERTUBE_CONTEXT': {
	214	'client': {
	215	'clientName': 'MWEB',
	216	'clientVersion': '2.20211221.01.00',
	217	}
	218	},
	219	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	220	},
	221	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	222	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	223	'tv_embedded': {
	224	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	225	'INNERTUBE_CONTEXT': {
	226	'client': {
	227	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	228	'clientVersion': '2.0',
	229	},
	230	},
	231	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	232	},
	233	}
	234
	235
	236	def _split_innertube_client(client_name):
	237	variant, *base = client_name.rsplit('.', 1)
	238	if base:
	239	return variant, base[0], variant
	240	base, *variant = client_name.split('_', 1)
	241	return client_name, base, variant[0] if variant else None
	242
	243
	244	def build_innertube_clients():
	245	THIRD_PARTY = {
	246	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	247	}
	248	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	249	priority = qualities(BASE_CLIENTS[::-1])
	250
	251	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	252	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	253	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	254	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	255	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	256
	257	_, base_client, variant = _split_innertube_client(client)
	258	ytcfg['priority'] = 10 * priority(base_client)
	259
	260	if not variant:
	261	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	262	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	263	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	embedscreen['priority'] -= 3
	265	elif variant == 'embedded':
	266	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	267	ytcfg['priority'] -= 2
	268	else:
	269	ytcfg['priority'] -= 3
	270
	271
	272	build_innertube_clients()
	273
	274
	275	class YoutubeBaseInfoExtractor(InfoExtractor):
	276	"""Provide base functions for Youtube extractors"""
	277
	278	_RESERVED_NAMES = (
	279	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	280	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	281	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	282	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	283
	284	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	285
	286	# _NETRC_MACHINE = 'youtube'
	287
	288	# If True it will raise an error if no login info is provided
	289	_LOGIN_REQUIRED = False
	290
	291	_INVIDIOUS_SITES = (
	292	# invidious-redirect websites
	293	r'(?:www\.)?redirect\.invidious\.io',
	294	r'(?:(?:www\|dev)\.)?invidio\.us',
	295	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	296	r'(?:www\.)?invidious\.pussthecat\.org',
	297	r'(?:www\.)?invidious\.zee\.li',
	298	r'(?:www\.)?invidious\.ethibox\.fr',
	299	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	300	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	301	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	302	# youtube-dl invidious instances list
	303	r'(?:(?:www\|no)\.)?invidiou\.sh',
	304	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	305	r'(?:www\.)?invidious\.kabi\.tk',
	306	r'(?:www\.)?invidious\.mastodon\.host',
	307	r'(?:www\.)?invidious\.zapashcanon\.fr',
	308	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	309	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	310	r'(?:www\.)?invidious\.himiko\.cloud',
	311	r'(?:www\.)?invidious\.reallyancient\.tech',
	312	r'(?:www\.)?invidious\.tube',
	313	r'(?:www\.)?invidiou\.site',
	314	r'(?:www\.)?invidious\.site',
	315	r'(?:www\.)?invidious\.xyz',
	316	r'(?:www\.)?invidious\.nixnet\.xyz',
	317	r'(?:www\.)?invidious\.048596\.xyz',
	318	r'(?:www\.)?invidious\.drycat\.fr',
	319	r'(?:www\.)?inv\.skyn3t\.in',
	320	r'(?:www\.)?tube\.poal\.co',
	321	r'(?:www\.)?tube\.connect\.cafe',
	322	r'(?:www\.)?vid\.wxzm\.sx',
	323	r'(?:www\.)?vid\.mint\.lgbt',
	324	r'(?:www\.)?vid\.puffyan\.us',
	325	r'(?:www\.)?yewtu\.be',
	326	r'(?:www\.)?yt\.elukerio\.org',
	327	r'(?:www\.)?yt\.lelux\.fi',
	328	r'(?:www\.)?invidious\.ggc-project\.de',
	329	r'(?:www\.)?yt\.maisputain\.ovh',
	330	r'(?:www\.)?ytprivate\.com',
	331	r'(?:www\.)?invidious\.13ad\.de',
	332	r'(?:www\.)?invidious\.toot\.koeln',
	333	r'(?:www\.)?invidious\.fdn\.fr',
	334	r'(?:www\.)?watch\.nettohikari\.com',
	335	r'(?:www\.)?invidious\.namazso\.eu',
	336	r'(?:www\.)?invidious\.silkky\.cloud',
	337	r'(?:www\.)?invidious\.exonip\.de',
	338	r'(?:www\.)?invidious\.riverside\.rocks',
	339	r'(?:www\.)?invidious\.blamefran\.net',
	340	r'(?:www\.)?invidious\.moomoo\.de',
	341	r'(?:www\.)?ytb\.trom\.tf',
	342	r'(?:www\.)?yt\.cyberhost\.uk',
	343	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	344	r'(?:www\.)?qklhadlycap4cnod\.onion',
	345	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	346	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	347	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	348	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	349	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	350	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	351	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	352	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	353	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	354	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	355	)
	356
	357	def _initialize_consent(self):
	358	cookies = self._get_cookies('https://www.youtube.com/')
	359	if cookies.get('__Secure-3PSID'):
	360	return
	361	consent_id = None
	362	consent = cookies.get('CONSENT')
	363	if consent:
	364	if 'YES' in consent.value:
	365	return
	366	consent_id = self._search_regex(
	367	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	368	if not consent_id:
	369	consent_id = random.randint(100, 999)
	370	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	371
	372	def _initialize_pref(self):
	373	cookies = self._get_cookies('https://www.youtube.com/')
	374	pref_cookie = cookies.get('PREF')
	375	pref = {}
	376	if pref_cookie:
	377	try:
	378	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	379	except ValueError:
	380	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	381	pref.update({'hl': 'en', 'tz': 'UTC'})
	382	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	383
	384	def _real_initialize(self):
	385	self._initialize_pref()
	386	self._initialize_consent()
	387	if (self._LOGIN_REQUIRED
	388	and self.get_param('cookiefile') is None
	389	and self.get_param('cookiesfrombrowser') is None):
	390	self.raise_login_required('Login details are needed to download this content', method='cookies')
	391
	392	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	393	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	394	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	395
	396	def _get_default_ytcfg(self, client='web'):
	397	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	398
	399	def _get_innertube_host(self, client='web'):
	400	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	401
	402	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	403	# try_get but with fallback to default ytcfg client values when present
	404	_func = lambda y: try_get(y, getter, expected_type)
	405	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	406
	407	def _extract_client_name(self, ytcfg, default_client='web'):
	408	return self._ytcfg_get_safe(
	409	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	410	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	411
	412	def _extract_client_version(self, ytcfg, default_client='web'):
	413	return self._ytcfg_get_safe(
	414	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	415	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	416
	417	def _extract_api_key(self, ytcfg=None, default_client='web'):
	418	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	419
	420	def _extract_context(self, ytcfg=None, default_client='web'):
	421	context = get_first(
	422	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	423	# Enforce language and tz for extraction
	424	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	425	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	426	return context
	427
	428	_SAPISID = None
	429
	430	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	431	time_now = round(time.time())
	432	if self._SAPISID is None:
	433	yt_cookies = self._get_cookies('https://www.youtube.com')
	434	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	435	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	436	sapisid_cookie = dict_get(
	437	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	438	if sapisid_cookie and sapisid_cookie.value:
	439	self._SAPISID = sapisid_cookie.value
	440	self.write_debug('Extracted SAPISID cookie')
	441	# SAPISID cookie is required if not already present
	442	if not yt_cookies.get('SAPISID'):
	443	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	444	self._set_cookie(
	445	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	446	else:
	447	self._SAPISID = False
	448	if not self._SAPISID:
	449	return None
	450	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	451	sapisidhash = hashlib.sha1(
	452	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	453	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	454
	455	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	456	note='Downloading API JSON', errnote='Unable to download API page',
	457	context=None, api_key=None, api_hostname=None, default_client='web'):
	458
	459	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	460	data.update(query)
	461	real_headers = self.generate_api_headers(default_client=default_client)
	462	real_headers.update({'content-type': 'application/json'})
	463	if headers:
	464	real_headers.update(headers)
	465	return self._download_json(
	466	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	467	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	468	data=json.dumps(data).encode('utf8'), headers=real_headers,
	469	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	470
	471	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	472	data = self._search_regex(
	473	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	474	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	475	if data:
	476	return self._parse_json(data, item_id, fatal=fatal)
	477
	478	@staticmethod
	479	def _extract_session_index(*data):
	480	"""
	481	Index of current account in account list.
	482	See: https://github.com/yt-dlp/yt-dlp/pull/519
	483	"""
	484	for ytcfg in data:
	485	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	486	if session_index is not None:
	487	return session_index
	488
	489	# Deprecated?
	490	def _extract_identity_token(self, ytcfg=None, webpage=None):
	491	if ytcfg:
	492	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	493	if token:
	494	return token
	495	if webpage:
	496	return self._search_regex(
	497	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	498	'identity token', default=None, fatal=False)
	499
	500	@staticmethod

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicitly requested by the user

77

INNERTUBE_CLIENTS = {

78

'web': {

79

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

80

'INNERTUBE_CONTEXT': {

81

'client': {

82

'clientName': 'WEB',

83

'clientVersion': '2.20211221.00.00',

84

}

85

},

86

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

87

},

88

'web_embedded': {

89

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

90

'INNERTUBE_CONTEXT': {

91

'client': {

92

'clientName': 'WEB_EMBEDDED_PLAYER',

93

'clientVersion': '1.20211215.00.01',

94

},

95

},

96

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

97

},

98

'web_music': {

99

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

100

'INNERTUBE_HOST': 'music.youtube.com',

101

'INNERTUBE_CONTEXT': {

102

'client': {

103

'clientName': 'WEB_REMIX',

104

'clientVersion': '1.20211213.00.00',

105

}

106

},

107

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

108

},

109

'web_creator': {

110

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

111

'INNERTUBE_CONTEXT': {

112

'client': {

113

'clientName': 'WEB_CREATOR',

114

'clientVersion': '1.20211220.02.00',

115

}

116

},

117

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

118

},

119

'android': {

120

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID',

124

'clientVersion': '16.49',

125

}

126

},

127

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

128

'REQUIRE_JS_PLAYER': False

129

},

130

'android_embedded': {

131

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

132

'INNERTUBE_CONTEXT': {

133

'client': {

134

'clientName': 'ANDROID_EMBEDDED_PLAYER',

135

'clientVersion': '16.49',

136

},

137

},

138

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

139

'REQUIRE_JS_PLAYER': False

140

},

141

'android_music': {

142

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

143

'INNERTUBE_CONTEXT': {

144

'client': {

145

'clientName': 'ANDROID_MUSIC',

146

'clientVersion': '4.57',

147

}

148

},

149

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

150

'REQUIRE_JS_PLAYER': False

151

},

152

'android_creator': {

153

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

154

'INNERTUBE_CONTEXT': {

155

'client': {

156

'clientName': 'ANDROID_CREATOR',

157

'clientVersion': '21.47',

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '16.46',

171

'deviceModel': 'iPhone14,3',

172

}

173

},

174

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

175

'REQUIRE_JS_PLAYER': False

176

},

177

'ios_embedded': {

178

'INNERTUBE_CONTEXT': {

179

'client': {

180

'clientName': 'IOS_MESSAGES_EXTENSION',

181

'clientVersion': '16.46',

182

'deviceModel': 'iPhone14,3',

183

},

184

},

185

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

186

'REQUIRE_JS_PLAYER': False

187

},

188

'ios_music': {

189

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

190

'INNERTUBE_CONTEXT': {

191

'client': {

192

'clientName': 'IOS_MUSIC',

193

'clientVersion': '4.57',

194

},

195

},

196

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

197

'REQUIRE_JS_PLAYER': False

198

},

199

'ios_creator': {

200

'INNERTUBE_CONTEXT': {

201

'client': {

202

'clientName': 'IOS_CREATOR',

203

'clientVersion': '21.47',

204

},

205

},

206

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

207

'REQUIRE_JS_PLAYER': False

208

},

209

# mweb has 'ultralow' formats

210

# See: https://github.com/yt-dlp/yt-dlp/pull/557

211

'mweb': {

212

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

213

'INNERTUBE_CONTEXT': {

214

'client': {

215

'clientName': 'MWEB',

216

'clientVersion': '2.20211221.01.00',

217

}

218

},

219

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

220

},

221

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

222

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

223

'tv_embedded': {

224

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

225

'INNERTUBE_CONTEXT': {

226

'client': {

227

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

228

'clientVersion': '2.0',

229

},

230

},

231

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

237

variant, *base = client_name.rsplit('.', 1)

238

if base:

239

return variant, base[0], variant

240

base, *variant = client_name.split('_', 1)

241

return client_name, base, variant[0] if variant else None

242

243

244

def build_innertube_clients():

245

THIRD_PARTY = {

246

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

247

}

248

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

249

priority = qualities(BASE_CLIENTS[::-1])

250

251

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

252

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

253

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

254

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

255

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

256

257

_, base_client, variant = _split_innertube_client(client)

258

ytcfg['priority'] = 10 * priority(base_client)

259

260

if not variant:

261

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

262

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

263

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

embedscreen['priority'] -= 3

265

elif variant == 'embedded':

266

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

267

ytcfg['priority'] -= 2

268

else:

269

ytcfg['priority'] -= 3

270

271

272

build_innertube_clients()

273

274

275

class YoutubeBaseInfoExtractor(InfoExtractor):

276

"""Provide base functions for Youtube extractors"""

277

278

_RESERVED_NAMES = (

279

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

285

286

# _NETRC_MACHINE = 'youtube'

287

288

# If True it will raise an error if no login info is provided

289

_LOGIN_REQUIRED = False

290

291

_INVIDIOUS_SITES = (

292

# invidious-redirect websites

293

r'(?:www\.)?redirect\.invidious\.io',

294

r'(?:(?:www|dev)\.)?invidio\.us',

295

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

296

r'(?:www\.)?invidious\.pussthecat\.org',

297

r'(?:www\.)?invidious\.zee\.li',

298

r'(?:www\.)?invidious\.ethibox\.fr',

299

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

300

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

301

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

302

# youtube-dl invidious instances list

303

r'(?:(?:www|no)\.)?invidiou\.sh',

304

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

305

r'(?:www\.)?invidious\.kabi\.tk',

306

r'(?:www\.)?invidious\.mastodon\.host',

307

r'(?:www\.)?invidious\.zapashcanon\.fr',

308

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

309

r'(?:www\.)?invidious\.tinfoil-hat\.net',

310

r'(?:www\.)?invidious\.himiko\.cloud',

311

r'(?:www\.)?invidious\.reallyancient\.tech',

312

r'(?:www\.)?invidious\.tube',

313

r'(?:www\.)?invidiou\.site',

314

r'(?:www\.)?invidious\.site',

315

r'(?:www\.)?invidious\.xyz',

316

r'(?:www\.)?invidious\.nixnet\.xyz',

317

r'(?:www\.)?invidious\.048596\.xyz',

318

r'(?:www\.)?invidious\.drycat\.fr',

319

r'(?:www\.)?inv\.skyn3t\.in',

320

r'(?:www\.)?tube\.poal\.co',

321

r'(?:www\.)?tube\.connect\.cafe',

322

r'(?:www\.)?vid\.wxzm\.sx',

323

r'(?:www\.)?vid\.mint\.lgbt',

324

r'(?:www\.)?vid\.puffyan\.us',

325

r'(?:www\.)?yewtu\.be',

326

r'(?:www\.)?yt\.elukerio\.org',

327

r'(?:www\.)?yt\.lelux\.fi',

328

r'(?:www\.)?invidious\.ggc-project\.de',

329

r'(?:www\.)?yt\.maisputain\.ovh',

330

r'(?:www\.)?ytprivate\.com',

331

r'(?:www\.)?invidious\.13ad\.de',

332

r'(?:www\.)?invidious\.toot\.koeln',

333

r'(?:www\.)?invidious\.fdn\.fr',

334

r'(?:www\.)?watch\.nettohikari\.com',

335

r'(?:www\.)?invidious\.namazso\.eu',

336

r'(?:www\.)?invidious\.silkky\.cloud',

337

r'(?:www\.)?invidious\.exonip\.de',

338

r'(?:www\.)?invidious\.riverside\.rocks',

339

r'(?:www\.)?invidious\.blamefran\.net',

340

r'(?:www\.)?invidious\.moomoo\.de',

341

r'(?:www\.)?ytb\.trom\.tf',

342

r'(?:www\.)?yt\.cyberhost\.uk',

343

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

344

r'(?:www\.)?qklhadlycap4cnod\.onion',

345

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

346

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

347

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

348

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

349

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

350

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

351

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

352

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

353

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

354

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

355

)

356

357

def _initialize_consent(self):

358

cookies = self._get_cookies('https://www.youtube.com/')

359

if cookies.get('__Secure-3PSID'):

360

return

361

consent_id = None

362

consent = cookies.get('CONSENT')

363

if consent:

364

if 'YES' in consent.value:

365

return

366

consent_id = self._search_regex(

367

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

368

if not consent_id:

369

consent_id = random.randint(100, 999)

370

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

371

372

def _initialize_pref(self):

373

cookies = self._get_cookies('https://www.youtube.com/')

374

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

379

except ValueError:

380

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

381

pref.update({'hl': 'en', 'tz': 'UTC'})

382

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

383

384

def _real_initialize(self):

385

self._initialize_pref()

386

self._initialize_consent()

387

if (self._LOGIN_REQUIRED

388

and self.get_param('cookiefile') is None

389

and self.get_param('cookiesfrombrowser') is None):

390

self.raise_login_required('Login details are needed to download this content', method='cookies')

391

392

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

393

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

394

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

395

396

def _get_default_ytcfg(self, client='web'):

397

return copy.deepcopy(INNERTUBE_CLIENTS[client])

398

399

def _get_innertube_host(self, client='web'):

400

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

401

402

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

403

# try_get but with fallback to default ytcfg client values when present

404

_func = lambda y: try_get(y, getter, expected_type)

405

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

406

407

def _extract_client_name(self, ytcfg, default_client='web'):

408

return self._ytcfg_get_safe(

409

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

410

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

411

412

def _extract_client_version(self, ytcfg, default_client='web'):

413

return self._ytcfg_get_safe(

414

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

415

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

416

417

def _extract_api_key(self, ytcfg=None, default_client='web'):

418

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

419

420

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict to send with API requests.

    The context is looked up in `ytcfg` first and in the default config of
    `default_client` otherwise. Its 'client' section (when it is a dict) gets
    language and timezone forced to en/UTC.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context

# Cached SAPISID cookie value used by _generate_sapisidhash_header:
# None = not looked up yet, False = looked up but no usable cookie, otherwise the cookie value
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

431

time_now = round(time.time())

432

if self._SAPISID is None:

433

yt_cookies = self._get_cookies('https://www.youtube.com')

434

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

435

# See: https://github.com/yt-dlp/yt-dlp/issues/393

436

sapisid_cookie = dict_get(

437

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

438

if sapisid_cookie and sapisid_cookie.value:

439

self._SAPISID = sapisid_cookie.value

440

self.write_debug('Extracted SAPISID cookie')

441

# SAPISID cookie is required if not already present

442

if not yt_cookies.get('SAPISID'):

443

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

444

self._set_cookie(

445

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

446

else:

447

self._SAPISID = False

448

if not self._SAPISID:

449

return None

450

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

451

sapisidhash = hashlib.sha1(

452

f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()

453

return f'SAPISIDHASH {time_now}_{sapisidhash}'

454

455

def _call_api(self, ep, query, video_id, fatal=True, headers=None,

456

note='Downloading API JSON', errnote='Unable to download API page',

457

context=None, api_key=None, api_hostname=None, default_client='web'):

458

459

data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}

460

data.update(query)

461

real_headers = self.generate_api_headers(default_client=default_client)

462

real_headers.update({'content-type': 'application/json'})

463

if headers:

464

real_headers.update(headers)

465

return self._download_json(

466

'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),

467

video_id=video_id, fatal=fatal, note=note, errnote=errnote,

468

data=json.dumps(data).encode('utf8'), headers=real_headers,

469

query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})

470

471

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData object in `webpage` and return it parsed.

    The boundary-anchored pattern is tried before the bare one.
    Returns None when nothing was found (only possible with fatal=False).
    """
    anchored_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_data = self._search_regex(
        (anchored_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
    if not raw_data:
        return None
    return self._parse_json(raw_data, item_id, fatal=fatal)

477

478

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to
    an int, or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token, or None when it cannot be found.

    The 'ID_TOKEN' entry of `ytcfg` is preferred; otherwise `webpage`
    is searched for an ID_TOKEN assignment.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

499

500

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Returns None when none of the arguments carries a channel sync id.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(data, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, in order of preference
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

532

533

def extract_ytcfg(self, video_id, webpage):
    """
    Return the object passed to `ytcfg.set({...});` in `webpage` as a dict.

    An empty dict is returned when `webpage` is empty, the call is not
    found, or its argument cannot be parsed as JSON.
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped literals; with `$` in
    # their place the pattern could never match and {} was always returned
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}

540

541

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit arguments take precedence over the values found in `ytcfg`.
    Headers whose value is None are left out. 'Authorization' and 'X-Origin'
    are only added when a SAPISIDHASH header can be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {}
    headers['X-YouTube-Client-Name'] = compat_str(client_name)
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Account 0 is assumed when only a sync id is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return dict(item for item in headers.items() if item[1] is not None)

565

566

@staticmethod

567

def _build_api_continuation_query(continuation, ctp=None):

568

query = {

569

'continuation': continuation

570

}

571

# TODO: Inconsistency with clickTrackingParams.

572

# Currently we have a fixed ctp contained within context (from ytcfg)

573

# and a ctp in root query for continuation.

574

if ctp:

575

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the next/reload continuation data of `renderer`.

    Returns None when no such data or no continuation token is present.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))

590

591

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    'continuationCommand' token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

600

601

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None when there is none.

    Next/reload continuation data is preferred; otherwise the first usable
    continuationItemRenderer among the 'contents' and 'items' lists is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x, k=key: x[k], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert renderer in data['alerts'].

    Entries that are not dicts, have no 'type' or have no text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same tolerance as for the outer entries: a non-dict renderer
            # would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

634

635

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

636

errors = []

637

warnings = []

638

for alert_type, alert_message in alerts:

639

if alert_type.lower() == 'error' and fatal:

640

errors.append([alert_type, alert_message])

641

else:

642

warnings.append([alert_type, alert_message])

643

644

for alert_type, alert_message in (warnings + errors[:-1]):

645

self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)

646

if errors:

647

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

648

649

def _extract_and_report_alerts(self, data, *args, **kwargs):

650

return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

651

652

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`.

    A text object is either {'simpleText': ...} or a list of 'runs' whose
    'text' parts are joined; a bare list is treated as the runs themselves.
    @param path_list  paths to try in order; `data` itself when none is given
    @param max_runs   join at most this many runs (all when falsy)
    """
    def branches(path):
        # Paths containing ... or alternatives are handled as yielding a list of results
        return any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))

    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            found = traverse_obj(data, path, default=[])
            candidates = found if branches(path) else [found]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(
                traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at `path_list` in `data`.

    parse_count is tried first; when it yields None, the leading run of
    digits and commas (whitespace removed) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of {'url', 'height', 'width'} dicts; entries that are not
    dicts or have no valid URL are skipped.
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Tolerate malformed entries instead of failing on .get()
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text holds no relative time or it cannot be converted.
    """
    # Either a keyword (group 'start') or "<time> <unit>[s] ago" (groups 'time' and 'unit').
    # Without this assignment `mobj` was undefined and every call raised NameError.
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if mobj:
        start = mobj.group('start')
        if start:
            return datetime_from_str(start)
        try:
            return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
        except ValueError:
            return None
    return None

def _extract_time_text(self, renderer, *path_list):
    """
    Return (timestamp, text) for the time text found at `path_list` in `renderer`.

    A relative time ('... ago', 'today') is tried first, then an absolute
    date. `timestamp` is None when the text cannot be parsed; a warning is
    issued in that case if the text is non-empty.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        date_patterns = (
            r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
            r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
        )
        timestamp = unified_timestamp(text) or unified_timestamp(
            self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

747

748

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying up to the 'extractor_retries' param (default 3).

    A call is retried on network errors (HTTP 403 and 429 excepted), on
    'unknown error' alerts in the response and when none of
    `check_get_keys` is present in the response.
    With fatal=False failures are reported as warnings and None is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, network_exceptions):
                if isinstance(cause, compat_HTTPError):
                    first_bytes = cause.read(512)
                    if not is_html(first_bytes):
                        # A non-HTML error body may carry YouTube's own error message
                        error_body = self._webpage_read_content(
                            cause, None, item_id, prefix=first_bytes) or '{}'
                        yt_error = try_get(
                            self._parse_json(error_body, item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429):
                    last_error = error_to_compat_str(cause or e.msg)
                    if attempt < retries:
                        continue
            if not fatal:
                self.report_warning(error_to_compat_str(e))
                return
            raise

        # Only reached when the API call succeeded
        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                last_error = e.msg
                continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response

@staticmethod
def is_music_url(url):
    """Return True when `url` starts with http(s)://music.youtube.com/."""
    matched = re.match(r'https?://music\.youtube\.com/', url)
    return bool(matched)

823

824

def _extract_video(self, renderer):
    """
    Convert a video renderer dict into a 'url' result pointing at YoutubeIE.

    Shorts get a /shorts/ URL, everything else a /watch URL. 'upload_date' is
    only filled (from the relative publish time) when the youtubetab
    'approximate_date' extractor argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration spelled out in the accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))

    is_short = overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url)
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

876

IE_DESC = 'YouTube'

877

_VALID_URL = r"""(?x)^

878

(

879

(?:https?://|//) # http(s):// or protocol-independent URL

880

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

881

(?:www\.)?deturl\.com/www\.youtube\.com|

882

(?:www\.)?pwnyoutube\.com|

883

(?:www\.)?hooktube\.com|

884

(?:www\.)?yourepeat\.com|

885

tube\.majestyc\.net|

886

%(invidious)s|

887

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

888

(?:.*?\#/)? # handle anchor (#/) redirect urls

889

(?: # the various things that can precede the ID:

890

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

891

|(?: # or the v= param in all its forms

892

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

893

(?:\?|\#!?) # the params delimiter ? or # or #!

894

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

900

vid\.plus| # or vid.plus/xxxx

901

zwearz\.com/watch| # or zwearz.com/watch/xxxx

902

%(invidious)s

903

)/

904

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

905

)

906

)? # all until now is optional -> you can pass the naked ID

907

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

908

(?(1).+)? # if we found the ID, everything can follow

909

(?:\#|$)""" % {

910

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

911

}

912

_PLAYER_INFO_RE = (

913

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

914

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

915

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

916

)

917

_formats = {

918

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

919

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

920

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

921

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

922

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

923

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

924

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

925

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

926

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

927

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

928

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

929

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

930

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

931

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

932

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

933

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

934

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

935

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

940

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

941

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

942

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

943

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

944

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

945

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

946

947

# Apple HTTP Live Streaming

948

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

949

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

950

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

951

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

952

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

953

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

954

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

955

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

956

957

# DASH mp4 video

958

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

959

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

960

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

961

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

962

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

963

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

964

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

965

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

966

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

967

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

968

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

969

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

970

971

# Dash mp4 audio

972

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

973

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

974

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

975

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

976

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

977

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

978

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

979

980

# Dash webm

981

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

982

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

983

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

984

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

985

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

986

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

987

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

988

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

989

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

990

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

991

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

992

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

993

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

994

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

995

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

996

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

997

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

998

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

999

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1000

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1001

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1002

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1003

1004

# Dash webm audio

1005

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1006

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1007

1008

# Dash webm audio with opus inside

1009

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1010

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1011

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1012

1013

# RTMP (unnamed)

1014

'_rtmp': {'protocol': 'rtmp'},

1015

1016

# av01 video only formats sometimes served with "unknown" codecs

1017

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1018

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1019

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1020

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1021

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1022

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1023

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1024

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1025

}

1026

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1038

'uploader': 'Philipp Hagemeister',

1039

'uploader_id': 'phihag',

1040

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1041

'channel': 'Philipp Hagemeister',

1042

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1043

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1044

'upload_date': '20121002',

1045

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1046

'categories': ['Science & Technology'],

1047

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1052

'playable_in_embed': True,

1053

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1054

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1063

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1068

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1069

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1070

'uploader': 'SET India',

1071

'uploader_id': 'setindia',

1072

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1073

'age_limit': 18,

1074

},

1075

'skip': 'Private video',

1076

},

1077

{

1078

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1079

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1084

'uploader': 'Philipp Hagemeister',

1085

'uploader_id': 'phihag',

1086

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1087

'channel': 'Philipp Hagemeister',

1088

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1089

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1090

'upload_date': '20121002',

1091

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1092

'categories': ['Science & Technology'],

1093

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1098

'playable_in_embed': True,

1099

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1100

'live_status': 'not_live',

1101

'age_limit': 0,

1102

'channel_follower_count': int

1103

},

1104

'params': {

1105

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1110

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1115

'uploader_id': '8KVIDEO',

1116

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1117

'description': '',

1118

'uploader': '8KVIDEO',

1119

'title': 'UHDTV TEST 8K VIDEO.mp4'

1120

},

1121

'params': {

1122

'youtube_include_dash_manifest': True,

1123

'format': '141',

1124

},

1125

'skip': 'format 141 not served anymore',

1126

},

1127

# DASH manifest with encrypted signature

1128

{

1129

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1134

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1135

'duration': 244,

1136

'uploader': 'AfrojackVEVO',

1137

'uploader_id': 'AfrojackVEVO',

1138

'upload_date': '20131011',

1139

'abr': 129.495,

1140

'like_count': int,

1141

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1142

'playable_in_embed': True,

1143

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1144

'view_count': int,

1145

'track': 'The Spark',

1146

'live_status': 'not_live',

1147

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1148

'channel': 'Afrojack',

1149

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1150

'tags': 'count:19',

1151

'availability': 'public',

1152

'categories': ['Music'],

1153

'age_limit': 0,

1154

'alt_title': 'The Spark',

1155

'channel_follower_count': int

1156

},

1157

'params': {

1158

'youtube_include_dash_manifest': True,

1159

'format': '141/bestaudio[ext=m4a]',

1160

},

1161

},

1162

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1163

{

1164

'note': 'Embed allowed age-gate video',

1165

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1170

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1171

'duration': 142,

1172

'uploader': 'The Witcher',

1173

'uploader_id': 'WitcherGame',

1174

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1175

'upload_date': '20140605',

1176

'age_limit': 18,

1177

'categories': ['Gaming'],

1178

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1179

'availability': 'needs_auth',

1180

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1181

'like_count': int,

1182

'channel': 'The Witcher',

1183

'live_status': 'not_live',

1184

'tags': 'count:17',

1185

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1186

'playable_in_embed': True,

1187

'view_count': int,

1188

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1193

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1198

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1199

'upload_date': '20200408',

1200

'uploader_id': 'FlyingKitty900',

1201

'uploader': 'FlyingKitty',

1202

'age_limit': 18,

1203

'availability': 'needs_auth',

1204

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1205

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1206

'channel': 'FlyingKitty',

1207

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1208

'view_count': int,

1209

'categories': ['Entertainment'],

1210

'live_status': 'not_live',

1211

'tags': ['Flyingkitty', 'godzilla 2'],

1212

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1213

'like_count': int,

1214

'duration': 177,

1215

'playable_in_embed': True,

1216

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1221

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1222

'info_dict': {

1223

'id': 'Tq92D6wQ1mg',

1224

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1225

'ext': 'mp4',

1226

'upload_date': '20191228',

1227

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1228

'uploader': 'Projekt Melody',

1229

'description': 'md5:17eccca93a786d51bc67646756894066',

1230

'age_limit': 18,

1231

'like_count': int,

1232

'availability': 'needs_auth',

1233

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1234

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1235

'view_count': int,

1236

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1237

'channel': 'Projekt Melody',

1238

'live_status': 'not_live',

1239

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1240

'playable_in_embed': True,

1241

'categories': ['Entertainment'],

1242

'duration': 106,

1243

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1244

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1249

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1254

'uploader': 'Herr Lurik',

1255

'uploader_id': 'st3in234',

1256

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1257

'upload_date': '20130730',

1258

'track': 'Such mich find mich',

1259

'age_limit': 0,

1260

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1261

'like_count': int,

1262

'playable_in_embed': False,

1263

'creator': 'OOMPH!',

1264

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1265

'view_count': int,

1266

'alt_title': 'Such mich find mich',

1267

'duration': 210,

1268

'channel': 'Herr Lurik',

1269

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1270

'categories': ['Music'],

1271

'availability': 'public',

1272

'uploader_url': 'http://www.youtube.com/user/st3in234',

1273

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1274

'live_status': 'not_live',

1275

'artist': 'OOMPH!',

1276

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1281

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1282

'only_matching': True,

1283

},

1284

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1285

# YouTube Red ad is not captured for creator

1286

{

1287

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1293

'uploader_id': 'deadmau5',

1294

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1295

'creator': 'deadmau5',

1296

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1297

'uploader': 'deadmau5',

1298

'title': 'Deadmau5 - Some Chords (HD)',

1299

'alt_title': 'Some Chords',

1300

'availability': 'public',

1301

'tags': 'count:14',

1302

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1303

'view_count': int,

1304

'live_status': 'not_live',

1305

'channel': 'deadmau5',

1306

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1307

'like_count': int,

1308

'track': 'Some Chords',

1309

'artist': 'deadmau5',

1310

'playable_in_embed': True,

1311

'age_limit': 0,

1312

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1313

'categories': ['Music'],

1314

'album': 'Some Chords',

1315

'channel_follower_count': int

1316

},

1317

'expected_warnings': [

1318

'DASH manifest missing',

1319

]

1320

},

1321

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1322

{

1323

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1329

'uploader_id': 'olympic',

1330

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1331

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1332

'uploader': 'Olympics',

1333

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1334

'like_count': int,

1335

'release_timestamp': 1343767800,

1336

'playable_in_embed': True,

1337

'categories': ['Sports'],

1338

'release_date': '20120731',

1339

'channel': 'Olympics',

1340

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1341

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1342

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1343

'age_limit': 0,

1344

'availability': 'public',

1345

'live_status': 'was_live',

1346

'view_count': int,

1347

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1348

'channel_follower_count': int

1349

},

1350

'params': {

1351

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1361

'duration': 85,

1362

'upload_date': '20110310',

1363

'uploader_id': 'AllenMeow',

1364

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1365

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1366

'uploader': '孫ᄋᄅ',

1367

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1368

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1373

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1374

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1375

'view_count': int,

1376

'categories': ['People & Blogs'],

1377

'like_count': int,

1378

'live_status': 'not_live',

1379

'availability': 'unlisted',

1380

'channel_follower_count': int

1381

},

1382

},

1383

# url_encoded_fmt_stream_map is empty string

1384

{

1385

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1390

'description': '',

1391

'upload_date': '20150404',

1392

'uploader_id': 'spbelect',

1393

'uploader': 'Наблюдатели Петербурга',

1394

},

1395

'params': {

1396

'skip_download': 'requires avconv',

1397

},

1398

'skip': 'This live event has ended.',

1399

},

1400

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1401

{

1402

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1407

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1408

'duration': 220,

1409

'upload_date': '20150625',

1410

'uploader_id': 'dorappi2000',

1411

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1412

'uploader': 'dorappi2000',

1413

'formats': 'mincount:31',

1414

},

1415

'skip': 'not actual anymore',

1416

},

1417

# DASH manifest with segment_list

1418

{

1419

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1420

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1425

'uploader': 'Airtek',

1426

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1427

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1428

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1429

},

1430

'params': {

1431

'youtube_include_dash_manifest': True,

1432

'format': '135', # bestvideo

1433

},

1434

'skip': 'This live event has ended.',

1435

},

1436

{

1437

# Multifeed videos (multiple cameras), URL is for Main Camera

1438

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1439

'info_dict': {

1440

'id': 'jvGDaLqkpTg',

1441

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1442

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1449

'description': 'md5:e03b909557865076822aa169218d6a5d',

1450

'duration': 10643,

1451

'upload_date': '20161111',

1452

'uploader': 'Team PGP',

1453

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1454

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1461

'description': 'md5:e03b909557865076822aa169218d6a5d',

1462

'duration': 10991,

1463

'upload_date': '20161111',

1464

'uploader': 'Team PGP',

1465

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1466

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1473

'description': 'md5:e03b909557865076822aa169218d6a5d',

1474

'duration': 10995,

1475

'upload_date': '20161111',

1476

'uploader': 'Team PGP',

1477

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1478

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1485

'description': 'md5:e03b909557865076822aa169218d6a5d',

1486

'duration': 10990,

1487

'upload_date': '20161111',

1488

'uploader': 'Team PGP',

1489

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1490

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1495

},

1496

'skip': 'Not multifeed anymore',

1497

},

1498

{

1499

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1500

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1501

'info_dict': {

1502

'id': 'gVfLd0zydlo',

1503

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1504

},

1505

'playlist_count': 2,

1506

'skip': 'Not multifeed anymore',

1507

},

1508

{

1509

'url': 'https://vid.plus/FlRa-iH7PGw',

1510

'only_matching': True,

1511

},

1512

{

1513

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1514

'only_matching': True,

1515

},

1516

{

1517

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1518

# Also tests cut-off URL expansion in video description (see

1519

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1520

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1521

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1526

'alt_title': 'Dark Walk',

1527

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1528

'duration': 133,

1529

'upload_date': '20151119',

1530

'uploader_id': 'IronSoulElf',

1531

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1532

'uploader': 'IronSoulElf',

1533

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1534

'track': 'Dark Walk',

1535

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1536

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1537

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1538

'categories': ['Film & Animation'],

1539

'view_count': int,

1540

'live_status': 'not_live',

1541

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1542

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1543

'tags': 'count:13',

1544

'availability': 'public',

1545

'channel': 'IronSoulElf',

1546

'playable_in_embed': True,

1547

'like_count': int,

1548

'age_limit': 0,

1549

'channel_follower_count': int

1550

},

1551

'params': {

1552

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1557

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1558

'only_matching': True,

1559

},

1560

{

1561

# Video with yt:stretch=17:0

1562

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1567

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1568

'upload_date': '20151107',

1569

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1570

'uploader': 'CH GAMER DROID',

1571

},

1572

'params': {

1573

'skip_download': True,

1574

},

1575

'skip': 'This video does not exist.',

1576

},

1577

{

1578

# Video with incomplete 'yt:stretch=16:'

1579

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1580

'only_matching': True,

1581

},

1582

{

1583

# Video licensed under Creative Commons

1584

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1589

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1590

'duration': 721,

1591

'upload_date': '20150128',

1592

'uploader_id': 'BerkmanCenter',

1593

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1594

'uploader': 'The Berkman Klein Center for Internet & Society',

1595

'license': 'Creative Commons Attribution license (reuse allowed)',

1596

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1597

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1598

'like_count': int,

1599

'age_limit': 0,

1600

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1601

'channel': 'The Berkman Klein Center for Internet & Society',

1602

'availability': 'public',

1603

'view_count': int,

1604

'categories': ['Education'],

1605

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1606

'live_status': 'not_live',

1607

'playable_in_embed': True,

1608

'channel_follower_count': int

1609

},

1610

'params': {

1611

'skip_download': True,

},

},

{

# Channel-like uploader_url

1616

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1621

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1622

'duration': 4060,

1623

'upload_date': '20151120',

1624

'uploader': 'Bernie Sanders',

1625

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1626

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1627

'license': 'Creative Commons Attribution license (reuse allowed)',

1628

'playable_in_embed': True,

1629

'tags': 'count:12',

1630

'like_count': int,

1631

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1632

'age_limit': 0,

1633

'availability': 'public',

1634

'categories': ['News & Politics'],

1635

'channel': 'Bernie Sanders',

1636

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1637

'view_count': int,

1638

'live_status': 'not_live',

1639

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1640

'channel_follower_count': int

1641

},

1642

'params': {

1643

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1648

'only_matching': True,

1649

},

1650

{

1651

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1652

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1653

'only_matching': True,

1654

},

1655

{

1656

# Rental video preview

1657

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1662

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1663

'upload_date': '20150811',

1664

'uploader': 'FlixMatrix',

1665

'uploader_id': 'FlixMatrixKaravan',

1666

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1667

'license': 'Standard YouTube License',

1668

},

1669

'params': {

1670

'skip_download': True,

1671

},

1672

'skip': 'This video is not available.',

1673

},

1674

{

1675

# YouTube Red video with episode data

1676

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1681

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1682

'duration': 2085,

1683

'upload_date': '20170118',

1684

'uploader': 'Vsauce',

1685

'uploader_id': 'Vsauce',

1686

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1687

'series': 'Mind Field',

1688

'season_number': 1,

1689

'episode_number': 1,

1690

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1691

'tags': 'count:12',

1692

'view_count': int,

1693

'availability': 'public',

1694

'age_limit': 0,

1695

'channel': 'Vsauce',

1696

'episode': 'Episode 1',

1697

'categories': ['Entertainment'],

1698

'season': 'Season 1',

1699

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1700

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1701

'like_count': int,

1702

'playable_in_embed': True,

1703

'live_status': 'not_live',

1704

'channel_follower_count': int

1705

},

1706

'params': {

1707

'skip_download': True,

1708

},

1709

'expected_warnings': [

1710

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1715

# as inappropriate or offensive to some audiences.

1716

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1721

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1722

'duration': 965,

1723

'upload_date': '20140124',

1724

'uploader': 'New Century Foundation',

1725

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1726

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1727

},

1728

'params': {

1729

'skip_download': True,

1730

},

1731

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1736

'only_matching': True,

1737

},

1738

{

1739

# geo restricted to JP

1740

'url': 'sJL6WA-aGkQ',

1741

'only_matching': True,

1742

},

1743

{

1744

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1745

'only_matching': True,

1746

},

1747

{

1748

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1749

'only_matching': True,

1750

},

1751

{

1752

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1753

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1754

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1759

'only_matching': True,

1760

},

1761

{

1762

# Video with unsupported adaptive stream type formats

1763

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1768

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1769

'duration': 433,

1770

'upload_date': '20130923',

1771

'uploader': 'Amelia Putri Harwita',

1772

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1773

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1774

'formats': 'maxcount:10',

1775

},

1776

'params': {

1777

'skip_download': True,

1778

'youtube_include_dash_manifest': False,

1779

},

1780

'skip': 'not actual anymore',

1781

},

1782

{

1783

# YouTube Music auto-generated description

1784

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1789

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1790

'upload_date': '20190312',

1791

'uploader': 'Stephen - Topic',

1792

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1793

'artist': 'Stephen',

1794

'track': 'Voyeur Girl',

1795

'album': 'it\'s too much love to know my dear',

1796

'release_date': '20190313',

1797

'release_year': 2019,

1798

'alt_title': 'Voyeur Girl',

1799

'view_count': int,

1800

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1801

'playable_in_embed': True,

1802

'like_count': int,

1803

'categories': ['Music'],

1804

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1805

'channel': 'Stephen',

1806

'availability': 'public',

1807

'creator': 'Stephen',

1808

'duration': 169,

1809

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1810

'age_limit': 0,

1811

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1812

'tags': 'count:11',

1813

'live_status': 'not_live',

1814

'channel_follower_count': int

1815

},

1816

'params': {

1817

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1822

'only_matching': True,

1823

},

1824

{

1825

# invalid -> valid video id redirection

1826

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1831

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1832

'upload_date': '20090125',

1833

'uploader': 'Prochorowka',

1834

'uploader_id': 'Prochorowka',

1835

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1836

'artist': 'Panjabi MC',

1837

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1838

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1839

},

1840

'params': {

1841

'skip_download': True,

1842

},

1843

'skip': 'Video unavailable',

1844

},

1845

{

1846

# empty description results in an empty string

1847

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1854

'uploader_id': 'ElevageOrVert',

1855

'uploader': 'ElevageOrVert',

1856

'view_count': int,

1857

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1858

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1859

'like_count': int,

1860

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1861

'tags': [],

1862

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1863

'availability': 'public',

1864

'age_limit': 0,

1865

'categories': ['Pets & Animals'],

1866

'duration': 7,

1867

'playable_in_embed': True,

1868

'live_status': 'not_live',

1869

'channel': 'ElevageOrVert',

1870

'channel_follower_count': int

1871

},

1872

'params': {

1873

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1878

# see [2] for an example with '};' inside ytInitialPlayerResponse

1879

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1880

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1881

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1886

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1887

'upload_date': '20130831',

1888

'uploader_id': 'kudvenkat',

1889

'uploader': 'kudvenkat',

1890

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1891

'like_count': int,

1892

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1893

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1894

'live_status': 'not_live',

1895

'categories': ['Education'],

1896

'availability': 'public',

1897

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1898

'tags': 'count:12',

1899

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1904

'channel_follower_count': int

1905

},

1906

'params': {

1907

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1912

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1913

'only_matching': True,

1914

},

1915

{

1916

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1917

'only_matching': True,

1918

},

1919

{

1920

# https://github.com/ytdl-org/youtube-dl/pull/28094

1921

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1927

'upload_date': '20141120',

1928

'uploader': 'The Cinematic Orchestra - Topic',

1929

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1930

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1931

'artist': 'The Cinematic Orchestra',

1932

'track': 'Burn Out',

1933

'album': 'Every Day',

1934

'like_count': int,

1935

'live_status': 'not_live',

1936

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1941

'creator': 'The Cinematic Orchestra',

1942

'channel': 'The Cinematic Orchestra',

1943

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1944

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1945

'availability': 'public',

1946

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1947

'categories': ['Music'],

1948

'playable_in_embed': True,

1949

'channel_follower_count': int

1950

},

1951

'params': {

1952

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1957

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1958

'only_matching': True,

1959

},

1960

{

1961

# controversial video, requires bpctr/contentCheckOk

1962

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1967

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1968

'uploader': 'CBS Mornings',

1969

'uploader_id': 'CBSThisMorning',

1970

'upload_date': '20140716',

1971

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1972

'duration': 170,

1973

'categories': ['News & Politics'],

1974

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1975

'view_count': int,

1976

'channel': 'CBS Mornings',

1977

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1978

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1979

'age_limit': 18,

1980

'availability': 'needs_auth',

1981

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1982

'like_count': int,

1983

'live_status': 'not_live',

1984

'playable_in_embed': True,

1985

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1990

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1995

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1996

'upload_date': '20201120',

1997

'uploader': 'Walk around Japan',

1998

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1999

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2000

'duration': 1456,

2001

'categories': ['Travel & Events'],

2002

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2003

'view_count': int,

2004

'channel': 'Walk around Japan',

2005

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2006

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2007

'age_limit': 0,

2008

'availability': 'public',

2009

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2010

'live_status': 'not_live',

2011

'playable_in_embed': True,

2012

'channel_follower_count': int

2013

},

2014

'params': {

2015

'skip_download': True,

2016

},

2017

}, {

2018

# Has multiple audio streams

2019

'url': 'WaOKSUlf4TM',

2020

'only_matching': True

2021

}, {

2022

# Requires Premium: has format 141 when requested using YTM url

2023

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2024

'only_matching': True

2025

}, {

2026

# multiple subtitles with same lang_code

2027

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2028

'only_matching': True,

2029

}, {

2030

# Force use android client fallback

2031

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2032

'info_dict': {

2033

'id': 'YOelRv7fMxY',

2034

'title': 'DIGGING A SECRET TUNNEL Part 1',

2035

'ext': '3gp',

2036

'upload_date': '20210624',

2037

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2038

'uploader': 'colinfurze',

2039

'uploader_id': 'colinfurze',

2040

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2041

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2042

'duration': 596,

2043

'categories': ['Entertainment'],

2044

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2045

'view_count': int,

2046

'channel': 'colinfurze',

2047

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2048

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2049

'age_limit': 0,

2050

'availability': 'public',

2051

'like_count': int,

2052

'live_status': 'not_live',

2053

'playable_in_embed': True,

2054

'channel_follower_count': int

2055

},

2056

'params': {

2057

'format': '17', # 3gp format available on android

2058

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2063

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2064

'only_matching': True,

2065

'params': {

2066

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2071

'only_matching': True,

2072

}, {

2073

'note': 'Storyboards',

2074

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2080

'uploader_id': 'scishow',

2081

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2082

'upload_date': '20140324',

2083

'uploader': 'SciShow',

2084

'like_count': int,

2085

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2086

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2087

'view_count': int,

2088

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2089

'playable_in_embed': True,

2090

'tags': 'count:12',

2091

'uploader_url': 'http://www.youtube.com/user/scishow',

2092

'availability': 'public',

2093

'channel': 'SciShow',

2094

'live_status': 'not_live',

2095

'duration': 248,

2096

'categories': ['Education'],

2097

'age_limit': 0,

2098

'channel_follower_count': int

2099

}, 'params': {'format': 'mhtml', 'skip_download': True}

2100

}, {

2101

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2102

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2107

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2108

'uploader': 'Leon Nguyen',

2109

'uploader_id': 'VNSXIII',

2110

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2111

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2112

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2117

'tags': 'count:23',

2118

'playable_in_embed': True,

2119

'live_status': 'not_live',

2120

'upload_date': '20220103',

2121

'like_count': int,

2122

'availability': 'public',

2123

'channel': 'Leon Nguyen',

2124

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2125

'channel_follower_count': int

2126

}

2127

}, {

2128

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2129

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2134

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2135

'uploader': 'Quackity',

2136

'uploader_id': 'QuackityHQ',

2137

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2138

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2139

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2144

'tags': 'count:26',

2145

'playable_in_embed': True,

2146

'live_status': 'not_live',

2147

'release_timestamp': 1641172509,

2148

'release_date': '20220103',

2149

'upload_date': '20220103',

2150

'like_count': int,

2151

'availability': 'public',

2152

'channel': 'Quackity',

2153

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2154

'channel_follower_count': int

2155

}

2156

},

2157

{ # continuous livestream. Microformat upload date should be preferred.

2158

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2159

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2160

'info_dict': {

2161

'id': 'kgx4WGK0oNU',

2162

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2163

'ext': 'mp4',

2164

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2165

'availability': 'public',

2166

'age_limit': 0,

2167

'release_timestamp': 1637975704,

2168

'upload_date': '20210619',

2169

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2170

'live_status': 'is_live',

2171

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2172

'uploader': '阿鲍Abao',

2173

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2174

'channel': 'Abao in Tokyo',

2175

'channel_follower_count': int,

2176

'release_date': '20211127',

2177

'tags': 'count:39',

2178

'categories': ['People & Blogs'],

2179

'like_count': int,

2180

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2181

'view_count': int,

2182

'playable_in_embed': True,

2183

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2184

},

2185

'params': {'skip_download': True}

},

]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level; the local
    # import is presumably deliberate - confirm before removing it.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    if playlist_id:
        return False
    return super(YoutubeIE, cls).suitable(url)

2197

2198

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (see _load_player)
    # _player_cache: signature/nsig functions and decrypted nsig values
    self._code_cache, self._player_cache = {}, {}

2202

2203

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2204

lock = threading.Lock()

2205

2206

is_live = True

2207

start_time = time.time()

2208

formats = [f for f in formats if f.get('is_from_start')]

2209

2210

def refetch_manifest(format_id, delay):

2211

nonlocal formats, start_time, is_live

2212

if time.time() <= start_time + delay:

2213

return

2214

2215

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2216

video_details = traverse_obj(

2217

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2218

microformats = traverse_obj(

2219

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2220

expected_type=dict, default=[])

2221

_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)

2222

start_time = time.time()

2223

2224

def mpd_feed(format_id, delay):

2225

"""

2226

@returns (manifest_url, manifest_stream_number, is_live) or None

2227

"""

2228

with lock:

2229

refetch_manifest(format_id, delay)

2230

2231

f = next((f for f in formats if f['format_id'] == format_id), None)

2232

if not f:

2233

if not is_live:

2234

self.to_screen(f'{video_id}: Video is no longer live')

2235

else:

2236

self.report_warning(

2237

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2238

return None

2239

return f['manifest_url'], f['manifest_stream_number'], is_live

for f in formats:

f['is_live'] = True

f['protocol'] = 'http_dash_segments_generator'

2244

f['fragments'] = functools.partial(

2245

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2246

2247

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2248

FETCH_SPAN, MAX_DURATION = 5, 432000

2249

2250

mpd_url, stream_number, is_live = None, None, True

2251

2252

begin_index = 0

2253

download_start_time = ctx.get('start') or time.time()

2254

2255

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2256

if lack_early_segments:

2257

self.report_warning(bug_reports_message(

2258

'Starting download from the last 120 hours of the live stream since '

2259

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2260

lack_early_segments = True

2261

2262

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2263

fragments, fragment_base_url = None, None

2264

2265

def _extract_sequence_from_mpd(refresh_sequence, immediate):

2266

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2267

# Obtain from MPD's maximum seq value

2268

old_mpd_url = mpd_url

2269

last_error = ctx.pop('last_error', None)

2270

expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2271

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2272

or (mpd_url, stream_number, False))

2273

if not refresh_sequence:

2274

if expire_fast and not is_live:

2275

return False, last_seq

2276

elif old_mpd_url == mpd_url:

2277

return True, last_seq

2278

try:

2279

fmts, _ = self._extract_mpd_formats_and_subtitles(

2280

mpd_url, None, note=False, errnote=False, fatal=False)

2281

except ExtractorError:

2282

fmts = None

2283

if not fmts:

2284

no_fragment_score += 2

2285

return False, last_seq

2286

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2287

fragments = fmt_info['fragments']

2288

fragment_base_url = fmt_info['fragment_base_url']

2289

assert fragment_base_url

2290

2291

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2292

return True, _last_seq

2293

2294

while is_live:

2295

fetch_time = time.time()

2296

if no_fragment_score > 30:

2297

return

2298

if last_segment_url:

2299

# Obtain from "X-Head-Seqnum" header value from each segment

2300

try:

2301

urlh = self._request_webpage(

2302

last_segment_url, None, note=False, errnote=False, fatal=False)

2303

except ExtractorError:

2304

urlh = None

2305

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2306

if last_seq is None:

2307

no_fragment_score += 2

2308

last_segment_url = None

2309

continue

2310

else:

2311

should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)

2312

no_fragment_score += 2

2313

if not should_continue:

2314

continue

2315

2316

if known_idx > last_seq:

2317

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2323

# skip from the start when it's negative value

2324

known_idx = last_seq + begin_index

2325

if lack_early_segments:

2326

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2327

try:

2328

for idx in range(known_idx, last_seq):

2329

# do not update sequence here or you'll get skipped some part of it

2330

should_continue, _ = _extract_sequence_from_mpd(False, False)

2331

if not should_continue:

2332

known_idx = idx - 1

2333

raise ExtractorError('breaking out of outer loop')

2334

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2335

yield {

2336

'url': last_segment_url,

2337

}

2338

if known_idx == last_seq:

2339

no_fragment_score += 5

2340

else:

2341

no_fragment_score = 0

2342

known_idx = last_seq

2343

except ExtractorError:

2344

continue

2345

2346

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2347

2348

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts.

    ``PLAYER_JS_URL`` is looked up first, then the ``jsUrl`` entries under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. Returns None when neither is present.
    *webpage* is accepted but not used here.
    """
    relative_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if relative_url:
        return urljoin('https://www.youtube.com', relative_url)
    return None

2355

2356

def _download_player_url(self, video_id, fatal=False):

2357

res = self._download_webpage(

2358

'https://www.youtube.com/iframe_api',

2359

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2360

if res:

2361

player_version = self._search_regex(

2362

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2363

if player_version:

2364

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2365

2366

def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature"""
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(compat_str, part_lengths))

2369

2370

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2379

2380

def _load_player(self, video_id, player_url, fatal=True):

2381

player_id = self._extract_player_info(player_url)

2382

if player_id not in self._code_cache:

2383

code = self._download_webpage(

2384

player_url, video_id, fatal=fatal,

2385

note='Downloading player ' + player_id,

2386

errnote='Download of %s failed' % player_url)

2387

if code:

2388

self._code_cache[player_id] = code

2389

return self._code_cache.get(player_id)

2390

2391

def _extract_signature_function(self, video_id, player_url, example_sig):

2392

player_id = self._extract_player_info(player_url)

2393

2394

# Read from filesystem cache

2395

func_id = 'js_%s_%s' % (

2396

player_id, self._signature_cache_id(example_sig))

2397

assert os.path.basename(func_id) == func_id

2398

2399

cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)

2400

if cache_spec is not None:

2401

return lambda s: ''.join(s[i] for i in cache_spec)

2402

2403

code = self._load_player(video_id, player_url)

2404

if code:

2405

res = self._parse_sig_js(code)

2406

2407

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2408

cache_res = res(test_string)

2409

cache_spec = [ord(c) for c in cache_res]

2410

2411

self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)

2412

return res

2413

2414

def _print_sig_code(self, func, example_sig):

2415

if not self.get_param('youtube_print_sig_code'):

2416

return

2417

2418

def gen_sig_code(idxs):

2419

def _genslice(start, end, step):

2420

starts = '' if start == 0 else str(start)

2421

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2422

steps = '' if step == 1 else (':%d' % step)

2423

return 's[%s%s%s]' % (starts, ends, steps)

2424

2425

step = None

2426

# Quelch pyflakes warnings - start will be set when step is set

2427

start = '(Never used)'

2428

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2433

step = None

2434

continue

2435

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2445

2446

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2447

cache_res = func(test_string)

2448

cache_spec = [ord(c) for c in cache_res]

2449

expr_code = ' + '.join(gen_sig_code(cache_spec))

2450

signature_id_tuple = '(%s)' % (

2451

', '.join(compat_str(len(p)) for p in example_sig.split('.')))

2452

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2453

' return %s\n') % (signature_id_tuple, expr_code)

2454

self.to_screen('Extracted signature function:\n' + code)

2455

2456

def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python wrapper.

    The patterns are tried in order and the ``sig`` group of the first match
    names the JS function. The returned callable takes the encrypted
    signature string and runs that function through JSInterpreter.
    """
    # Literal parentheses must be written as \( and \); a bare "$" in the
    # middle of a pattern is an end-of-string anchor and can never match there.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The JS function takes its arguments as a list
    return lambda s: initial_function([s])

2479

2480

def _decrypt_signature(self, s, video_id, player_url):

2481

"""Turn the encrypted s field into a working signature"""

2482

2483

if player_url is None:

2484

raise ExtractorError('Cannot decrypt signature without player_url')

2485

2486

try:

2487

player_id = (player_url, self._signature_cache_id(s))

2488

if player_id not in self._player_cache:

2489

func = self._extract_signature_function(

2490

video_id, player_url, s

2491

)

2492

self._player_cache[player_id] = func

2493

func = self._player_cache[player_id]

2494

self._print_sig_code(func, s)

2495

return func(s)

2496

except Exception as e:

2497

raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2498

2499

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Both the n function (per player URL) and every decrypted value are kept
    in ``_player_cache``. Raises ExtractorError when player_url is None or
    when anything fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in cache:
            cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypt = cache[func_key]
        cache[value_key] = decrypt(s)
        self.write_debug(f'Decrypted nsig {s} => {cache[value_key]}')
        return cache[value_key]
    except Exception as err:
        raise ExtractorError(traceback.format_exc(), cause=err, video_id=video_id)

2519

2520

def _extract_n_function_name(self, jscode):

2521

nfunc, idx = self._search_regex(

2522

r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]$',

2523

jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))

2524

if not idx:

2525

return nfunc

2526

return json.loads(js_to_json(self._search_regex(

2527

rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,

2528

f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2529

2530

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is taken from the ``youtube-nsig`` cache when present;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    disk_cache = self._downloader.cache
    func_code = disk_cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        disk_cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    # The JS function takes its arguments as a list
    return lambda s: jsi.extract_function_from_code(*func_code)([s])

2547

2548

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2549

"""

2550

Extract signatureTimestamp (sts)

2551

Required to tell API what sig/player version is in use.

2552

"""

2553

sts = None

2554

if isinstance(ytcfg, dict):

2555

sts = int_or_none(ytcfg.get('STS'))

2556

2557

if not sts:

2558

# Attempt to extract from player

2559

if player_url is None:

2560

error_msg = 'Cannot extract signature timestamp without player_url.'

2561

if fatal:

2562

raise ExtractorError(error_msg)

2563

self.report_warning(error_msg)

2564

return

2565

code = self._load_player(video_id, player_url, fatal=fatal)

2566

if code:

2567

sts = int_or_none(self._search_regex(

2568

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2569

'JS player signature timestamp', group='sts', fatal=fatal))

2570

return sts

2571

2572

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video counts as watched.

    The URL comes from ``playbackTracking.videostatsPlaybackUrl.baseUrl`` of
    the player responses. A missing URL or a failed request only produces a
    warning; nothing is returned.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Draw the 16 symbols uniformly; randint(0, 256) & 63 was slightly biased
    # because randint includes its upper bound.
    cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2597

2598

@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage* as a list.

    The list holds unescaped embed URLs first, followed by the values of
    lazyYT ``data-youtube-id`` attributes and of the ``data-video_id``
    attributes of "YouTube Video Importer" players.
    """
    # Embedded YouTube player
    # (embedSWF is followed by its opening parenthesis and optional spaces;
    # the former "\(?:\s*" demanded a literal colon and never matched a call)
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the last one is the video id
    entries.extend(m[-1] for m in matches)

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    urls = YoutubeIE._extract_urls(webpage)
    if urls:
        return urls[0]
    return None

2634

2635

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2641

2642

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)

2656

2657

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-marker engagement panels in *data*.

    The first panel that yields a non-empty chapter list wins; an empty list
    is returned when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2672

2673

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2674

chapters = []

2675

last_chapter = {'start_time': 0}

2676

for idx, chapter in enumerate(chapter_list or []):

2677

title = chapter_title(chapter)

2678

start_time = chapter_time(chapter)

2679

if start_time is None:

2680

continue

2681

last_chapter['end_time'] = start_time

2682

if start_time < last_chapter['start_time']:

2683

if idx == 1:

2684

chapters.pop()

2685

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2686

else:

2687

self.report_warning(f'Invalid start time for chapter "{title}"')

2688

continue

2689

last_chapter = {'start_time': start_time, 'title': title}

2690

chapters.append(last_chapter)

2691

last_chapter['end_time'] = duration

2692

return chapters

2693

2694

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):

2695

return self._parse_json(self._search_regex(

2696

(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),

2697

regex), webpage, name, default='{}'), video_id, fatal=False)

2698

2699

def _extract_comment(self, comment_renderer, parent=None):

2700

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2705

2706

# note: timestamp is an estimate calculated from the current time and time_text

2707

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2708

author = self._get_text(comment_renderer, 'authorText')

2709

author_id = try_get(comment_renderer,

2710

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

2711

2712

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2713

lambda x: x['likeCount']), compat_str)) or 0

2714

author_thumbnail = try_get(comment_renderer,

2715

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

2716

2717

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2718

is_favorited = 'creatorHeart' in (try_get(

2719

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2724

'time_text': time_text,

2725

'like_count': votes,

2726

'is_favorited': is_favorited,

2727

'author': author,

2728

'author_id': author_id,

2729

'author_thumbnail': author_thumbnail,

2730

'author_is_uploader': author_is_uploader,

2731

'parent': parent or 'root'

2732

}

2733

2734

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts, following continuations page by page.

    Called without `parent` for the top-level comment section, and
    recursively (with `parent` set to the parent comment id and the shared
    `tracker` dict) for every reply thread.
    """

    def config_value(name):
        # First value of the extractor argument, or '' when it was not given
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation for the requested sort order (or None)"""
        found = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            estimated = self._get_count(header, 'countText', 'commentsCount')

            if estimated:
                tracker['est_total'] = estimated
                self.to_screen(f'Downloading ~{estimated} comments')

            # 1 = new, 0 = top
            sort_index = int(config_value('comment_sort') != 'top')

            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index],
                dict) or {}
            endpoint = menu_item.get('serviceEndpoint') or {}

            found = (
                self._extract_continuation_ep_data(endpoint)
                or self._extract_continuation(menu_item))
            if found:
                label = str_or_none(menu_item.get('title'))
                if not label:
                    label = 'newest first' if sort_index else 'top comments'
                self.to_screen('Sorting comments by %s' % label.lower())
                break
        return found

    def extract_thread(contents):
        """Yield the comments of one page, descending into reply threads"""
        top_level = not parent
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        if top_level:
            tracker['current_page_thread'] = 0
        for content in contents:
            if top_level and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) signals the consumer to stop
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            remaining = max(0, max_replies - tracker['total_reply_comments'])
            yield from itertools.islice(replies, min(max_replies_per_thread, remaining))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_value('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing/invalid limits fall back to sys.maxsize
    raw_limits = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = [
        int_or_none(value, default=sys.maxsize) for value in raw_limits]

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        progress = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, progress)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], progress)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The very first response only carries the header
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break
        page_num += 1

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""

    def _iter_comments(sections):
        # Pick the first item section flagged as the comment section;
        # None is passed on when there is no such section.
        section_renderer = None
        for candidate in traverse_obj(sections, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section_renderer = candidate
                break
        yield from self._comment_entries(section_renderer, ytcfg, video_id)

    # The first value of the "max_comments" extractor argument caps the total
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(contents), 0, limit)

2881

2882

@staticmethod

2883

def _get_checkok_params():

2884

return {'contentCheckOk': True, 'racyCheckOk': True}

2885

2886

@classmethod

2887

def _generate_player_context(cls, sts=None):

2888

context = {

2889

'html5Preference': 'HTML5_PREF_WANTS',

2890

}

2891

if sts is not None:

2892

context['signatureTimestamp'] = sts

2893

return {

2894

'playbackContext': {

2895

'contentPlaybackContext': context

2896

},

2897

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Return True if the player response indicates an age gate.

    Either 'desktopLegacyAgeGateReason' is set in the playability status, or
    its 'status'/'reason' contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Statuses are upper-case enum strings (compare 'UNPLAYABLE' in
    # _is_unplayable), so match case-insensitively; otherwise the lower-case
    # status markers above could never match. Non-string values are ignored.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

2911

2912

@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2915

2916

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the 'player' endpoint for `client`; return the response or None"""
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    # The signature timestamp is only looked up when a player URL is known
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    # Normalise any falsy response to None
    return response or None

2932

2933

def _get_requested_clients(self, url, smuggled_data):
    """Return the de-duplicated, ordered list of player clients to query.

    Clients come from the "player_client" extractor argument; 'default' and
    'all' are expanded, unsupported names are skipped with a warning, and
    the defaults are used when nothing valid was requested. For music URLs
    the existing '<client>_music' variants are appended.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later extension never touches the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first instead of extending the list while a
        # generator is still iterating over it
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2956

2957

def _extract_player_ytcfg(self, client, video_id):
    """Download the client-specific page and return its ytcfg ({} if none)"""
    config_pages = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        # Only the clients listed above have a dedicated config page
        return {}

    client_label = client.replace('_', ' ').strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note='Downloading %s config' % client_label)
    return self.extract_ytcfg(video_id, webpage) or {}

2966

2967

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for every requested client.

    Returns a `(player_responses, player_url)` tuple. Fallback clients may be
    queued while iterating when a response is unplayable or age-gated. The
    last ExtractorError is re-raised only when no response was collected;
    otherwise errors are reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    known_clients = set(clients)
    # Reversed so that pop() below hands out clients in the requested order
    pending = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in known_clients:
                pending.append(candidate)
                known_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Download the player URL at most once
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3042

3043

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts for a video.

    First the direct ("https") formats listed under 'formats' and
    'adaptiveFormats' of each streaming data entry are yielded, then the
    formats from the HLS and DASH manifests that are not skipped.

    Formats lacking any quality information are handled gracefully instead
    of raising AttributeError.
    """
    itags, stream_ids = {}, set()
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` guards against an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # The URL is wrapped in a signature cipher that must be decrypted
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.add(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None/empty here; do not call .replace on it
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set format_id/quality on a manifest format; False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string: the first piece is the base URL and
    every following piece is a '#'-separated description of one level.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    pieces = raw_spec.split('|')
    base_url = url_or_none(urljoin('https://i.ytimg.com/', pieces[0] or None))
    if not base_url:
        return

    # Levels are walked last-to-first, so that sb0 is the last level listed
    levels = pieces[:0:-1]
    last_level = len(levels) - 1
    for i, level in enumerate(levels):
        fields = level.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = fields[6:]

        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one sheet of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for j in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(j)),
                # The last sheet may cover less time than a full one
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            })

        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }

3244

3245

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns `(webpage, master_ytcfg, player_responses, player_url)`.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the default ytcfg when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3258

3259

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Return `(live_broadcast_details, is_live, streaming_data, formats)`"""
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # 'isLive' from the video details wins; otherwise use 'isLiveNow'
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = [*self._extract_formats(streaming_data, video_id, player_url, is_live, duration)]

    return live_broadcast_details, is_live, streaming_data, formats

3269

3270

def _real_extract(self, url):

3271

url, smuggled_data = unsmuggle_url(url, {})

3272

video_id = self._match_id(url)

3273

3274

base_url = self.http_scheme() + '//www.youtube.com/'

3275

webpage_url = base_url + 'watch?v=' + video_id

3276

3277

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3278

3279

playability_statuses = traverse_obj(

3280

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3281

3282

trailer_video_id = get_first(

3283

playability_statuses,

3284

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3285

expected_type=str)

3286

if trailer_video_id:

3287

return self.url_result(

3288

trailer_video_id, self.ie_key(), trailer_video_id)

3289

3290

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3291

if webpage else (lambda x: None))

3292

3293

video_details = traverse_obj(

3294

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3295

microformats = traverse_obj(

3296

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3297

expected_type=dict, default=[])

3298

video_title = (

3299

get_first(video_details, 'title')

3300

or self._get_text(microformats, (..., 'title'))

3301

or search_meta(['og:title', 'twitter:title', 'title']))

3302

video_description = get_first(video_details, 'shortDescription')

3303

3304

multifeed_metadata_list = get_first(

3305

player_responses,

3306

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3307

expected_type=str)

3308

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3309

if self.get_param('noplaylist'):

3310

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3315

# Unquote should take place before split on comma (,) since textual

3316

# fields may contain comma as well (see

3317

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3318

feed_data = compat_parse_qs(

3319

compat_urllib_parse_unquote_plus(feed))

3320

3321

def feed_entry(name):

3322

return try_get(

3323

feed_data, lambda x: x[name][0], compat_str)

3324

3325

feed_id = feed_entry('id')

3326

if not feed_id:

3327

continue

3328

feed_title = feed_entry('title')

3329

title = video_title

3330

if feed_title:

3331

title += ' (%s)' % feed_title

3332

entries.append({

3333

'_type': 'url_transparent',

3334

'ie_key': 'Youtube',

3335

'url': smuggle_url(

3336

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3337

{'force_singlefeed': True}),

3338

'title': title,

3339

})

3340

feed_ids.append(feed_id)

3341

self.to_screen(

3342

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3343

% (', '.join(feed_ids), video_id))

3344

return self.playlist_result(

3345

entries, video_id, video_title, video_description)

3346

3347

duration = int_or_none(

3348

get_first(video_details, 'lengthSeconds')

3349

or get_first(microformats, 'lengthSeconds')

3350

or parse_duration(search_meta('duration'))) or None

3351

3352

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3353

video_id, microformats, video_details, player_responses, player_url, duration)

3354

3355

if not formats:

3356

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3357

self.report_drm(video_id)

3358

pemr = get_first(

3359

playability_statuses,

3360

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3361

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3362

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3363

if subreason:

3364

if subreason == 'The uploader has not made this video available in your country.':

3365

countries = get_first(microformats, 'availableCountries')

3366

if not countries:

3367

regions_allowed = search_meta('regionsAllowed')

3368

countries = regions_allowed.split(',') if regions_allowed else None

3369

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3370

reason += f'. {subreason}'

3371

if reason:

3372

self.raise_no_formats(reason, expected=True)

3373

3374

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3375

if not keywords and webpage:

3376

keywords = [

3377

unescapeHTML(m.group('content'))

3378

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3379

for keyword in keywords:

3380

if keyword.startswith('yt:stretch='):

3381

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3382

if mobj:

3383

# NB: float is intentional for forcing float division

3384

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3389

f['stretched_ratio'] = ratio

3390

break

3391

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3392

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3393

if thumbnail_url:

3394

thumbnails.append({

3395

'url': thumbnail_url,

3396

})

3397

original_thumbnails = thumbnails.copy()

3398

3399

# The best resolution thumbnails sometimes does not appear in the webpage

3400

# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340

3401

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3402

thumbnail_names = [

3403

'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',

3404

'hqdefault', 'hq1', 'hq2', 'hq3', '0',

3405

'mqdefault', 'mq1', 'mq2', 'mq3',

3406

'default', '1', '2', '3'

3407

]

3408

n_thumbnail_names = len(thumbnail_names)

3409

thumbnails.extend({

3410

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3411

video_id=video_id, name=name, ext=ext,

3412

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3413

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3414

for thumb in thumbnails:

3415

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3416

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3417

self._remove_duplicate_formats(thumbnails)

3418

self._downloader._sort_thumbnails(original_thumbnails)

3419

3420

category = get_first(microformats, 'category') or search_meta('genre')

3421

channel_id = str_or_none(

3422

get_first(video_details, 'channelId')

3423

or get_first(microformats, 'externalChannelId')

3424

or search_meta('channelId'))

3425

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3426

3427

live_content = get_first(video_details, 'isLiveContent')

3428

is_upcoming = get_first(video_details, 'isUpcoming')

3429

if is_live is None:

3430

if is_upcoming or live_content is False:

3431

is_live = False

3432

if is_upcoming is None and (live_content or is_live):

3433

is_upcoming = False

3434

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3435

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3436

if not duration and live_end_time and live_start_time:

3437

duration = live_end_time - live_start_time

3438

3439

if is_live and self.get_param('live_from_start'):

3440

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3441

3442

formats.extend(self._extract_storyboard(player_responses, duration))

3443

3444

# Source is given priority since formats that throttle are given lower source_preference

3445

# When throttling issue is fully fixed, remove this

3446

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3451

'formats': formats,

3452

'thumbnails': thumbnails,

3453

# The best thumbnail that we are sure exists. Prevents unnecessary

3454

# URL checking if user don't care about getting the best possible thumbnail

3455

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3456

'description': video_description,

3457

'uploader': get_first(video_details, 'author'),

3458

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3459

'uploader_url': owner_profile_url,

3460

'channel_id': channel_id,

3461

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3462

'duration': duration,

3463

'view_count': int_or_none(

3464

get_first((video_details, microformats), (..., 'viewCount'))

3465

or search_meta('interactionCount')),

3466

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3467

'age_limit': 18 if (

3468

get_first(microformats, 'isFamilySafe') is False

3469

or search_meta('isFamilyFriendly') == 'false'

3470

or search_meta('og:restrictions:age') == '18+') else 0,

3471

'webpage_url': webpage_url,

3472

'categories': [category] if category else None,

3473

'tags': keywords,

3474

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3475

'is_live': is_live,

3476

'was_live': (False if is_live or is_upcoming or live_content is False

3477

else None if is_live is None or is_upcoming is None

3478

else live_content),

3479

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3480

'release_timestamp': live_start_time,

3481

}

3482

3483

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3484

if pctr:

3485

def get_lang_code(track):

3486

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3487

or track.get('languageCode'))

3488

3489

# Converted into dicts to remove duplicates

3490

captions = {

3491

get_lang_code(sub): sub

3492

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3493

translation_languages = {

3494

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3495

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3496

3497

def process_language(container, base_url, lang_code, sub_name, query):

3498

lang_subs = container.setdefault(lang_code, [])

3499

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3510

for lang_code, caption_track in captions.items():

3511

base_url = caption_track.get('baseUrl')

3512

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3513

if not base_url:

3514

continue

3515

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3516

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3521

if not caption_track.get('isTranslatable'):

3522

continue

3523

for trans_code, trans_name in translation_languages.items():

3524

if not trans_code:

3525

continue

3526

orig_trans_code = trans_code

3527

if caption_track.get('kind') != 'asr':

3528

if 'translated_subs' in self._configuration_arg('skip'):

3529

continue

3530

trans_code += f'-{lang_code}'

3531

trans_name += format_field(lang_name, template=' from %s')

3532

# Add an "-orig" label to the original language so that it can be distinguished.

3533

# The subs are returned without "-orig" as well for compatibility

3534

if lang_code == f'a-{orig_trans_code}':

3535

process_language(

3536

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3537

# Setting tlang=lang returns damaged subtitles.

3538

process_language(automatic_captions, base_url, trans_code, trans_name,

3539

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3540

info['automatic_captions'] = automatic_captions

3541

info['subtitles'] = subtitles

3542

3543

parsed_url = compat_urllib_parse_urlparse(url)

3544

for component in [parsed_url.fragment, parsed_url.query]:

3545

query = compat_parse_qs(component)

3546

for k, v in query.items():

3547

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3548

d_k += '_time'

3549

if d_k not in info and k in s_ks:

3550

info[d_k] = parse_duration(query[k][0])

3551

3552

# Youtube Music Auto-generated description

3553

if video_description:

3554

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3555

if mobj:

3556

release_year = mobj.group('release_year')

3557

release_date = mobj.group('release_date')

3558

if release_date:

3559

release_date = release_date.replace('-', '')

3560

if not release_year:

3561

release_year = release_date[:4]

3562

info.update({

3563

'album': mobj.group('album'.strip()),

3564

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3565

'track': mobj.group('track').strip(),

3566

'release_date': release_date,

3567

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3573

webpage, self._YT_INITIAL_DATA_RE, video_id,

3574

'yt initial data')

3575

if not initial_data:

3576

query = {'videoId': video_id}

3577

query.update(self._get_checkok_params())

3578

initial_data = self._extract_response(

3579

item_id=video_id, ep='next', fatal=False,

3580

ytcfg=master_ytcfg, query=query,

3581

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3582

note='Downloading initial data API JSON')

3583

3584

try:

3585

# This will error if there is no livechat

3586

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3587

info.setdefault('subtitles', {})['live_chat'] = [{

3588

'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies

3589

'video_id': video_id,

3590

'ext': 'json',

3591

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

3592

}]

3593

except (KeyError, IndexError, TypeError):

pass

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3599

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3600

or None)

3601

3602

contents = traverse_obj(

3603

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3604

expected_type=list, default=[])

3605

3606

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3607

if vpir:

3608

stl = vpir.get('superTitleLink')

3609

if stl:

3610

stl = self._get_text(stl)

3611

if try_get(

3612

vpir,

3613

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3614

info['location'] = stl

3615

else:

3616

mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)

3617

if mobj:

3618

info.update({

3619

'series': mobj.group(1),

3620

'season_number': int(mobj.group(2)),

3621

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3626

list) or []):

3627

tbr = tlb.get('toggleButtonRenderer') or {}

3628

for getter, regex in [(

3629

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3630

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3631

lambda x: x['accessibility'],

3632

lambda x: x['accessibilityData']['accessibilityData'],

3633

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3634

label = (try_get(tbr, getter, dict) or {}).get('label')

3635

if label:

3636

mobj = re.match(regex, label)

3637

if mobj:

3638

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3639

break

3640

sbr_tooltip = try_get(

3641

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3642

if sbr_tooltip:

3643

like_count, dislike_count = sbr_tooltip.split(' / ')

3644

info.update({

3645

'like_count': str_to_int(like_count),

3646

'dislike_count': str_to_int(dislike_count),

3647

})

3648

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3649

if vsir:

3650

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3651

info.update({

3652

'channel': self._get_text(vor, 'title'),

3653

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3658

list) or []

3659

multiple_songs = False

3660

for row in rows:

3661

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3662

multiple_songs = True

3663

break

3664

for row in rows:

3665

mrr = row.get('metadataRowRenderer') or {}

3666

mrr_title = mrr.get('title')

3667

if not mrr_title:

3668

continue

3669

mrr_title = self._get_text(mrr, 'title')

3670

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3671

if mrr_title == 'License':

3672

info['license'] = mrr_contents_text

3673

elif not multiple_songs:

3674

if mrr_title == 'Album':

3675

info['album'] = mrr_contents_text

3676

elif mrr_title == 'Artist':

3677

info['artist'] = mrr_contents_text

3678

elif mrr_title == 'Song':

3679

info['track'] = mrr_contents_text

3680

3681

fallbacks = {

3682

'channel': 'uploader',

3683

'channel_id': 'uploader_id',

3684

'channel_url': 'uploader_url',

3685

}

3686

3687

# The upload date for scheduled, live and past live streams / premieres in microformats

3688

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3689

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3690

upload_date = (

3691

unified_strdate(get_first(microformats, 'uploadDate'))

3692

or unified_strdate(search_meta('uploadDate')))

3693

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3694

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')

3695

info['upload_date'] = upload_date

3696

3697

for to, frm in fallbacks.items():

3698

if not info.get(to):

3699

info[to] = info.get(frm)

3700

3701

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3707

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3708

is_membersonly = None

3709

is_premium = None

3710

if initial_data and is_private is not None:

3711

is_membersonly = False

3712

is_premium = False

3713

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3714

badge_labels = set()

3715

for content in contents:

3716

if not isinstance(content, dict):

3717

continue

3718

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3719

for badge_label in badge_labels:

3720

if badge_label.lower() == 'members only':

3721

is_membersonly = True

3722

elif badge_label.lower() == 'premium':

3723

is_premium = True

3724

elif badge_label.lower() == 'unlisted':

3725

is_unlisted = True

3726

3727

info['availability'] = self._availability(

3728

is_private=is_private,

3729

needs_premium=is_premium,

3730

needs_subscription=is_membersonly,

3731

needs_auth=info['age_limit'] >= 18,

3732

is_unlisted=None if is_private is None else is_unlisted)

3733

3734

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3735

3736

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3742

3743

@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorator for extraction methods that take (self, url, smuggled_data).

    The returned wrapper takes (self, url): it unsmuggles the URL, flags
    music URLs in the smuggled data, calls `func`, and finally smuggles the
    same data back into the URL of every entry of the returned info dict.
    """

    def _resmuggle(items, payload):
        # Generator, so the entries are only rewritten as they are consumed
        for item in items:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Nothing to pass through if there is no smuggled data or no entries
        entries = result.get('entries') if smuggled_data else None
        if entries:
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):
    """
    Return the channel id found in the meta tags of `webpage`.

    The 'channelId' meta tag is preferred. Otherwise the id is taken from
    the first available URL-carrying meta tag (og:url, al:*:url,
    twitter:*), which must look like https://www.youtube.com/channel/<id>.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: with `([^/?#&])+` the group
    # only keeps the last repetition, i.e. the final character of the id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3775

3776

@staticmethod

3777

def _extract_basic_item_renderer(item):

3778

# Modified from _extract_grid_item_renderer

3779

known_basic_renderers = (

3780

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3781

)

3782

for key, renderer in item.items():

3783

if not isinstance(renderer, dict):

3784

continue

3785

elif key in known_basic_renderers:

3786

return renderer

3787

elif key.startswith('grid') and key.endswith('Renderer'):

3788

return renderer

3789

3790

def _grid_entries(self, grid_renderer):

3791

for item in grid_renderer['items']:

3792

if not isinstance(item, dict):

3793

continue

3794

renderer = self._extract_basic_item_renderer(item)

3795

if not isinstance(renderer, dict):

3796

continue

3797

title = self._get_text(renderer, 'title')

3798

3799

# playlist

3800

playlist_id = renderer.get('playlistId')

3801

if playlist_id:

3802

yield self.url_result(

3803

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3804

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3809

if video_id:

3810

yield self._extract_video(renderer)

3811

continue

3812

# channel

3813

channel_id = renderer.get('channelId')

3814

if channel_id:

3815

yield self.url_result(

3816

'https://www.youtube.com/channel/%s' % channel_id,

3817

ie=YoutubeTabIE.ie_key(), video_title=title)

3818

continue

3819

# generic endpoint URL support

3820

ep_url = urljoin('https://www.youtube.com/', try_get(

3821

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3822

compat_str))

3823

if ep_url:

3824

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3825

if ie.suitable(ep_url):

3826

yield self.url_result(

3827

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3828

break

3829

3830

def _music_reponsive_list_entry(self, renderer):
    """
    Build a music.youtube.com URL result for a music list item.

    Checked in order: the item's own video id, a watch endpoint carrying a
    playlist id (optionally with a video id), a browse endpoint id.
    Returns None when none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        # Both variants are handed to the tab extractor with the playlist id
        playlist_url = (
            f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}' if video_id
            else f'https://music.youtube.com/playlist?list={playlist_id}')
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None

3847

3848

def _shelf_entries_from_content(self, shelf_renderer):

3849

content = shelf_renderer.get('content')

3850

if not isinstance(content, dict):

3851

return

3852

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3853

if renderer:

3854

# TODO: add support for nested playlists so each shelf is processed

3855

# as separate playlist

3856

# TODO: this includes only first N items

3857

for entry in self._grid_entries(renderer):

3858

yield entry

3859

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a URL result for the shelf's own endpoint (if it has one),
    followed by the entries found in the shelf content.

    With skip_channels set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3880

3881

def _playlist_entries(self, video_list_renderer):

3882

for content in video_list_renderer['contents']:

3883

if not isinstance(content, dict):

3884

continue

3885

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3886

if not isinstance(renderer, dict):

3887

continue

3888

video_id = renderer.get('videoId')

3889

if not video_id:

3890

continue

3891

yield self._extract_video(renderer)

3892

3893

def _rich_entries(self, rich_grid_renderer):
    """
    Yield the video found at rich_grid_renderer['content']['videoRenderer'],
    provided it carries a 'videoId'; otherwise yield nothing.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3900

3901

def _video_entry(self, video_renderer):

3902

video_id = video_renderer.get('videoId')

3903

if video_id:

3904

return self._extract_video(video_renderer)

3905

3906

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """
    Return a tab URL result for the tile's tap-command URL, titled with the
    tile's 'hashtag' text, or None when no URL can be built.
    """
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

3912

3913

def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the media referenced by a backstage post: its video attachment,
    its playlist attachment, and every video URL linked in the post text
    (except a link to the already-yielded attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Do not repeat the attached video when the text links to it as well
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

3948

3949

def _post_thread_continuation_entries(self, post_thread_continuation):

3950

contents = post_thread_continuation.get('contents')

3951

if not isinstance(contents, list):

3952

return

3953

for content in contents:

3954

renderer = content.get('backstagePostThreadRenderer')

3955

if not isinstance(renderer, dict):

3956

continue

3957

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3962

for content in contents:

3963

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3964

if video_renderer:

3965

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    Each content is either a section-like renderer (item section / music
    shelf), whose own contents are dispatched to the matching entry
    extractor, or a 'richItemRenderer'.

    @param continuation_list: single-element list used as an output
                              parameter; on exhaustion its only element
                              holds the continuation found last (if any)
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is used
            known = next(
                ((key, value) for key, value in section_item.items() if key in entry_extractors),
                None)
            if known is None:
                continue
            key, renderer = known
            for entry in entry_extractors[key](renderer):
                if entry:
                    yield entry
            continuation_list[0] = self._extract_continuation(renderer)

        # Fall back to the continuation of the section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    # ... and finally to the one of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4018

4019

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of `tab`: first those embedded in tab['content'],
    then those of every continuation page, requested one after another
    until no continuation is left or a response cannot be handled.
    """
    # Single-slot holder in which _extract_entries leaves the continuation
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for responses of the 'continuationContents' form
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for 'appendContinuationItemsAction' responses, together with
    # the key under which each handler expects the list of items
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        # An empty renderer deliberately falls through to the second form
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            handler, items_key = known_renderers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list[0] = None
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue

        # Neither form was recognized
        break

@staticmethod

def _extract_selected_tab(tabs, fatal=True):

4099

for tab in tabs:

4100

renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}

4101

if renderer.get('selected') is True:

return renderer

else:

if fatal:

raise ExtractorError('Unable to find selected tab')

4106

4107

@classmethod
def _extract_uploader(cls, data):
    """
    Return a dict with those of 'uploader', 'uploader_id' and
    'uploader_url' that can be read from the video owner of the playlist
    sidebar's secondary info renderer. The dict is empty if there is no
    owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_endpoint(field):
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][field], compat_str)

    uploader = {
        'uploader': owner.get('text'),
        'uploader_id': browse_endpoint('browseId'),
        'uploader_url': urljoin(
            'https://www.youtube.com/', browse_endpoint('canonicalBaseUrl')),
    }
    # Fields that could not be determined are left out entirely
    return {field: value for field, value in uploader.items() if value is not None}

4121

4122

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a tabbed page.

    Metadata is read from the channel metadata renderer or, if absent,
    from the playlist metadata renderer, and is complemented with
    thumbnails (avatar, banners, playlist thumbnail) and sidebar stats.
    The entries are produced lazily by self._entries.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derive the uncropped variant from the first thumbnail, in place
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the name of the selected (sub-)tab to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4213

4214

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a Mix playlist, requesting further pages through
    the 'next' endpoint.

    Each page is requested starting from the last video of the previous
    one. Extraction stops when a page has no videos, brings nothing after
    the last yielded video, or contains the very first video again.

    @param playlist: the playlist renderer of the first page
    @param data:     initial page data; only used to build the API headers
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        if not playlist:
            # The response carried no playlist; _playlist_entries would
            # fail on it, so treat this as the end of the Mix
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Pages overlap: resume right after the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last content is not necessarily a video with a watch endpoint;
        # fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

4248

4249

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist attached to a watch page.

    If the playlist's endpoint points to a URL other than `url`, a URL
    result for the tab extractor is returned; otherwise the playlist is
    extracted page by page as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4266

4267

def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its sidebar renderer.
    Note: Nothing is assumed to be public unless YouTube states so explicitly
    @param data: response
    @returns: result of self._availability() for the detected private/unlisted flags
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if selected:
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                # Treat the selected dropdown entry like a badge
                labels.add(selected_label.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_private = is_unlisted = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4298

4299

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Look up a renderer among the playlist sidebar items.
    @param data:          response
    @param info_renderer: key of the wanted renderer inside each sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns: the first non-empty matching renderer, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazy, so lookup stops at the first usable renderer
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next((found for found in candidates if found), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist from the API again so that unavailable videos are included.
    The browse id and params come from the sidebar's 'show unavailable videos'
    menu entry when there is one; otherwise a default params value and
    'VL' + item_id are used.
    @returns: the API response (requested with fatal=False), or None when the
              sidebar primary info renderer is missing
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    api_query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=api_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4343

4344

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and the initial data embedded in it.
    The download is repeated, up to the 'extractor_retries' param (default 3),
    after retryable network errors and when the initial data is incomplete.
    @param fatal: raise on failure; when false a warning is reported instead
    @returns: (webpage, data) tuple
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = pending_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if pending_error:
            self.report_warning('%s. Retrying ...' % pending_error)
        retry_note = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_note,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried, except for HTTP 403 and 429
            can_retry = isinstance(e.cause, network_exceptions) and not (
                isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 429))
            if can_retry:
                pending_error = error_to_compat_str(e.cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        pending_error = 'Incomplete yt initial data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(pending_error)
        self.report_warning(pending_error)
        break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data and ytcfg for a URL.
    The webpage is used unless the 'skip' configuration arg contains 'webpage';
    when that yields no data, the API is queried through _extract_tab_endpoint().
    @param ytcfg:          used as is when given, otherwise extracted from the webpage
    @param fatal:          raise on home page redirect, failed authcheck or API failure
    @param webpage_fatal:  passed as `fatal` to the webpage download
    @param default_client: passed on to the API fallback
    @returns: (data, ytcfg) tuple
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            skip_hint = (
                ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check')
            raise ExtractorError(msg + skip_hint, expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4416

4417

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and fetch
    the response of the browse/watch endpoint it resolves to.
    @returns: the API response of the resolved endpoint; None when the URL
              could not be resolved and `fatal` is false
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    # Arguments common to both API requests below
    shared = {
        'item_id': item_id,
        'headers': headers,
        'ytcfg': ytcfg,
        'fatal': fatal,
        'default_client': default_client,
    }
    resolved = self._extract_response(
        query={'url': url}, check_get_keys='endpoint',
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', **shared)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            query=endpoint_params, ep=api_ep,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'),
            **shared)

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)

4434

4435

# Default `params` for search requests: _search_results() uses it when the
# caller passes no `params`; a falsy value means no 'params' key is sent
_SEARCH_PARAMS = None

4436

4437

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Generator yielding the entries of a search, page by page.
    @param query:          search query string
    @param params:         value for the request's 'params' key; defaults to
                           self._SEARCH_PARAMS, and is left out when falsy
    @param default_client: passed on to the API request
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    # Alternative locations of the result list within a response
    content_paths = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    check_get_keys = tuple({path[0] for path in content_paths})

    # One-element holder for the next page's continuation;
    # presumably filled in by _extract_entries() - it is only read here
    continuation_list = [None]
    page_num = 1
    while True:
        request.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            default_client=default_client, check_get_keys=check_get_keys)
        page_contents = traverse_obj(response, *content_paths)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return
        page_num += 1

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4466

IE_DESC = 'YouTube Tabs'

4467

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4476

(?P<not_channel>

4477

feed/|hashtag/|

4478

(?:playlist|watch)\?.*?\blist=

4479

)|

4480

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4485

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4486

}

4487

IE_NAME = 'youtube:tab'

4488

4489

_TESTS = [{

4490

'note': 'playlists, multipage',

4491

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4492

'playlist_mincount': 94,

4493

'info_dict': {

4494

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4495

'title': 'Igor Kleiner - Playlists',

4496

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4497

'uploader': 'Igor Kleiner',

4498

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4499

'channel': 'Igor Kleiner',

4500

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4501

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4502

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4503

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4504

'channel_follower_count': int

4505

},

4506

}, {

4507

'note': 'playlists, multipage, different order',

4508

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4509

'playlist_mincount': 94,

4510

'info_dict': {

4511

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4512

'title': 'Igor Kleiner - Playlists',

4513

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4514

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4515

'uploader': 'Igor Kleiner',

4516

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4517

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4518

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4519

'channel': 'Igor Kleiner',

4520

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4521

'channel_follower_count': int

4522

},

4523

}, {

4524

'note': 'playlists, series',

4525

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4526

'playlist_mincount': 5,

4527

'info_dict': {

4528

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4529

'title': '3Blue1Brown - Playlists',

4530

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4531

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4532

'uploader': '3Blue1Brown',

4533

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4534

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4535

'channel': '3Blue1Brown',

4536

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4537

'tags': ['Mathematics'],

4538

'channel_follower_count': int

4539

},

4540

}, {

4541

'note': 'playlists, singlepage',

4542

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4543

'playlist_mincount': 4,

4544

'info_dict': {

4545

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4546

'title': 'ThirstForScience - Playlists',

4547

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4548

'uploader': 'ThirstForScience',

4549

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4550

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4551

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4552

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4553

'tags': 'count:13',

4554

'channel': 'ThirstForScience',

4555

'channel_follower_count': int

4556

}

4557

}, {

4558

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4559

'only_matching': True,

4560

}, {

4561

'note': 'basic, single video playlist',

4562

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4563

'info_dict': {

4564

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4565

'uploader': 'Sergey M.',

4566

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4567

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4572

'channel': 'Sergey M.',

4573

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4574

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4575

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4580

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4581

'info_dict': {

4582

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4583

'uploader': 'Sergey M.',

4584

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4585

'title': 'youtube-dl empty playlist',

4586

'tags': [],

4587

'channel': 'Sergey M.',

4588

'description': '',

4589

'modified_date': '20160902',

4590

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4591

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4592

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4598

'info_dict': {

4599

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4600

'title': 'lex will - Home',

4601

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4602

'uploader': 'lex will',

4603

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4604

'channel': 'lex will',

4605

'tags': ['bible', 'history', 'prophesy'],

4606

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4607

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4608

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4609

'channel_follower_count': int

4610

},

4611

'playlist_mincount': 2,

4612

}, {

4613

'note': 'Videos tab',

4614

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4615

'info_dict': {

4616

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4617

'title': 'lex will - Videos',

4618

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4619

'uploader': 'lex will',

4620

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4621

'tags': ['bible', 'history', 'prophesy'],

4622

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4623

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4624

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4625

'channel': 'lex will',

4626

'channel_follower_count': int

4627

},

4628

'playlist_mincount': 975,

4629

}, {

4630

'note': 'Videos tab, sorted by popular',

4631

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4632

'info_dict': {

4633

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4634

'title': 'lex will - Videos',

4635

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4636

'uploader': 'lex will',

4637

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4638

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4639

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4640

'channel': 'lex will',

4641

'tags': ['bible', 'history', 'prophesy'],

4642

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4643

'channel_follower_count': int

4644

},

4645

'playlist_mincount': 199,

4646

}, {

4647

'note': 'Playlists tab',

4648

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4649

'info_dict': {

4650

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4651

'title': 'lex will - Playlists',

4652

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4653

'uploader': 'lex will',

4654

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4655

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4656

'channel': 'lex will',

4657

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4658

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4659

'tags': ['bible', 'history', 'prophesy'],

4660

'channel_follower_count': int

4661

},

4662

'playlist_mincount': 17,

4663

}, {

4664

'note': 'Community tab',

4665

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4666

'info_dict': {

4667

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4668

'title': 'lex will - Community',

4669

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4670

'uploader': 'lex will',

4671

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4672

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4673

'channel': 'lex will',

4674

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4675

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4676

'tags': ['bible', 'history', 'prophesy'],

4677

'channel_follower_count': int

4678

},

4679

'playlist_mincount': 18,

4680

}, {

4681

'note': 'Channels tab',

4682

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4683

'info_dict': {

4684

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4685

'title': 'lex will - Channels',

4686

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4687

'uploader': 'lex will',

4688

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4689

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4690

'channel': 'lex will',

4691

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4692

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4693

'tags': ['bible', 'history', 'prophesy'],

4694

'channel_follower_count': int

4695

},

4696

'playlist_mincount': 12,

4697

}, {

4698

'note': 'Search tab',

4699

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4700

'playlist_mincount': 40,

4701

'info_dict': {

4702

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4703

'title': '3Blue1Brown - Search - linear algebra',

4704

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4705

'uploader': '3Blue1Brown',

4706

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4707

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4708

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4709

'tags': ['Mathematics'],

4710

'channel': '3Blue1Brown',

4711

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4712

'channel_follower_count': int

4713

},

4714

}, {

4715

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4716

'only_matching': True,

4717

}, {

4718

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4719

'only_matching': True,

4720

}, {

4721

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4722

'only_matching': True,

4723

}, {

4724

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4725

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4726

'info_dict': {

4727

'title': '29C3: Not my department',

4728

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4729

'uploader': 'Christiaan008',

4730

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4731

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4732

'tags': [],

4733

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4734

'view_count': int,

4735

'modified_date': '20150605',

4736

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4737

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4738

'channel': 'Christiaan008',

4739

},

4740

'playlist_count': 96,

4741

}, {

4742

'note': 'Large playlist',

4743

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4744

'info_dict': {

4745

'title': 'Uploads from Cauchemar',

4746

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4747

'uploader': 'Cauchemar',

4748

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4749

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4750

'tags': [],

4751

'modified_date': r're:\d{8}',

4752

'channel': 'Cauchemar',

4753

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4754

'view_count': int,

4755

'description': '',

4756

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4757

},

4758

'playlist_mincount': 1123,

4759

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4760

}, {

4761

'note': 'even larger playlist, 8832 videos',

4762

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4763

'only_matching': True,

4764

}, {

4765

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4766

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4767

'info_dict': {

4768

'title': 'Uploads from Interstellar Movie',

4769

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4770

'uploader': 'Interstellar Movie',

4771

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4772

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4773

'tags': [],

4774

'view_count': int,

4775

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4776

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4777

'channel': 'Interstellar Movie',

4778

'description': '',

4779

'modified_date': r're:\d{8}',

4780

},

4781

'playlist_mincount': 21,

4782

}, {

4783

'note': 'Playlist with "show unavailable videos" button',

4784

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4785

'info_dict': {

4786

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4787

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4788

'uploader': 'Phim Siêu Nhân Nhật Bản',

4789

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4790

'view_count': int,

4791

'channel': 'Phim Siêu Nhân Nhật Bản',

4792

'tags': [],

4793

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4794

'description': '',

4795

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4796

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4797

'modified_date': r're:\d{8}',

4798

},

4799

'playlist_mincount': 200,

4800

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4801

}, {

4802

'note': 'Playlist with unavailable videos in page 7',

4803

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4804

'info_dict': {

4805

'title': 'Uploads from BlankTV',

4806

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4807

'uploader': 'BlankTV',

4808

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4809

'channel': 'BlankTV',

4810

'channel_url': 'https://www.youtube.com/c/blanktv',

4811

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4812

'view_count': int,

4813

'tags': [],

4814

'uploader_url': 'https://www.youtube.com/c/blanktv',

4815

'modified_date': r're:\d{8}',

4816

'description': '',

4817

},

4818

'playlist_mincount': 1000,

4819

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4820

}, {

4821

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4822

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4823

'info_dict': {

4824

'title': 'Data Analysis with Dr Mike Pound',

4825

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4826

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4827

'uploader': 'Computerphile',

4828

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4829

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4830

'tags': [],

4831

'view_count': int,

4832

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4833

'channel_url': 'https://www.youtube.com/user/Computerphile',

4834

'channel': 'Computerphile',

4835

},

4836

'playlist_mincount': 11,

4837

}, {

4838

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4839

'only_matching': True,

4840

}, {

4841

'note': 'Playlist URL that does not actually serve a playlist',

4842

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4847

'uploader': 'STREEM',

4848

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4849

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4850

'upload_date': '20150526',

4851

'license': 'Standard YouTube License',

4852

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4853

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4860

},

4861

'skip': 'This video is not available.',

4862

'add_ie': [YoutubeIE.ie_key()],

4863

}, {

4864

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4865

'only_matching': True,

4866

}, {

4867

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4868

'only_matching': True,

4869

}, {

4870

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4871

'info_dict': {

4872

'id': 'GgL890LIznQ', # This will keep changing

4873

'ext': 'mp4',

4874

'title': str,

4875

'uploader': 'Sky News',

4876

'uploader_id': 'skynews',

4877

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4878

'upload_date': r're:\d{8}',

4879

'description': str,

4880

'categories': ['News & Politics'],

4881

'tags': list,

4882

'like_count': int,

4883

'release_timestamp': 1642502819,

4884

'channel': 'Sky News',

4885

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4886

'age_limit': 0,

4887

'view_count': int,

4888

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4889

'playable_in_embed': True,

4890

'release_date': '20220118',

4891

'availability': 'public',

4892

'live_status': 'is_live',

4893

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4894

'channel_follower_count': int

4895

},

4896

'params': {

4897

'skip_download': True,

4898

},

4899

'expected_warnings': ['Ignoring subtitle tracks found in '],

4900

}, {

4901

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4906

'uploader': 'The Young Turks',

4907

'uploader_id': 'TheYoungTurks',

4908

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4909

'upload_date': '20150715',

4910

'license': 'Standard YouTube License',

4911

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4912

'categories': ['News & Politics'],

4913

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4918

},

4919

'only_matching': True,

4920

}, {

4921

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4922

'only_matching': True,

4923

}, {

4924

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4925

'only_matching': True,

4926

}, {

4927

'note': 'A channel that is not live. Should raise error',

4928

'url': 'https://www.youtube.com/user/numberphile/live',

4929

'only_matching': True,

4930

}, {

4931

'url': 'https://www.youtube.com/feed/trending',

4932

'only_matching': True,

4933

}, {

4934

'url': 'https://www.youtube.com/feed/library',

4935

'only_matching': True,

4936

}, {

4937

'url': 'https://www.youtube.com/feed/history',

4938

'only_matching': True,

4939

}, {

4940

'url': 'https://www.youtube.com/feed/subscriptions',

4941

'only_matching': True,

4942

}, {

4943

'url': 'https://www.youtube.com/feed/watch_later',

4944

'only_matching': True,

4945

}, {

4946

'note': 'Recommended - redirects to home page.',

4947

'url': 'https://www.youtube.com/feed/recommended',

4948

'only_matching': True,

4949

}, {

4950

'note': 'inline playlist with not always working continuations',

4951

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4952

'only_matching': True,

4953

}, {

4954

'url': 'https://www.youtube.com/course',

4955

'only_matching': True,

4956

}, {

4957

'url': 'https://www.youtube.com/zsecurity',

4958

'only_matching': True,

4959

}, {

4960

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4961

'only_matching': True,

4962

}, {

4963

'url': 'https://www.youtube.com/TheYoungTurks/live',

4964

'only_matching': True,

4965

}, {

4966

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4973

}, {

4974

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4975

'only_matching': True,

4976

}, {

4977

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4978

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4979

'only_matching': True

4980

}, {

4981

'note': '/browse/ should redirect to /channel/',

4982

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4983

'only_matching': True

4984

}, {

4985

'note': 'VLPL, should redirect to playlist?list=PL...',

4986

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4987

'info_dict': {

4988

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4989

'uploader': 'NoCopyrightSounds',

4990

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4991

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4992

'title': 'NCS Releases',

4993

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4994

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4995

'modified_date': r're:\d{8}',

4996

'view_count': int,

4997

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4998

'tags': [],

4999

'channel': 'NoCopyrightSounds',

5000

},

5001

'playlist_mincount': 166,

5002

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5003

}, {

5004

'note': 'Topic, should redirect to playlist?list=UU...',

5005

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5006

'info_dict': {

5007

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5008

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5009

'title': 'Uploads from Royalty Free Music - Topic',

5010

'uploader': 'Royalty Free Music - Topic',

5011

'tags': [],

5012

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5013

'channel': 'Royalty Free Music - Topic',

5014

'view_count': int,

5015

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5016

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5017

'modified_date': r're:\d{8}',

5018

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5019

'description': '',

5020

},

5021

'expected_warnings': [

5022

'The URL does not have a videos tab',

5023

r'[Uu]navailable videos (are|will be) hidden',

5024

],

5025

'playlist_mincount': 101,

5026

}, {

5027

'note': 'Topic without a UU playlist',

5028

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5029

'info_dict': {

5030

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5031

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5032

'tags': [],

5033

},

5034

'expected_warnings': [

5035

'the playlist redirect gave error',

5036

],

5037

'playlist_mincount': 9,

5038

}, {

5039

'note': 'Youtube music Album',

5040

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5041

'info_dict': {

5042

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5043

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5048

'modified_date': r're:\d{8}',

5049

},

5050

'playlist_count': 50,

5051

}, {

5052

'note': 'unlisted single video playlist',

5053

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5054

'info_dict': {

5055

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5056

'uploader': 'colethedj',

5057

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5058

'title': 'yt-dlp unlisted playlist test',

5059

'availability': 'unlisted',

5060

'tags': [],

5061

'modified_date': '20211208',

5062

'channel': 'colethedj',

5063

'view_count': int,

5064

'description': '',

5065

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5066

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5067

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5072

'url': 'https://www.youtube.com/feed/recommended',

5073

'info_dict': {

5074

'id': 'recommended',

5075

'title': 'recommended',

5076

'tags': [],

5077

},

5078

'playlist_mincount': 50,

5079

'params': {

5080

'skip_download': True,

5081

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5082

},

5083

}, {

5084

'note': 'API Fallback: /videos tab, sorted by oldest first',

5085

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5086

'info_dict': {

5087

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5088

'title': 'Cody\'sLab - Videos',

5089

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5090

'uploader': 'Cody\'sLab',

5091

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5092

'channel': 'Cody\'sLab',

5093

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5094

'tags': [],

5095

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5096

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5097

'channel_follower_count': int

5098

},

5099

'playlist_mincount': 650,

5100

'params': {

5101

'skip_download': True,

5102

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5103

},

5104

}, {

5105

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5106

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5107

'info_dict': {

5108

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5109

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5110

'title': 'Uploads from Royalty Free Music - Topic',

5111

'uploader': 'Royalty Free Music - Topic',

5112

'modified_date': r're:\d{8}',

5113

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5114

'description': '',

5115

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5116

'tags': [],

5117

'channel': 'Royalty Free Music - Topic',

5118

'view_count': int,

5119

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5120

},

5121

'expected_warnings': [

5122

'does not have a videos tab',

5123

r'[Uu]navailable videos (are|will be) hidden',

5124

],

5125

'playlist_mincount': 101,

5126

'params': {

5127

'skip_download': True,

5128

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5129

},

5130

}, {

5131

'note': 'non-standard redirect to regional channel',

5132

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5133

'only_matching': True

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` reports as suitable is rejected here;
    for every other URL the decision is left to the inherited
    ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

5140

5141

# Splits a URL into three named parts (used by _real_extract):
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional '/<word>' segment; the conditional group only tries
#          to match it when the 'not_channel' group did not participate
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')

5142

5143

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist/watch URL and dispatch to the matching extraction path.

    The URL is first normalised (host forced to www.youtube.com, YouTube
    Music ids rewritten, channel home pages pointed at ``/videos``, watch
    URLs without a video id turned into playlist URLs). The page data is
    then fetched with ``_extract_data`` and, depending on what it contains,
    the result comes from ``_extract_from_tabs``, ``_extract_from_playlist``
    or a ``url_result`` for a single video or a redirect target.

    ``smuggled_data`` is only queried with ``.get('is_music_url')`` here;
    presumably it is a dict supplied by the decorator - confirm there.

    Raises ExtractorError when a music album cannot be resolved to a
    playlist, when a watch URL carries neither a video nor a playlist id,
    when an explicitly requested tab (including ``live``) is not the one
    returned, or when nothing recognisable is found in the page data.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com while keeping every other URL component
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _URL_RE as a dict, with unmatched groups (None) replaced by ''
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Hand the canonical URL back to this same extractor
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted;
    # it decides further down whether a missing tab is fatal
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-split it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        # Both ids present: the 'noplaylist' param selects the single video
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-append the tab and trailing part so they carry over to the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # A selected tab titled 'home' is compared under the name 'featured'
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was substituted above rather than given by the user,
                    # so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            # Keep the data already fetched and only extend the message
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for this tab and it is not the selected one
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # Re-read the tabs: data may have been replaced since the first lookup
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a single video, preferring the id found in the page data
    # over the one taken from the URL query
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')

5272

5273

5274

class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``...?list=<id>`` URLs and delegate them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a ``v`` query value; otherwise defer to the base check."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is also imported at module level; this
        # function-local import looks deliberate - confirm before removing.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query when there is one; a bare id has none,
        # so fall back to just the list id
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            # The host is replaced above, so the music origin is passed along as smuggled data
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5383

5384

5385

class YoutubeYtBeIE(InfoExtractor):
    """Turn ``youtu.be/<video id>?...list=<playlist id>`` links into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent ``watch?v=...&list=...`` URL and delegate it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5432

5433

5434

class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` URLs to the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``/channel/<id>/live`` URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5447

5448

5449

class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's ``/videos`` URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos page."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5463

5464

5465

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword family to the ``LL`` playlist URL."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5482

5483

5484

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configured by class attributes only (``ytsearch`` key)."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configured by class attributes only (``ytsearchdate`` key)."""

    # Name derived from the plain search extractor's name
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``youtube.com/results`` and ``/search`` URLs carrying a ``search_query`` or ``q`` parameter."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query text is used as both id and title."""
        qs = parse_qs(url)
        # 'search_query' takes precedence over 'q' when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Optional 'sp' value is passed through unchanged (None when absent)
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5552

5553

5554

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``music.youtube.com/search`` URLs, optionally restricted to one result section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment, lower-cased) -> 'sp' parameter value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results titled ``<query>`` or ``<query> - <section>``.

        An explicit ``sp`` query value wins over the URL fragment. With
        ``sp`` the section label is the matching ``_SECTIONS`` name, or the
        raw ``sp`` value when none matches. Without it, the text after the
        first ``#`` is looked up in ``_SECTIONS``; an unknown fragment
        yields no section and no params.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        sp = qs.get('sp', (None,))[0]
        if sp:
            section = next((name for name, value in self._SECTIONS.items() if value == sp), sp)
        else:
            # Second '#'-separated piece of the URL, '' when there is no '#'
            fragment = (url.split('#') + [''])[1]
            candidate = compat_urllib_parse_unquote_plus(fragment).lower()
            sp = self._SECTIONS.get(candidate)
            section = candidate if sp else None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, sp, default_client='web_music')
        return self.playlist_result(entries, title, title)

5605

5606

5607

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    A subclass only has to provide ``_FEED_NAME``; it is used both for the
    extractor name and for the ``/feed/<name>`` URL handed to YoutubeTabIE.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name of the form ``youtube:<feed name>``."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL; *url* itself is not inspected."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5622

5623

5624

class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5636

5637

5638

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed; also matches the bare youtube.com home URL."""

    _FEED_NAME = 'recommended'
    # Overrides the base class value of True
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword family)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video id and explain the likely cause.

    The pattern only matches URLs that stop right after ``watch?`` (with at
    most one of a few known non-id parameters) or after an
    ``attribution_link?a=...`` value, which is what is left when an unquoted
    ``&`` makes the shell cut the URL short.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The example commands name this program (yt-dlp) so that the
        # suggested command line can be used as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Match ``youtube.com/clip/`` URLs, warn that clips are unsupported and pass the URL on."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit a warning and hand the unchanged URL to the 'Generic' extractor."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, ie='Generic')

5735

5736

5737

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is 1 to 10 characters long and report them as truncated."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)