jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	get_first,
	43	int_or_none,
	44	is_html,
	45	join_nonempty,
	46	js_to_json,
	47	mimetype2ext,
	48	network_exceptions,
	49	NO_DEFAULT,
	50	orderedSet,
	51	parse_codecs,
	52	parse_count,
	53	parse_duration,
	54	parse_iso8601,
	55	parse_qs,
	56	qualities,
	57	remove_end,
	58	remove_start,
	59	smuggle_url,
	60	str_or_none,
	61	str_to_int,
	62	strftime_or_none,
	63	traverse_obj,
	64	try_get,
	65	unescapeHTML,
	66	unified_strdate,
	67	unified_timestamp,
	68	unsmuggle_url,
	69	update_url_query,
	70	url_or_none,
	71	urljoin,
	72	variadic,
	73	)
	74
	75
	76	# any clients starting with _ cannot be explicity requested by the user
	77	INNERTUBE_CLIENTS = {
	78	'web': {
	79	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	80	'INNERTUBE_CONTEXT': {
	81	'client': {
	82	'clientName': 'WEB',
	83	'clientVersion': '2.20211221.00.00',
	84	}
	85	},
	86	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	87	},
	88	'web_embedded': {
	89	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	90	'INNERTUBE_CONTEXT': {
	91	'client': {
	92	'clientName': 'WEB_EMBEDDED_PLAYER',
	93	'clientVersion': '1.20211215.00.01',
	94	},
	95	},
	96	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	97	},
	98	'web_music': {
	99	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	100	'INNERTUBE_HOST': 'music.youtube.com',
	101	'INNERTUBE_CONTEXT': {
	102	'client': {
	103	'clientName': 'WEB_REMIX',
	104	'clientVersion': '1.20211213.00.00',
	105	}
	106	},
	107	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	108	},
	109	'web_creator': {
	110	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	111	'INNERTUBE_CONTEXT': {
	112	'client': {
	113	'clientName': 'WEB_CREATOR',
	114	'clientVersion': '1.20211220.02.00',
	115	}
	116	},
	117	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	118	},
	119	'android': {
	120	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	121	'INNERTUBE_CONTEXT': {
	122	'client': {
	123	'clientName': 'ANDROID',
	124	'clientVersion': '16.49',
	125	}
	126	},
	127	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	128	'REQUIRE_JS_PLAYER': False
	129	},
	130	'android_embedded': {
	131	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	132	'INNERTUBE_CONTEXT': {
	133	'client': {
	134	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	135	'clientVersion': '16.49',
	136	},
	137	},
	138	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	139	'REQUIRE_JS_PLAYER': False
	140	},
	141	'android_music': {
	142	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	143	'INNERTUBE_CONTEXT': {
	144	'client': {
	145	'clientName': 'ANDROID_MUSIC',
	146	'clientVersion': '4.57',
	147	}
	148	},
	149	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	150	'REQUIRE_JS_PLAYER': False
	151	},
	152	'android_creator': {
	153	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	154	'INNERTUBE_CONTEXT': {
	155	'client': {
	156	'clientName': 'ANDROID_CREATOR',
	157	'clientVersion': '21.47',
	158	},
	159	},
	160	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	161	'REQUIRE_JS_PLAYER': False
	162	},
	163	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	164	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	165	'ios': {
	166	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	167	'INNERTUBE_CONTEXT': {
	168	'client': {
	169	'clientName': 'IOS',
	170	'clientVersion': '16.46',
	171	'deviceModel': 'iPhone14,3',
	172	}
	173	},
	174	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	175	'REQUIRE_JS_PLAYER': False
	176	},
	177	'ios_embedded': {
	178	'INNERTUBE_CONTEXT': {
	179	'client': {
	180	'clientName': 'IOS_MESSAGES_EXTENSION',
	181	'clientVersion': '16.46',
	182	'deviceModel': 'iPhone14,3',
	183	},
	184	},
	185	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	186	'REQUIRE_JS_PLAYER': False
	187	},
	188	'ios_music': {
	189	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	190	'INNERTUBE_CONTEXT': {
	191	'client': {
	192	'clientName': 'IOS_MUSIC',
	193	'clientVersion': '4.57',
	194	},
	195	},
	196	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	197	'REQUIRE_JS_PLAYER': False
	198	},
	199	'ios_creator': {
	200	'INNERTUBE_CONTEXT': {
	201	'client': {
	202	'clientName': 'IOS_CREATOR',
	203	'clientVersion': '21.47',
	204	},
	205	},
	206	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	207	'REQUIRE_JS_PLAYER': False
	208	},
	209	# mweb has 'ultralow' formats
	210	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	211	'mweb': {
	212	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	213	'INNERTUBE_CONTEXT': {
	214	'client': {
	215	'clientName': 'MWEB',
	216	'clientVersion': '2.20211221.01.00',
	217	}
	218	},
	219	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	220	},
	221	# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
	222	# See: https://github.com/zerodytrash/YouTube-Internal-Clients
	223	'tv_embedded': {
	224	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	225	'INNERTUBE_CONTEXT': {
	226	'client': {
	227	'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
	228	'clientVersion': '2.0',
	229	},
	230	},
	231	'INNERTUBE_CONTEXT_CLIENT_NAME': 85
	232	},
	233	}
	234
	235
	236	def _split_innertube_client(client_name):
	237	variant, *base = client_name.rsplit('.', 1)
	238	if base:
	239	return variant, base[0], variant
	240	base, *variant = client_name.split('_', 1)
	241	return client_name, base, variant[0] if variant else None
	242
	243
	244	def build_innertube_clients():
	245	THIRD_PARTY = {
	246	'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
	247	}
	248	BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
	249	priority = qualities(BASE_CLIENTS[::-1])
	250
	251	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	252	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	253	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	254	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	255	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	256
	257	_, base_client, variant = _split_innertube_client(client)
	258	ytcfg['priority'] = 10 * priority(base_client)
	259
	260	if not variant:
	261	INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
	262	embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	263	embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	264	embedscreen['priority'] -= 3
	265	elif variant == 'embedded':
	266	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
	267	ytcfg['priority'] -= 2
	268	else:
	269	ytcfg['priority'] -= 3
	270
	271
	272	build_innertube_clients()
	273
	274
	275	class YoutubeBaseInfoExtractor(InfoExtractor):
	276	"""Provide base functions for Youtube extractors"""
	277
	278	_RESERVED_NAMES = (
	279	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	280	r'shorts\|movies\|results\|search\|shared\|hashtag\|trending\|explore\|feed\|feeds\|'
	281	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	282	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	283
	284	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	285
	286	# _NETRC_MACHINE = 'youtube'
	287
	288	# If True it will raise an error if no login info is provided
	289	_LOGIN_REQUIRED = False
	290
	291	_INVIDIOUS_SITES = (
	292	# invidious-redirect websites
	293	r'(?:www\.)?redirect\.invidious\.io',
	294	r'(?:(?:www\|dev)\.)?invidio\.us',
	295	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	296	r'(?:www\.)?invidious\.pussthecat\.org',
	297	r'(?:www\.)?invidious\.zee\.li',
	298	r'(?:www\.)?invidious\.ethibox\.fr',
	299	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	300	r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
	301	r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
	302	# youtube-dl invidious instances list
	303	r'(?:(?:www\|no)\.)?invidiou\.sh',
	304	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	305	r'(?:www\.)?invidious\.kabi\.tk',
	306	r'(?:www\.)?invidious\.mastodon\.host',
	307	r'(?:www\.)?invidious\.zapashcanon\.fr',
	308	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	309	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	310	r'(?:www\.)?invidious\.himiko\.cloud',
	311	r'(?:www\.)?invidious\.reallyancient\.tech',
	312	r'(?:www\.)?invidious\.tube',
	313	r'(?:www\.)?invidiou\.site',
	314	r'(?:www\.)?invidious\.site',
	315	r'(?:www\.)?invidious\.xyz',
	316	r'(?:www\.)?invidious\.nixnet\.xyz',
	317	r'(?:www\.)?invidious\.048596\.xyz',
	318	r'(?:www\.)?invidious\.drycat\.fr',
	319	r'(?:www\.)?inv\.skyn3t\.in',
	320	r'(?:www\.)?tube\.poal\.co',
	321	r'(?:www\.)?tube\.connect\.cafe',
	322	r'(?:www\.)?vid\.wxzm\.sx',
	323	r'(?:www\.)?vid\.mint\.lgbt',
	324	r'(?:www\.)?vid\.puffyan\.us',
	325	r'(?:www\.)?yewtu\.be',
	326	r'(?:www\.)?yt\.elukerio\.org',
	327	r'(?:www\.)?yt\.lelux\.fi',
	328	r'(?:www\.)?invidious\.ggc-project\.de',
	329	r'(?:www\.)?yt\.maisputain\.ovh',
	330	r'(?:www\.)?ytprivate\.com',
	331	r'(?:www\.)?invidious\.13ad\.de',
	332	r'(?:www\.)?invidious\.toot\.koeln',
	333	r'(?:www\.)?invidious\.fdn\.fr',
	334	r'(?:www\.)?watch\.nettohikari\.com',
	335	r'(?:www\.)?invidious\.namazso\.eu',
	336	r'(?:www\.)?invidious\.silkky\.cloud',
	337	r'(?:www\.)?invidious\.exonip\.de',
	338	r'(?:www\.)?invidious\.riverside\.rocks',
	339	r'(?:www\.)?invidious\.blamefran\.net',
	340	r'(?:www\.)?invidious\.moomoo\.de',
	341	r'(?:www\.)?ytb\.trom\.tf',
	342	r'(?:www\.)?yt\.cyberhost\.uk',
	343	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	344	r'(?:www\.)?qklhadlycap4cnod\.onion',
	345	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	346	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	347	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	348	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	349	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	350	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	351	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	352	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	353	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	354	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	355	)
	356
	357	def _initialize_consent(self):
	358	cookies = self._get_cookies('https://www.youtube.com/')
	359	if cookies.get('__Secure-3PSID'):
	360	return
	361	consent_id = None
	362	consent = cookies.get('CONSENT')
	363	if consent:
	364	if 'YES' in consent.value:
	365	return
	366	consent_id = self._search_regex(
	367	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	368	if not consent_id:
	369	consent_id = random.randint(100, 999)
	370	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	371
	372	def _initialize_pref(self):
	373	cookies = self._get_cookies('https://www.youtube.com/')
	374	pref_cookie = cookies.get('PREF')
	375	pref = {}
	376	if pref_cookie:
	377	try:
	378	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	379	except ValueError:
	380	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	381	pref.update({'hl': 'en', 'tz': 'UTC'})
	382	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	383
	384	def _real_initialize(self):
	385	self._initialize_pref()
	386	self._initialize_consent()
	387	if (self._LOGIN_REQUIRED
	388	and self.get_param('cookiefile') is None
	389	and self.get_param('cookiesfrombrowser') is None):
	390	self.raise_login_required('Login details are needed to download this content', method='cookies')
	391
	392	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	393	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	394	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	395
	396	def _get_default_ytcfg(self, client='web'):
	397	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	398
	399	def _get_innertube_host(self, client='web'):
	400	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	401
	402	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	403	# try_get but with fallback to default ytcfg client values when present
	404	_func = lambda y: try_get(y, getter, expected_type)
	405	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	406
	407	def _extract_client_name(self, ytcfg, default_client='web'):
	408	return self._ytcfg_get_safe(
	409	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	410	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	411
	412	def _extract_client_version(self, ytcfg, default_client='web'):
	413	return self._ytcfg_get_safe(
	414	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	415	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	416
	417	def _extract_api_key(self, ytcfg=None, default_client='web'):
	418	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	419
	420	def _extract_context(self, ytcfg=None, default_client='web'):
	421	context = get_first(
	422	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	423	# Enforce language and tz for extraction
	424	client_context = traverse_obj(context, 'client', expected_type=dict, default={})
	425	client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
	426	return context
	427
	428	_SAPISID = None
	429
	430	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	431	time_now = round(time.time())
	432	if self._SAPISID is None:
	433	yt_cookies = self._get_cookies('https://www.youtube.com')
	434	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	435	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	436	sapisid_cookie = dict_get(
	437	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	438	if sapisid_cookie and sapisid_cookie.value:
	439	self._SAPISID = sapisid_cookie.value
	440	self.write_debug('Extracted SAPISID cookie')
	441	# SAPISID cookie is required if not already present
	442	if not yt_cookies.get('SAPISID'):
	443	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	444	self._set_cookie(
	445	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	446	else:
	447	self._SAPISID = False
	448	if not self._SAPISID:
	449	return None
	450	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	451	sapisidhash = hashlib.sha1(
	452	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	453	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	454
	455	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	456	note='Downloading API JSON', errnote='Unable to download API page',
	457	context=None, api_key=None, api_hostname=None, default_client='web'):
	458
	459	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	460	data.update(query)
	461	real_headers = self.generate_api_headers(default_client=default_client)
	462	real_headers.update({'content-type': 'application/json'})
	463	if headers:
	464	real_headers.update(headers)
	465	return self._download_json(
	466	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	467	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	468	data=json.dumps(data).encode('utf8'), headers=real_headers,
	469	query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
	470
	471	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	472	data = self._search_regex(
	473	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	474	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	475	if data:
	476	return self._parse_json(data, item_id, fatal=fatal)
	477
	478	@staticmethod
	479	def _extract_session_index(*data):
	480	"""
	481	Index of current account in account list.
	482	See: https://github.com/yt-dlp/yt-dlp/pull/519
	483	"""
	484	for ytcfg in data:
	485	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	486	if session_index is not None:
	487	return session_index
	488
	489	# Deprecated?
	490	def _extract_identity_token(self, ytcfg=None, webpage=None):
	491	if ytcfg:
	492	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	493	if token:
	494	return token
	495	if webpage:
	496	return self._search_regex(
	497	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	498	'identity token', default=None, fatal=False)
	499
	500	@staticmethod

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

get_first,

int_or_none,

is_html,

join_nonempty,

js_to_json,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

# any clients starting with _ cannot be explicity requested by the user

77

INNERTUBE_CLIENTS = {

78

'web': {

79

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

80

'INNERTUBE_CONTEXT': {

81

'client': {

82

'clientName': 'WEB',

83

'clientVersion': '2.20211221.00.00',

84

}

85

},

86

'INNERTUBE_CONTEXT_CLIENT_NAME': 1

87

},

88

'web_embedded': {

89

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

90

'INNERTUBE_CONTEXT': {

91

'client': {

92

'clientName': 'WEB_EMBEDDED_PLAYER',

93

'clientVersion': '1.20211215.00.01',

94

},

95

},

96

'INNERTUBE_CONTEXT_CLIENT_NAME': 56

97

},

98

'web_music': {

99

'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',

100

'INNERTUBE_HOST': 'music.youtube.com',

101

'INNERTUBE_CONTEXT': {

102

'client': {

103

'clientName': 'WEB_REMIX',

104

'clientVersion': '1.20211213.00.00',

105

}

106

},

107

'INNERTUBE_CONTEXT_CLIENT_NAME': 67,

108

},

109

'web_creator': {

110

'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',

111

'INNERTUBE_CONTEXT': {

112

'client': {

113

'clientName': 'WEB_CREATOR',

114

'clientVersion': '1.20211220.02.00',

115

}

116

},

117

'INNERTUBE_CONTEXT_CLIENT_NAME': 62,

118

},

119

'android': {

120

'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',

121

'INNERTUBE_CONTEXT': {

122

'client': {

123

'clientName': 'ANDROID',

124

'clientVersion': '16.49',

125

}

126

},

127

'INNERTUBE_CONTEXT_CLIENT_NAME': 3,

128

'REQUIRE_JS_PLAYER': False

129

},

130

'android_embedded': {

131

'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',

132

'INNERTUBE_CONTEXT': {

133

'client': {

134

'clientName': 'ANDROID_EMBEDDED_PLAYER',

135

'clientVersion': '16.49',

136

},

137

},

138

'INNERTUBE_CONTEXT_CLIENT_NAME': 55,

139

'REQUIRE_JS_PLAYER': False

140

},

141

'android_music': {

142

'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',

143

'INNERTUBE_CONTEXT': {

144

'client': {

145

'clientName': 'ANDROID_MUSIC',

146

'clientVersion': '4.57',

147

}

148

},

149

'INNERTUBE_CONTEXT_CLIENT_NAME': 21,

150

'REQUIRE_JS_PLAYER': False

151

},

152

'android_creator': {

153

'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',

154

'INNERTUBE_CONTEXT': {

155

'client': {

156

'clientName': 'ANDROID_CREATOR',

157

'clientVersion': '21.47',

158

},

159

},

160

'INNERTUBE_CONTEXT_CLIENT_NAME': 14,

161

'REQUIRE_JS_PLAYER': False

162

},

163

# iOS clients have HLS live streams. Setting device model to get 60fps formats.

164

# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558

165

'ios': {

166

'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',

167

'INNERTUBE_CONTEXT': {

168

'client': {

169

'clientName': 'IOS',

170

'clientVersion': '16.46',

171

'deviceModel': 'iPhone14,3',

172

}

173

},

174

'INNERTUBE_CONTEXT_CLIENT_NAME': 5,

175

'REQUIRE_JS_PLAYER': False

176

},

177

'ios_embedded': {

178

'INNERTUBE_CONTEXT': {

179

'client': {

180

'clientName': 'IOS_MESSAGES_EXTENSION',

181

'clientVersion': '16.46',

182

'deviceModel': 'iPhone14,3',

183

},

184

},

185

'INNERTUBE_CONTEXT_CLIENT_NAME': 66,

186

'REQUIRE_JS_PLAYER': False

187

},

188

'ios_music': {

189

'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',

190

'INNERTUBE_CONTEXT': {

191

'client': {

192

'clientName': 'IOS_MUSIC',

193

'clientVersion': '4.57',

194

},

195

},

196

'INNERTUBE_CONTEXT_CLIENT_NAME': 26,

197

'REQUIRE_JS_PLAYER': False

198

},

199

'ios_creator': {

200

'INNERTUBE_CONTEXT': {

201

'client': {

202

'clientName': 'IOS_CREATOR',

203

'clientVersion': '21.47',

204

},

205

},

206

'INNERTUBE_CONTEXT_CLIENT_NAME': 15,

207

'REQUIRE_JS_PLAYER': False

208

},

209

# mweb has 'ultralow' formats

210

# See: https://github.com/yt-dlp/yt-dlp/pull/557

211

'mweb': {

212

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

213

'INNERTUBE_CONTEXT': {

214

'client': {

215

'clientName': 'MWEB',

216

'clientVersion': '2.20211221.01.00',

217

}

218

},

219

'INNERTUBE_CONTEXT_CLIENT_NAME': 2

220

},

221

# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)

222

# See: https://github.com/zerodytrash/YouTube-Internal-Clients

223

'tv_embedded': {

224

'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',

225

'INNERTUBE_CONTEXT': {

226

'client': {

227

'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',

228

'clientVersion': '2.0',

229

},

230

},

231

'INNERTUBE_CONTEXT_CLIENT_NAME': 85

},

}

def _split_innertube_client(client_name):

237

variant, *base = client_name.rsplit('.', 1)

238

if base:

239

return variant, base[0], variant

240

base, *variant = client_name.split('_', 1)

241

return client_name, base, variant[0] if variant else None

242

243

244

def build_innertube_clients():

245

THIRD_PARTY = {

246

'embedUrl': 'https://www.youtube.com/', # Can be any valid URL

247

}

248

BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')

249

priority = qualities(BASE_CLIENTS[::-1])

250

251

for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):

252

ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')

253

ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')

254

ytcfg.setdefault('REQUIRE_JS_PLAYER', True)

255

ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

256

257

_, base_client, variant = _split_innertube_client(client)

258

ytcfg['priority'] = 10 * priority(base_client)

259

260

if not variant:

261

INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)

262

embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'

263

embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

264

embedscreen['priority'] -= 3

265

elif variant == 'embedded':

266

ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY

267

ytcfg['priority'] -= 2

268

else:

269

ytcfg['priority'] -= 3

270

271

272

build_innertube_clients()

273

274

275

class YoutubeBaseInfoExtractor(InfoExtractor):

276

"""Provide base functions for Youtube extractors"""

277

278

_RESERVED_NAMES = (

279

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

285

286

# _NETRC_MACHINE = 'youtube'

287

288

# If True it will raise an error if no login info is provided

289

_LOGIN_REQUIRED = False

290

291

_INVIDIOUS_SITES = (

292

# invidious-redirect websites

293

r'(?:www\.)?redirect\.invidious\.io',

294

r'(?:(?:www|dev)\.)?invidio\.us',

295

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

296

r'(?:www\.)?invidious\.pussthecat\.org',

297

r'(?:www\.)?invidious\.zee\.li',

298

r'(?:www\.)?invidious\.ethibox\.fr',

299

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

300

r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',

301

r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',

302

# youtube-dl invidious instances list

303

r'(?:(?:www|no)\.)?invidiou\.sh',

304

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

305

r'(?:www\.)?invidious\.kabi\.tk',

306

r'(?:www\.)?invidious\.mastodon\.host',

307

r'(?:www\.)?invidious\.zapashcanon\.fr',

308

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

309

r'(?:www\.)?invidious\.tinfoil-hat\.net',

310

r'(?:www\.)?invidious\.himiko\.cloud',

311

r'(?:www\.)?invidious\.reallyancient\.tech',

312

r'(?:www\.)?invidious\.tube',

313

r'(?:www\.)?invidiou\.site',

314

r'(?:www\.)?invidious\.site',

315

r'(?:www\.)?invidious\.xyz',

316

r'(?:www\.)?invidious\.nixnet\.xyz',

317

r'(?:www\.)?invidious\.048596\.xyz',

318

r'(?:www\.)?invidious\.drycat\.fr',

319

r'(?:www\.)?inv\.skyn3t\.in',

320

r'(?:www\.)?tube\.poal\.co',

321

r'(?:www\.)?tube\.connect\.cafe',

322

r'(?:www\.)?vid\.wxzm\.sx',

323

r'(?:www\.)?vid\.mint\.lgbt',

324

r'(?:www\.)?vid\.puffyan\.us',

325

r'(?:www\.)?yewtu\.be',

326

r'(?:www\.)?yt\.elukerio\.org',

327

r'(?:www\.)?yt\.lelux\.fi',

328

r'(?:www\.)?invidious\.ggc-project\.de',

329

r'(?:www\.)?yt\.maisputain\.ovh',

330

r'(?:www\.)?ytprivate\.com',

331

r'(?:www\.)?invidious\.13ad\.de',

332

r'(?:www\.)?invidious\.toot\.koeln',

333

r'(?:www\.)?invidious\.fdn\.fr',

334

r'(?:www\.)?watch\.nettohikari\.com',

335

r'(?:www\.)?invidious\.namazso\.eu',

336

r'(?:www\.)?invidious\.silkky\.cloud',

337

r'(?:www\.)?invidious\.exonip\.de',

338

r'(?:www\.)?invidious\.riverside\.rocks',

339

r'(?:www\.)?invidious\.blamefran\.net',

340

r'(?:www\.)?invidious\.moomoo\.de',

341

r'(?:www\.)?ytb\.trom\.tf',

342

r'(?:www\.)?yt\.cyberhost\.uk',

343

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

344

r'(?:www\.)?qklhadlycap4cnod\.onion',

345

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

346

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

347

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

348

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

349

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

350

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

351

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

352

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

353

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

354

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

355

)

356

357

def _initialize_consent(self):

358

cookies = self._get_cookies('https://www.youtube.com/')

359

if cookies.get('__Secure-3PSID'):

360

return

361

consent_id = None

362

consent = cookies.get('CONSENT')

363

if consent:

364

if 'YES' in consent.value:

365

return

366

consent_id = self._search_regex(

367

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

368

if not consent_id:

369

consent_id = random.randint(100, 999)

370

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

371

372

def _initialize_pref(self):

373

cookies = self._get_cookies('https://www.youtube.com/')

374

pref_cookie = cookies.get('PREF')

pref = {}

if pref_cookie:

try:

pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))

379

except ValueError:

380

self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

381

pref.update({'hl': 'en', 'tz': 'UTC'})

382

self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))

383

384

def _real_initialize(self):

385

self._initialize_pref()

386

self._initialize_consent()

387

if (self._LOGIN_REQUIRED

388

and self.get_param('cookiefile') is None

389

and self.get_param('cookiesfrombrowser') is None):

390

self.raise_login_required('Login details are needed to download this content', method='cookies')

391

392

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

393

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

394

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

395

396

def _get_default_ytcfg(self, client='web'):

397

return copy.deepcopy(INNERTUBE_CLIENTS[client])

398

399

def _get_innertube_host(self, client='web'):

400

return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

401

402

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):

403

# try_get but with fallback to default ytcfg client values when present

404

_func = lambda y: try_get(y, getter, expected_type)

405

return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

406

407

def _extract_client_name(self, ytcfg, default_client='web'):

408

return self._ytcfg_get_safe(

409

ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],

410

lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

411

412

def _extract_client_version(self, ytcfg, default_client='web'):

413

return self._ytcfg_get_safe(

414

ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],

415

lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

416

417

def _extract_api_key(self, ytcfg=None, default_client='web'):

418

return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

419

420

def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first dict found under 'INNERTUBE_CONTEXT' in `ytcfg` or in the
    default config of `default_client`, with language/timezone forced on its
    'client' sub-dict.

    The 'client' dict is modified in place; if no dict is found there, the
    settings are written to a temporary dict and do not reach the result.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    # Enforce language and tz for extraction
    for key, value in (('hl', 'en'), ('timeZone', 'UTC'), ('utcOffsetMinutes', 0)):
        client_context[key] = value
    return context

# Cached SAPISID cookie value used by _generate_sapisidhash_header:
# None = not looked up yet, False = looked up but no usable cookie, otherwise the cookie value.
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):

431

time_now = round(time.time())

432

if self._SAPISID is None:

433

yt_cookies = self._get_cookies('https://www.youtube.com')

434

# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.

435

# See: https://github.com/yt-dlp/yt-dlp/issues/393

436

sapisid_cookie = dict_get(

437

yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))

438

if sapisid_cookie and sapisid_cookie.value:

439

self._SAPISID = sapisid_cookie.value

440

self.write_debug('Extracted SAPISID cookie')

441

# SAPISID cookie is required if not already present

442

if not yt_cookies.get('SAPISID'):

443

self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')

444

self._set_cookie(

445

'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

446

else:

447

self._SAPISID = False

448

if not self._SAPISID:

449

return None

450

# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323

451

sapisidhash = hashlib.sha1(

452

f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()

453

return f'SAPISIDHASH {time_now}_{sapisidhash}'

454

455

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST a JSON request to the innertube endpoint `ep` and return the parsed JSON.

    @param ep              endpoint name appended to 'https://<host>/youtubei/v1/'
    @param query           dict merged into the request body next to 'context'
    @param video_id        passed through to _download_json
    @param headers         extra headers; they override the generated API headers
    @param context         request context; extracted for `default_client` when falsy
    @param api_key         API key; extracted for `default_client` when falsy
    @param api_hostname    host to use instead of the client's innertube host
    @param default_client  client whose defaults fill in context, headers, host and key
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={
            # The fallback key must belong to the same client as the context and
            # headers above, not unconditionally to the 'web' client.
            'key': api_key or self._extract_api_key(default_client=default_client),
            'prettyPrint': 'false',
        })

470

471

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData object literal in `webpage` and return it parsed as JSON.

    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then the
    bare _YT_INITIAL_DATA_RE. Returns None when the search yields nothing.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    data = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not data:
        return None
    return self._parse_json(data, item_id, fatal=fatal)

477

478

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)

# Deprecated?

def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token: 'ID_TOKEN' from `ytcfg` when present, otherwise
    the ID_TOKEN value found in `webpage` by regex, otherwise None.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

499

500

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    sync_id_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        raw_sync_id = try_get(data, sync_id_getters, compat_str) or ''
        parts = raw_sync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried on each argument
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)

532

533

def extract_ytcfg(self, video_id, webpage):
    """
    Return the object passed to `ytcfg.set({...});` in `webpage`, parsed as JSON.

    Returns an empty dict when `webpage` is falsy, when no such call is found,
    or when the captured text cannot be parsed.
    """
    if not webpage:
        return {}
    return self._parse_json(
        self._search_regex(
            # The parentheses of the call must be matched literally; a bare `$`
            # here is an end-of-string anchor and makes the pattern unmatchable.
            r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
            default='{}'), video_id, fatal=False) or {}

540

541

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Values not passed explicitly are taken from `ytcfg`, with the client name
    and version falling back to the defaults of `default_client`. Headers
    whose value is None are left out of the returned dict.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }
    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account sync id but no known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    filtered_headers = {}
    for header_name, header_value in headers.items():
        if header_value is not None:
            filtered_headers[header_name] = header_value
    return filtered_headers

565

566

@staticmethod

567

def _build_api_continuation_query(continuation, ctp=None):

568

query = {

569

'continuation': continuation

570

}

571

# TODO: Inconsistency with clickTrackingParams.

572

# Currently we have a fixed ctp contained within context (from ytcfg)

573

# and a ctp in root query for continuation.

574

if ctp:

575

query['clickTracking'] = {'clickTrackingParams': ctp}

return query

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData' or
    'reloadContinuationData'. Returns None when neither holds a continuation token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    next_continuation = try_get(renderer, getters, dict)
    token = next_continuation.get('continuation') if next_continuation else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, next_continuation.get('clickTrackingParams'))

590

591

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict, using its
    'continuationCommand' token. Returns None for non-dict input or a missing token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))

600

601

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`.

    The renderer's own next/reload continuation data is preferred; otherwise
    the entries under 'contents' and 'items' are scanned for a
    continuationItemRenderer endpoint. Returns None when nothing is found.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    contents = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for content in contents:
        if not isinstance(content, dict):
            continue
        continuation = cls._extract_continuation_ep_data(
            try_get(content, endpoint_getters, dict))
        if continuation:
            return continuation

@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts'].

    Non-dict entries, alerts without a 'type' and alerts without text are skipped.
    """
    alert_dicts = try_get(data, lambda x: x['alerts'], list) or []
    for alert_dict in filter(lambda entry: isinstance(entry, dict), alert_dicts):
        for alert in alert_dict.values():
            alert_type = alert.get('type')
            if alert_type:
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

634

635

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):

636

errors = []

637

warnings = []

638

for alert_type, alert_message in alerts:

639

if alert_type.lower() == 'error' and fatal:

640

errors.append([alert_type, alert_message])

641

else:

642

warnings.append([alert_type, alert_message])

643

644

for alert_type, alert_message in (warnings + errors[:-1]):

645

self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)

646

if errors:

647

raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

648

649

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

651

652

def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`.

    For each candidate object, 'simpleText' is preferred; otherwise the 'text'
    fields of its 'runs' (or of the object itself when it is a list) are
    joined, limited to `max_runs` runs when that is set.
    With no paths, `data` itself is examined. Returns None when nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a path containing `...` or an alternatives list/tuple is
            # treated as yielding a list of results; otherwise wrap the single one
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]
        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            run_limit = min(len(runs), max_runs or len(runs))
            joined_text = ''.join(
                traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str, default=[]))
            if joined_text:
                return joined_text

def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.

    parse_count is tried first; if it yields None, the leading run of digits
    and commas (after removing all whitespace) is converted with str_to_int.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    thumbnails = []
    for path in path_list or [()]:
        found = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for thumbnail in found:
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if thumbnail_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in thumbnail_url:
                    thumbnail_url = thumbnail_url.partition('?')[0]
                thumbnails.append({
                    'url': thumbnail_url,
                    'height': int_or_none(thumbnail.get('height')),
                    'width': int_or_none(thumbnail.get('width')),
                })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns the result of datetime_from_str, or None when the text holds no
    relative time or datetime_from_str rejects it with ValueError.
    """
    # Either a keyword (today/yesterday/now) or "<number> <unit>[s] ago"
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _extract_time_text(self, renderer, *path_list):
    """
    Return (timestamp, text) for the time text found at the given paths.

    The text is first read as a relative time; failing that, as an absolute
    date, either directly or after isolating a date-like part by regex.
    `timestamp` is None when nothing could be parsed, and a warning is
    reported once if the text was non-empty.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text

747

748

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the innertube API with retries and return the response.

    The request is attempted up to 1 + 'extractor_retries' (default 3) times.
    A retry happens on network errors other than HTTP 403/429, on alerts
    whose message contains 'unknown error', and on responses lacking every
    key in `check_get_keys`.

    @param check_get_keys  keys of which at least one must be present (truthy)
                           in the response for it to count as complete
    @param fatal           if set, raise once retries are exhausted or on a
                           non-retryable error; otherwise warn and return None
    Remaining parameters are passed on to _call_api.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError):
                    # Surface the error message from a JSON error body, if any
                    first_bytes = e.cause.read(512)
                    if not is_html(first_bytes):
                        yt_error = try_get(
                            self._parse_json(
                                self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                # Only retry while attempts remain; on the final attempt the error
                # must be raised/reported instead of returning the failed response.
                if 'unknown error' in e.msg.lower() and count < retries:
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod
def is_music_url(url):
    """Return True when `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))

823

824

def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict for a video renderer.

    The result points at the watch URL, or at the shorts URL when the
    thumbnail overlay style is 'SHORTS' or the navigation URL contains
    '/shorts/'. 'upload_date' is only filled in when the 'approximate_date'
    extractor argument of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

881

IE_DESC = 'YouTube'

882

_VALID_URL = r"""(?x)^

883

(

884

(?:https?://|//) # http(s):// or protocol-independent URL

885

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

886

(?:www\.)?deturl\.com/www\.youtube\.com|

887

(?:www\.)?pwnyoutube\.com|

888

(?:www\.)?hooktube\.com|

889

(?:www\.)?yourepeat\.com|

890

tube\.majestyc\.net|

891

%(invidious)s|

892

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

893

(?:.*?\#/)? # handle anchor (#/) redirect urls

894

(?: # the various things that can precede the ID:

895

(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/

896

|(?: # or the v= param in all its forms

897

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

898

(?:\?|\#!?) # the params delimiter ? or # or #!

899

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

905

vid\.plus| # or vid.plus/xxxx

906

zwearz\.com/watch| # or zwearz.com/watch/xxxx

907

%(invidious)s

908

)/

909

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

910

)

911

)? # all until now is optional -> you can pass the naked ID

912

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

913

(?(1).+)? # if we found the ID, everything can follow

914

(?:\#|$)""" % {

915

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

916

}

917

_PLAYER_INFO_RE = (

918

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

919

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

920

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

921

)

922

_formats = {

923

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

924

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

925

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

926

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

927

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

928

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

929

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

930

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

931

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

932

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

933

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

934

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

935

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

936

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

937

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

938

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

939

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

940

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

945

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

946

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

947

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

948

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

949

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

950

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

951

952

# Apple HTTP Live Streaming

953

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

954

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

955

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

956

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

957

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

958

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

959

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

960

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

961

962

# DASH mp4 video

963

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

964

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

965

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

966

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

967

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

968

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

969

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

970

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

971

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

972

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

973

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

974

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

975

976

# Dash mp4 audio

977

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

978

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

979

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

980

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

981

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

982

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

983

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

984

985

# Dash webm

986

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

987

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

988

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

989

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

990

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

991

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

992

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

993

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

994

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

995

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

996

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

997

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

998

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

999

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1000

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1001

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

1002

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1003

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1004

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1005

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1006

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

1007

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

1008

1009

# Dash webm audio

1010

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

1011

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

1012

1013

# Dash webm audio with opus inside

1014

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

1015

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

1016

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

1017

1018

# RTMP (unnamed)

1019

'_rtmp': {'protocol': 'rtmp'},

1020

1021

# av01 video only formats sometimes served with "unknown" codecs

1022

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1023

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

1024

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

1025

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

1026

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

1027

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

1028

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1029

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1030

}

1031

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1043

'uploader': 'Philipp Hagemeister',

1044

'uploader_id': 'phihag',

1045

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1046

'channel': 'Philipp Hagemeister',

1047

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1048

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1049

'upload_date': '20121002',

1050

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1051

'categories': ['Science & Technology'],

1052

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1057

'playable_in_embed': True,

1058

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1059

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

'channel_follower_count': int

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1068

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1073

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1074

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1075

'uploader': 'SET India',

1076

'uploader_id': 'setindia',

1077

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1078

'age_limit': 18,

1079

},

1080

'skip': 'Private video',

1081

},

1082

{

1083

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1084

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1089

'uploader': 'Philipp Hagemeister',

1090

'uploader_id': 'phihag',

1091

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1092

'channel': 'Philipp Hagemeister',

1093

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1094

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1095

'upload_date': '20121002',

1096

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1097

'categories': ['Science & Technology'],

1098

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1103

'playable_in_embed': True,

1104

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1105

'live_status': 'not_live',

1106

'age_limit': 0,

1107

'channel_follower_count': int

1108

},

1109

'params': {

1110

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1115

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1120

'uploader_id': '8KVIDEO',

1121

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1122

'description': '',

1123

'uploader': '8KVIDEO',

1124

'title': 'UHDTV TEST 8K VIDEO.mp4'

1125

},

1126

'params': {

1127

'youtube_include_dash_manifest': True,

1128

'format': '141',

1129

},

1130

'skip': 'format 141 not served anymore',

1131

},

1132

# DASH manifest with encrypted signature

1133

{

1134

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1139

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1140

'duration': 244,

1141

'uploader': 'AfrojackVEVO',

1142

'uploader_id': 'AfrojackVEVO',

1143

'upload_date': '20131011',

1144

'abr': 129.495,

1145

'like_count': int,

1146

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1147

'playable_in_embed': True,

1148

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1149

'view_count': int,

1150

'track': 'The Spark',

1151

'live_status': 'not_live',

1152

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1153

'channel': 'Afrojack',

1154

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1155

'tags': 'count:19',

1156

'availability': 'public',

1157

'categories': ['Music'],

1158

'age_limit': 0,

1159

'alt_title': 'The Spark',

1160

'channel_follower_count': int

1161

},

1162

'params': {

1163

'youtube_include_dash_manifest': True,

1164

'format': '141/bestaudio[ext=m4a]',

1165

},

1166

},

1167

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1168

{

1169

'note': 'Embed allowed age-gate video',

1170

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1175

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1176

'duration': 142,

1177

'uploader': 'The Witcher',

1178

'uploader_id': 'WitcherGame',

1179

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1180

'upload_date': '20140605',

1181

'age_limit': 18,

1182

'categories': ['Gaming'],

1183

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1184

'availability': 'needs_auth',

1185

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1186

'like_count': int,

1187

'channel': 'The Witcher',

1188

'live_status': 'not_live',

1189

'tags': 'count:17',

1190

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1191

'playable_in_embed': True,

1192

'view_count': int,

1193

'channel_follower_count': int

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1198

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1203

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1204

'upload_date': '20200408',

1205

'uploader_id': 'FlyingKitty900',

1206

'uploader': 'FlyingKitty',

1207

'age_limit': 18,

1208

'availability': 'needs_auth',

1209

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1210

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1211

'channel': 'FlyingKitty',

1212

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1213

'view_count': int,

1214

'categories': ['Entertainment'],

1215

'live_status': 'not_live',

1216

'tags': ['Flyingkitty', 'godzilla 2'],

1217

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1218

'like_count': int,

1219

'duration': 177,

1220

'playable_in_embed': True,

1221

'channel_follower_count': int

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1226

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1227

'info_dict': {

1228

'id': 'Tq92D6wQ1mg',

1229

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1230

'ext': 'mp4',

1231

'upload_date': '20191228',

1232

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1233

'uploader': 'Projekt Melody',

1234

'description': 'md5:17eccca93a786d51bc67646756894066',

1235

'age_limit': 18,

1236

'like_count': int,

1237

'availability': 'needs_auth',

1238

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1239

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1240

'view_count': int,

1241

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1242

'channel': 'Projekt Melody',

1243

'live_status': 'not_live',

1244

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1245

'playable_in_embed': True,

1246

'categories': ['Entertainment'],

1247

'duration': 106,

1248

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1249

'channel_follower_count': int

},

},

{

'note': 'Non-Agegated non-embeddable video',

1254

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1259

'uploader': 'Herr Lurik',

1260

'uploader_id': 'st3in234',

1261

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1262

'upload_date': '20130730',

1263

'track': 'Such mich find mich',

1264

'age_limit': 0,

1265

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1266

'like_count': int,

1267

'playable_in_embed': False,

1268

'creator': 'OOMPH!',

1269

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1270

'view_count': int,

1271

'alt_title': 'Such mich find mich',

1272

'duration': 210,

1273

'channel': 'Herr Lurik',

1274

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1275

'categories': ['Music'],

1276

'availability': 'public',

1277

'uploader_url': 'http://www.youtube.com/user/st3in234',

1278

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1279

'live_status': 'not_live',

1280

'artist': 'OOMPH!',

1281

'channel_follower_count': int

},

},

{

'note': 'Non-bypassable age-gated video',

1286

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1287

'only_matching': True,

1288

},

1289

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1290

# YouTube Red ad is not captured for creator

1291

{

1292

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1298

'uploader_id': 'deadmau5',

1299

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1300

'creator': 'deadmau5',

1301

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1302

'uploader': 'deadmau5',

1303

'title': 'Deadmau5 - Some Chords (HD)',

1304

'alt_title': 'Some Chords',

1305

'availability': 'public',

1306

'tags': 'count:14',

1307

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1308

'view_count': int,

1309

'live_status': 'not_live',

1310

'channel': 'deadmau5',

1311

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1312

'like_count': int,

1313

'track': 'Some Chords',

1314

'artist': 'deadmau5',

1315

'playable_in_embed': True,

1316

'age_limit': 0,

1317

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1318

'categories': ['Music'],

1319

'album': 'Some Chords',

1320

'channel_follower_count': int

1321

},

1322

'expected_warnings': [

1323

'DASH manifest missing',

1324

]

1325

},

1326

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1327

{

1328

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1334

'uploader_id': 'olympic',

1335

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1336

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1337

'uploader': 'Olympics',

1338

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1339

'like_count': int,

1340

'release_timestamp': 1343767800,

1341

'playable_in_embed': True,

1342

'categories': ['Sports'],

1343

'release_date': '20120731',

1344

'channel': 'Olympics',

1345

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1346

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1347

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1348

'age_limit': 0,

1349

'availability': 'public',

1350

'live_status': 'was_live',

1351

'view_count': int,

1352

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1353

'channel_follower_count': int

1354

},

1355

'params': {

1356

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1366

'duration': 85,

1367

'upload_date': '20110310',

1368

'uploader_id': 'AllenMeow',

1369

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1370

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1371

'uploader': '孫ᄋᄅ',

1372

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1373

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1378

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1379

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1380

'view_count': int,

1381

'categories': ['People & Blogs'],

1382

'like_count': int,

1383

'live_status': 'not_live',

1384

'availability': 'unlisted',

1385

'channel_follower_count': int

1386

},

1387

},

1388

# url_encoded_fmt_stream_map is empty string

1389

{

1390

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1395

'description': '',

1396

'upload_date': '20150404',

1397

'uploader_id': 'spbelect',

1398

'uploader': 'Наблюдатели Петербурга',

1399

},

1400

'params': {

1401

'skip_download': 'requires avconv',

1402

},

1403

'skip': 'This live event has ended.',

1404

},

1405

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1406

{

1407

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1412

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1413

'duration': 220,

1414

'upload_date': '20150625',

1415

'uploader_id': 'dorappi2000',

1416

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1417

'uploader': 'dorappi2000',

1418

'formats': 'mincount:31',

1419

},

1420

'skip': 'not actual anymore',

1421

},

1422

# DASH manifest with segment_list

1423

{

1424

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1425

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1430

'uploader': 'Airtek',

1431

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1432

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1433

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1434

},

1435

'params': {

1436

'youtube_include_dash_manifest': True,

1437

'format': '135', # bestvideo

1438

},

1439

'skip': 'This live event has ended.',

1440

},

1441

{

1442

# Multifeed videos (multiple cameras), URL is for Main Camera

1443

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1444

'info_dict': {

1445

'id': 'jvGDaLqkpTg',

1446

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1447

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1454

'description': 'md5:e03b909557865076822aa169218d6a5d',

1455

'duration': 10643,

1456

'upload_date': '20161111',

1457

'uploader': 'Team PGP',

1458

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1459

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1466

'description': 'md5:e03b909557865076822aa169218d6a5d',

1467

'duration': 10991,

1468

'upload_date': '20161111',

1469

'uploader': 'Team PGP',

1470

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1471

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1478

'description': 'md5:e03b909557865076822aa169218d6a5d',

1479

'duration': 10995,

1480

'upload_date': '20161111',

1481

'uploader': 'Team PGP',

1482

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1483

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1490

'description': 'md5:e03b909557865076822aa169218d6a5d',

1491

'duration': 10990,

1492

'upload_date': '20161111',

1493

'uploader': 'Team PGP',

1494

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1495

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1500

},

1501

'skip': 'Not multifeed anymore',

1502

},

1503

{

1504

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1505

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1506

'info_dict': {

1507

'id': 'gVfLd0zydlo',

1508

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1509

},

1510

'playlist_count': 2,

1511

'skip': 'Not multifeed anymore',

1512

},

1513

{

1514

'url': 'https://vid.plus/FlRa-iH7PGw',

1515

'only_matching': True,

1516

},

1517

{

1518

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1519

'only_matching': True,

1520

},

1521

{

1522

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1523

# Also tests cut-off URL expansion in video description (see

1524

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1525

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1526

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1531

'alt_title': 'Dark Walk',

1532

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1533

'duration': 133,

1534

'upload_date': '20151119',

1535

'uploader_id': 'IronSoulElf',

1536

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1537

'uploader': 'IronSoulElf',

1538

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1539

'track': 'Dark Walk',

1540

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1541

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1542

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1543

'categories': ['Film & Animation'],

1544

'view_count': int,

1545

'live_status': 'not_live',

1546

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1547

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1548

'tags': 'count:13',

1549

'availability': 'public',

1550

'channel': 'IronSoulElf',

1551

'playable_in_embed': True,

1552

'like_count': int,

1553

'age_limit': 0,

1554

'channel_follower_count': int

1555

},

1556

'params': {

1557

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1562

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1563

'only_matching': True,

1564

},

1565

{

1566

# Video with yt:stretch=17:0

1567

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1572

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1573

'upload_date': '20151107',

1574

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1575

'uploader': 'CH GAMER DROID',

1576

},

1577

'params': {

1578

'skip_download': True,

1579

},

1580

'skip': 'This video does not exist.',

1581

},

1582

{

1583

# Video with incomplete 'yt:stretch=16:'

1584

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1585

'only_matching': True,

1586

},

1587

{

1588

# Video licensed under Creative Commons

1589

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1594

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1595

'duration': 721,

1596

'upload_date': '20150128',

1597

'uploader_id': 'BerkmanCenter',

1598

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1599

'uploader': 'The Berkman Klein Center for Internet & Society',

1600

'license': 'Creative Commons Attribution license (reuse allowed)',

1601

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1602

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1603

'like_count': int,

1604

'age_limit': 0,

1605

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1606

'channel': 'The Berkman Klein Center for Internet & Society',

1607

'availability': 'public',

1608

'view_count': int,

1609

'categories': ['Education'],

1610

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1611

'live_status': 'not_live',

1612

'playable_in_embed': True,

1613

'channel_follower_count': int

1614

},

1615

'params': {

1616

'skip_download': True,

},

},

{

# Channel-like uploader_url

1621

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1626

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1627

'duration': 4060,

1628

'upload_date': '20151120',

1629

'uploader': 'Bernie Sanders',

1630

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1631

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1632

'license': 'Creative Commons Attribution license (reuse allowed)',

1633

'playable_in_embed': True,

1634

'tags': 'count:12',

1635

'like_count': int,

1636

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1637

'age_limit': 0,

1638

'availability': 'public',

1639

'categories': ['News & Politics'],

1640

'channel': 'Bernie Sanders',

1641

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1642

'view_count': int,

1643

'live_status': 'not_live',

1644

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1645

'channel_follower_count': int

1646

},

1647

'params': {

1648

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1653

'only_matching': True,

1654

},

1655

{

1656

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1657

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1658

'only_matching': True,

1659

},

1660

{

1661

# Rental video preview

1662

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1667

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1668

'upload_date': '20150811',

1669

'uploader': 'FlixMatrix',

1670

'uploader_id': 'FlixMatrixKaravan',

1671

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1672

'license': 'Standard YouTube License',

1673

},

1674

'params': {

1675

'skip_download': True,

1676

},

1677

'skip': 'This video is not available.',

1678

},

1679

{

1680

# YouTube Red video with episode data

1681

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1686

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1687

'duration': 2085,

1688

'upload_date': '20170118',

1689

'uploader': 'Vsauce',

1690

'uploader_id': 'Vsauce',

1691

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1692

'series': 'Mind Field',

1693

'season_number': 1,

1694

'episode_number': 1,

1695

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1696

'tags': 'count:12',

1697

'view_count': int,

1698

'availability': 'public',

1699

'age_limit': 0,

1700

'channel': 'Vsauce',

1701

'episode': 'Episode 1',

1702

'categories': ['Entertainment'],

1703

'season': 'Season 1',

1704

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1705

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1706

'like_count': int,

1707

'playable_in_embed': True,

1708

'live_status': 'not_live',

1709

'channel_follower_count': int

1710

},

1711

'params': {

1712

'skip_download': True,

1713

},

1714

'expected_warnings': [

1715

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1720

# as inappropriate or offensive to some audiences.

1721

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1726

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1727

'duration': 965,

1728

'upload_date': '20140124',

1729

'uploader': 'New Century Foundation',

1730

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1731

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1732

},

1733

'params': {

1734

'skip_download': True,

1735

},

1736

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1741

'only_matching': True,

1742

},

1743

{

1744

# geo restricted to JP

1745

'url': 'sJL6WA-aGkQ',

1746

'only_matching': True,

1747

},

1748

{

1749

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1750

'only_matching': True,

1751

},

1752

{

1753

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1754

'only_matching': True,

1755

},

1756

{

1757

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1758

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1759

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1764

'only_matching': True,

1765

},

1766

{

1767

# Video with unsupported adaptive stream type formats

1768

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1773

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1774

'duration': 433,

1775

'upload_date': '20130923',

1776

'uploader': 'Amelia Putri Harwita',

1777

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1778

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1779

'formats': 'maxcount:10',

1780

},

1781

'params': {

1782

'skip_download': True,

1783

'youtube_include_dash_manifest': False,

1784

},

1785

'skip': 'not actual anymore',

1786

},

1787

{

1788

# YouTube Music auto-generated description

1789

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1794

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1795

'upload_date': '20190312',

1796

'uploader': 'Stephen - Topic',

1797

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1798

'artist': 'Stephen',

1799

'track': 'Voyeur Girl',

1800

'album': 'it\'s too much love to know my dear',

1801

'release_date': '20190313',

1802

'release_year': 2019,

1803

'alt_title': 'Voyeur Girl',

1804

'view_count': int,

1805

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1806

'playable_in_embed': True,

1807

'like_count': int,

1808

'categories': ['Music'],

1809

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1810

'channel': 'Stephen',

1811

'availability': 'public',

1812

'creator': 'Stephen',

1813

'duration': 169,

1814

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1815

'age_limit': 0,

1816

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1817

'tags': 'count:11',

1818

'live_status': 'not_live',

1819

'channel_follower_count': int

1820

},

1821

'params': {

1822

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1827

'only_matching': True,

1828

},

1829

{

1830

# invalid -> valid video id redirection

1831

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1836

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1837

'upload_date': '20090125',

1838

'uploader': 'Prochorowka',

1839

'uploader_id': 'Prochorowka',

1840

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1841

'artist': 'Panjabi MC',

1842

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1843

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1844

},

1845

'params': {

1846

'skip_download': True,

1847

},

1848

'skip': 'Video unavailable',

1849

},

1850

{

1851

# empty description results in an empty string

1852

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1859

'uploader_id': 'ElevageOrVert',

1860

'uploader': 'ElevageOrVert',

1861

'view_count': int,

1862

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1863

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1864

'like_count': int,

1865

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1866

'tags': [],

1867

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1868

'availability': 'public',

1869

'age_limit': 0,

1870

'categories': ['Pets & Animals'],

1871

'duration': 7,

1872

'playable_in_embed': True,

1873

'live_status': 'not_live',

1874

'channel': 'ElevageOrVert',

1875

'channel_follower_count': int

1876

},

1877

'params': {

1878

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1883

# see [2] for an example with '};' inside ytInitialPlayerResponse

1884

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1885

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1886

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1891

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1892

'upload_date': '20130831',

1893

'uploader_id': 'kudvenkat',

1894

'uploader': 'kudvenkat',

1895

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1896

'like_count': int,

1897

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1898

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1899

'live_status': 'not_live',

1900

'categories': ['Education'],

1901

'availability': 'public',

1902

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1903

'tags': 'count:12',

1904

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1909

'channel_follower_count': int

1910

},

1911

'params': {

1912

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1917

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1918

'only_matching': True,

1919

},

1920

{

1921

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1922

'only_matching': True,

1923

},

1924

{

1925

# https://github.com/ytdl-org/youtube-dl/pull/28094

1926

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1932

'upload_date': '20141120',

1933

'uploader': 'The Cinematic Orchestra - Topic',

1934

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1935

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1936

'artist': 'The Cinematic Orchestra',

1937

'track': 'Burn Out',

1938

'album': 'Every Day',

1939

'like_count': int,

1940

'live_status': 'not_live',

1941

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1946

'creator': 'The Cinematic Orchestra',

1947

'channel': 'The Cinematic Orchestra',

1948

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1949

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1950

'availability': 'public',

1951

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1952

'categories': ['Music'],

1953

'playable_in_embed': True,

1954

'channel_follower_count': int

1955

},

1956

'params': {

1957

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1962

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1963

'only_matching': True,

1964

},

1965

{

1966

# controversial video, requires bpctr/contentCheckOk

1967

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1972

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1973

'uploader': 'CBS Mornings',

1974

'uploader_id': 'CBSThisMorning',

1975

'upload_date': '20140716',

1976

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1977

'duration': 170,

1978

'categories': ['News & Politics'],

1979

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1980

'view_count': int,

1981

'channel': 'CBS Mornings',

1982

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1983

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1984

'age_limit': 18,

1985

'availability': 'needs_auth',

1986

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1987

'like_count': int,

1988

'live_status': 'not_live',

1989

'playable_in_embed': True,

1990

'channel_follower_count': int

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1995

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

2000

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

2001

'upload_date': '20201120',

2002

'uploader': 'Walk around Japan',

2003

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2004

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2005

'duration': 1456,

2006

'categories': ['Travel & Events'],

2007

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

2008

'view_count': int,

2009

'channel': 'Walk around Japan',

2010

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

2011

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

2012

'age_limit': 0,

2013

'availability': 'public',

2014

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

2015

'live_status': 'not_live',

2016

'playable_in_embed': True,

2017

'channel_follower_count': int

2018

},

2019

'params': {

2020

'skip_download': True,

2021

},

2022

}, {

2023

# Has multiple audio streams

2024

'url': 'WaOKSUlf4TM',

2025

'only_matching': True

2026

}, {

2027

# Requires Premium: has format 141 when requested using YTM url

2028

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

2029

'only_matching': True

2030

}, {

2031

# multiple subtitles with same lang_code

2032

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

2033

'only_matching': True,

2034

}, {

2035

# Force use android client fallback

2036

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

2037

'info_dict': {

2038

'id': 'YOelRv7fMxY',

2039

'title': 'DIGGING A SECRET TUNNEL Part 1',

2040

'ext': '3gp',

2041

'upload_date': '20210624',

2042

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

2043

'uploader': 'colinfurze',

2044

'uploader_id': 'colinfurze',

2045

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

2046

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

2047

'duration': 596,

2048

'categories': ['Entertainment'],

2049

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2050

'view_count': int,

2051

'channel': 'colinfurze',

2052

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2053

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2054

'age_limit': 0,

2055

'availability': 'public',

2056

'like_count': int,

2057

'live_status': 'not_live',

2058

'playable_in_embed': True,

2059

'channel_follower_count': int

2060

},

2061

'params': {

2062

'format': '17', # 3gp format available on android

2063

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2068

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2069

'only_matching': True,

2070

'params': {

2071

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2076

'only_matching': True,

2077

}, {

2078

'note': 'Storyboards',

2079

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2085

'uploader_id': 'scishow',

2086

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2087

'upload_date': '20140324',

2088

'uploader': 'SciShow',

2089

'like_count': int,

2090

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2091

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2092

'view_count': int,

2093

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2094

'playable_in_embed': True,

2095

'tags': 'count:12',

2096

'uploader_url': 'http://www.youtube.com/user/scishow',

2097

'availability': 'public',

2098

'channel': 'SciShow',

2099

'live_status': 'not_live',

2100

'duration': 248,

2101

'categories': ['Education'],

2102

'age_limit': 0,

2103

'channel_follower_count': int

2104

}, 'params': {'format': 'mhtml', 'skip_download': True}

2105

}, {

2106

# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

2107

'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',

'info_dict': {

'id': '2NUZ8W2llS4',

'ext': 'mp4',

'title': 'The NP that test your phone performance 🙂',

2112

'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',

2113

'uploader': 'Leon Nguyen',

2114

'uploader_id': 'VNSXIII',

2115

'uploader_url': 'http://www.youtube.com/user/VNSXIII',

2116

'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',

2117

'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',

'duration': 21,

'view_count': int,

'age_limit': 0,

'categories': ['Gaming'],

2122

'tags': 'count:23',

2123

'playable_in_embed': True,

2124

'live_status': 'not_live',

2125

'upload_date': '20220103',

2126

'like_count': int,

2127

'availability': 'public',

2128

'channel': 'Leon Nguyen',

2129

'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',

2130

'channel_follower_count': int

2131

}

2132

}, {

2133

# date text is premiered video, ensure upload date in UTC (published 1641172509)

2134

'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',

'info_dict': {

'id': 'mzZzzBU6lrM',

'ext': 'mp4',

'title': 'I Met GeorgeNotFound In Real Life...',

2139

'description': 'md5:cca98a355c7184e750f711f3a1b22c84',

2140

'uploader': 'Quackity',

2141

'uploader_id': 'QuackityHQ',

2142

'uploader_url': 'http://www.youtube.com/user/QuackityHQ',

2143

'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',

2144

'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',

'duration': 955,

'view_count': int,

'age_limit': 0,

'categories': ['Entertainment'],

2149

'tags': 'count:26',

2150

'playable_in_embed': True,

2151

'live_status': 'not_live',

2152

'release_timestamp': 1641172509,

2153

'release_date': '20220103',

2154

'upload_date': '20220103',

2155

'like_count': int,

2156

'availability': 'public',

2157

'channel': 'Quackity',

2158

'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',

2159

'channel_follower_count': int

2160

}

2161

},

2162

{ # continuous livestream. Microformat upload date should be preferred.

2163

# Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27

2164

'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',

2165

'info_dict': {

2166

'id': 'kgx4WGK0oNU',

2167

'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

2168

'ext': 'mp4',

2169

'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2170

'availability': 'public',

2171

'age_limit': 0,

2172

'release_timestamp': 1637975704,

2173

'upload_date': '20210619',

2174

'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2175

'live_status': 'is_live',

2176

'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',

2177

'uploader': '阿鲍Abao',

2178

'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',

2179

'channel': 'Abao in Tokyo',

2180

'channel_follower_count': int,

2181

'release_date': '20211127',

2182

'tags': 'count:39',

2183

'categories': ['People & Blogs'],

2184

'like_count': int,

2185

'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',

2186

'view_count': int,

2187

'playable_in_embed': True,

2188

'description': 'md5:2ef1d002cad520f65825346e2084e49d',

2189

},

2190

'params': {'skip_download': True}

},

]

@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty `list` query value; otherwise ask the parent class."""
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)

2202

2203

def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache is keyed by player id (see _load_player); _player_cache holds
    # extracted signature / nsig functions and decrypted nsig values.
    self._player_cache, self._code_cache = {}, {}

2207

2208

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Attach a lazy fragment generator to every format flagged `is_from_start`.

    Each such format dict is mutated in place: it is marked live, its protocol
    is set to 'http_dash_segments_generator' and its 'fragments' entry becomes
    a partial of `_live_dash_fragments` bound to a shared manifest feed.
    """
    refresh_lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses once more than `delay` seconds have
        # passed since the last refresh, replacing the shared format list.
        nonlocal formats, start_time, is_live
        if time.time() > start_time + delay:
            _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
            video_details = traverse_obj(
                prs, (..., 'videoDetails'), expected_type=dict, default=[])
            microformats = traverse_obj(
                prs, (..., 'microformat', 'playerMicroformatRenderer'),
                expected_type=dict, default=[])
            _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
            start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with refresh_lock:
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        # The requested format is absent from the (possibly refreshed) list
        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)

2251

2252

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate `{'url': ...}` fragment dicts for a live DASH format.

    The loop polls for the newest sequence number, either from the MPD
    manifest or from the "X-Head-Seqnum" header of the last yielded segment,
    and yields every segment between the last known index and that number.
    It ends when the feed reports the stream is no longer live or when the
    no-fragment score exceeds 30.

    @param format_id        format id passed through to `mpd_feed`
    @param live_start_time  stream start time (may be falsy), compared with the download start
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              mapping read for 'start' and popped for 'last_error'
    """
    # FETCH_SPAN: minimum seconds per polling round.
    # MAX_DURATION: 432000 s == the 120 hours mentioned in the warning below.
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up-front: the closure below returns `last_seq` on its failure
    # paths, and the very first call happens before the loop has assigned it.
    # Leaving it unbound turned a manifest failure on the first poll into a NameError.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask the feed for a quick refresh (5 s) when forced or after a 403
        expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # a negative begin_index counts back from the newest segment
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # Only re-check the manifest here; refreshing the sequence
                # number mid-range would skip part of the stream.
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Pace the loop so one round takes at least FETCH_SPAN seconds
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2352

2353

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None."""
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    relative_url = traverse_obj(
        ytcfgs, *candidate_paths, get_all=False, expected_type=compat_str)
    return urljoin('https://www.youtube.com', relative_url) if relative_url else None

2360

2361

def _download_player_url(self, video_id, fatal=False):
    """Build the player base.js URL from the version advertised by the iframe API script.

    Returns None when the script cannot be downloaded or no version is found
    (with `fatal` false).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'

2370

2371

def _signature_cache_id(self, example_sig):
    """ Return the dot-joined lengths of the dot-separated parts of a signature """
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)

2374

2375

@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern that matches `player_url`.

    Raises ExtractorError when none of the patterns match.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)

2384

2385

def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for `player_url`, downloading it at most once per player id.

    Returns None when the download yields nothing and nothing is cached.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not downloaded:
        return None
    # Only successful downloads are remembered
    self._code_cache[player_id] = downloaded
    return downloaded

2395

2396

def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like `example_sig`.

    The function is represented as a list of source indices and stored in the
    'youtube-sigfuncs' filesystem cache, keyed by player id and signature shape.
    """
    player_id = self._extract_player_info(player_url)

    # Key for the filesystem cache; must be a plain file name
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    player_code = self._load_player(video_id, player_url)
    if not player_code:
        return None

    sig_func = self._parse_sig_js(player_code)

    # Feed a string whose i-th character has code point i, so the output
    # reveals which input position lands at each output position.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func

2418

2419

def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to `func` when the 'youtube_print_sig_code' param is set."""
    if not self.get_param('youtube_print_sig_code'):
        return

    def slice_expr(first, last, stride):
        # Render a run of consecutive indices as a slice expression on `s`
        head = '' if first == 0 else str(first)
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (head, tail, stride_part)

    def gen_sig_code(idxs):
        step = None
        # Placeholder only: start is always reassigned whenever step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                if delta == step:
                    continue
                # The run ended at `prev`; emit it and start over
                yield slice_expr(start, prev, step)
                step = None
            elif delta in [-1, 1]:
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield slice_expr(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

2460

2461

def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python callable for it.

    The function name is taken from the `sig` group of the first pattern that
    matches `jscode`; the function is then extracted with JSInterpreter. The
    returned callable takes the scrambled signature string as its only argument.
    """
    # Literal parentheses must stay escaped as \( and \): an unescaped `$`
    # in their place is an end-of-input anchor and the pattern can never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The interpreted function takes its arguments as a list
    return lambda s: initial_function([s])

2484

2485

def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One descrambling function per (player, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        descramble = self._player_cache[cache_key]
        self._print_sig_code(descramble, s)
        return descramble(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2503

2504

def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        # This exact value was already decrypted
        return cache[value_key]

    try:
        function_key = ('nsig', player_url)
        if function_key not in cache:
            cache[function_key] = self._extract_n_function(video_id, player_url)
        decrypted = cache[function_key](s)
        cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2524

2525

def _extract_n_function_name(self, jscode):
    """Return the name of the player's `n` function as found in `jscode`.

    When the matched reference is indexed (`name[idx]`), the name is looked up
    in the JS array literal assigned to `var name = [...]`.
    """
    # Literal parentheses must stay escaped as \( and \): an unescaped `$`
    # in their place is an end-of-input anchor and the pattern can never match.
    nfunc, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return nfunc
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]

2534

2535

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is read from the 'youtube-nsig' cache when present;
    otherwise it is extracted from the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        # Cache miss: pull the function out of the full player source
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    # The interpreted function takes its arguments as a list
    return lambda s: jsi.extract_function_from_code(*func_code)([s])

2552

2553

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is preferred; otherwise the value is searched
    for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))

2576

2577

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL from the player responses, with `ver` and `cpn` set."""
    tracking_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2602

2603

@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embed URLs / video ids referenced in `webpage`, in order of discovery."""
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(found.group('url')) for found in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(video_id) for video_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # findall yields one tuple per match; the video id is the last group
        entries.append(groups[-1])

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry found by `_extract_urls`, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2639

2640

@classmethod
def extract_id(cls, url):
    """Return the `id` group of `_VALID_URL` matched against `url`; raise ExtractorError on no match."""
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2646

2647

def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000 to get seconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)

2661

2662

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else an empty list."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Panels are tried lazily, in order; stop at the first non-empty result
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2677

2678

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2679

chapters = []

2680

last_chapter = {'start_time': 0}

2681

for idx, chapter in enumerate(chapter_list or []):

2682

title = chapter_title(chapter)

2683

start_time = chapter_time(chapter)

2684

if start_time is None:

2685

continue

2686

last_chapter['end_time'] = start_time

2687

if start_time < last_chapter['start_time']:

2688

if idx == 1:

2689

chapters.pop()

2690

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2691

else:

2692

self.report_warning(f'Invalid start time for chapter "{title}"')

2693

continue

2694

last_chapter = {'start_time': start_time, 'title': title}

2695

chapters.append(last_chapter)

2696

last_chapter['end_time'] = duration

2697

return chapters

2698

2699

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search `webpage` for the JSON matched by `regex` and parse it non-fatally.

    The regex followed by the class's boundary pattern is tried first, then
    the bare regex; '{}' is parsed when neither matches.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)

2703

2704

def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'parent' is 'root'
    when no parent is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # The count is read from voteCount.simpleText, falling back to likeCount
    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    votes = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }

2738

2739

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts for a comment section or, recursively, a reply thread.

    This is a generator. The first continuation is taken from
    ``root_continuation_data``; subsequent pages are requested from the
    ``next`` endpoint until no continuation is left.

    ``parent`` is the id of the comment whose replies are being fetched
    (None for the top level). ``tracker`` is the dict of counters shared
    across the recursive calls; it is created here when not supplied.
    """

    def single_config_arg(name):
        # First value of an extractor argument, '' when it is not set
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Return the continuation of the requested sort order, if any."""
        header_continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            estimated_count = self._get_count(header, 'countText', 'commentsCount')
            if estimated_count:
                tracker['est_total'] = estimated_count
                self.to_screen(f'Downloading ~{estimated_count} comments')

            # 1 = new, 0 = top
            sort_index = int(single_config_arg('comment_sort') != 'top')
            sort_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index],
                dict) or {}
            sort_endpoint = sort_item.get('serviceEndpoint') or {}

            header_continuation = (
                self._extract_continuation_ep_data(sort_endpoint)
                or self._extract_continuation(sort_item))
            if header_continuation:
                sort_text = str_or_none(sort_item.get('title')) or (
                    'top comments' if sort_index == 0 else 'newest first')
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
        return header_continuation

    def extract_thread(contents):
        """Yield each comment of ``contents`` followed by its replies.

        A bare ``yield`` (None) is emitted once the parent-comment limit is
        reached; the consumer below stops iterating when it sees it.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            counter = 'total_reply_comments' if parent else 'total_parent_comments'
            tracker[counter] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_limit = min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(replies, reply_limit)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize. The first value
    # (max_comments) is not used in this method; the nested helpers above
    # read the other three.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = [
        int_or_none(value, default=sys.maxsize)
        for value in self._configuration_arg('max_comments', ) + [''] * 4]

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        progress = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, progress)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], progress)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header; it supplies
                # the continuation of the selected sort order
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break

        page_num += 1

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the section identified as
    'comment-item-section' in ``contents``, truncated to the first value of
    the ``max_comments`` extractor argument when that parses as an integer.
    """
    def _real_comment_extract(contents):
        # Pick the first item section that is the comment section (or None)
        renderer = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)

2886

2887

@staticmethod
def _get_checkok_params():
    """Return a new dict with the content-check and racy-check flags set."""
    return dict(contentCheckOk=True, racyCheckOk=True)

2890

2891

@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    ``sts`` is added as 'signatureTimestamp' only when it is not None. The
    result also contains the keys from ``_get_checkok_params()``.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    result = {'playbackContext': {'contentPlaybackContext': playback}}
    result.update(cls._get_checkok_params())
    return result

@staticmethod
def _is_agegated(player_response):
    """Return True if ``player_response`` looks age-gated.

    A response counts as age-gated when its playabilityStatus has a truthy
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' contains
    one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are upper-case constants
    # (e.g. 'UNPLAYABLE', 'AGE_VERIFICATION_REQUIRED'), so compare
    # case-insensitively; otherwise the status markers could never match.
    # Non-string values are skipped instead of raising TypeError.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)

2916

2917

@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2920

2921

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the ``player`` endpoint for ``video_id`` as ``client``.

    The signature timestamp is only looked up when ``player_url`` is given.
    Returns the response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None

2937

2938

def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` extractor argument to a client list.

    Accepted values are the names in INNERTUBE_CLIENTS that do not start
    with '_', plus 'default' (android, web) and 'all'; anything else is
    skipped with a warning. When nothing valid was requested the defaults
    are used. For music URLs the '<client>_music' variant of every selected
    client is added when it exists in INNERTUBE_CLIENTS.

    Returns the clients de-duplicated with their order preserved.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that the extension below never mutates the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions before extending: feeding list.extend() a
        # generator over the very list being extended would also visit the
        # entries appended during the call
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2961

2962

def _extract_player_ytcfg(self, client, video_id):
    """Download the page that carries the ytcfg of ``client`` and extract it.

    Only 'web_music' and 'web_embedded' have such a page; for any other
    client an empty dict is returned without any download.
    """
    config_pages = {
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    if client not in config_pages:
        return {}

    client_label = client.replace('_', ' ').strip()
    webpage = self._download_webpage(
        config_pages[client], video_id, fatal=False,
        note='Downloading %s config' % client_label)
    return self.extract_ytcfg(video_id, webpage) or {}

2971

2972

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of the requested ``clients``.

    Clients are processed in the given order. Further clients may be queued
    while iterating, depending on the playability of the responses seen.

    Returns a tuple ``(player_responses, player_url)``. If a client failed
    and no response at all was collected, the last error is raised;
    otherwise errors are reported as warnings.
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    all_clients = set(clients)
    # Reversed so that pop() hands out the clients in their original order
    pending = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        # NOTE(review): nesting of the `return` was reconstructed from the
        # docstring above - confirm against the upstream file
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in all_clients:
                pending.append(client_name)
                all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per call
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is
            # downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url

3047

3048

def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts for every usable stream in ``streaming_data``.

    First the https streams listed under 'formats'/'adaptiveFormats' are
    yielded (decrypting the signature and the ``n`` parameter where needed),
    then the formats of the HLS and DASH manifests unless they are skipped.

    ``player_url`` is needed to decrypt signatures; formats that require it
    are dropped when it is missing. ``is_live`` influences which manifests
    are read and ``duration`` is used to detect possibly damaged formats.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` guards against an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # Eg: __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality may still be None when the format carries neither
                # 'quality' nor 'audioQuality'; do not crash on it
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set the id and quality of manifest format ``f``.

        Returns False if the same itag was already seen for ``proto``, in
        which case the format should be dropped.
        """
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already provided by another protocol: disambiguate
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag or, failing that,
        # for the same height among the https formats
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string whose first field is the base URL and
    whose remaining fields are '#'-separated level descriptions. Malformed
    levels are reported with a warning and skipped.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reversed so that the base URL (the first field) can be popped off the end
    spec = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return

    top_level = len(spec) - 1
    for i, level_spec in enumerate(spec):
        fields = level_spec.split('#')
        counts = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = fields[6:]

        url = base_url.replace('$L', str(top_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a sheet of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        # NOTE(review): this division requires a numeric duration - confirm
        # that callers never pass None here
        fragment_duration = duration / fragment_count
        fragments = [{
            'url': url.replace('$M', str(j)),
            # The last fragment only covers what is left of the duration
            'duration': min(fragment_duration, duration - (j * fragment_duration)),
        } for j in range(math.ceil(fragment_count))]
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }

3249

3250

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The watch page is not downloaded when 'webpage' is listed in the
    ``player_skip`` extractor argument. Returns the tuple
    ``(webpage, master_ytcfg, player_responses, player_url)``.
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3263

3264

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and extract the formats of a video.

    'isLive' from the video details takes precedence; only when it is None
    is 'isLiveNow' from the live broadcast details consulted.

    Returns the tuple
    ``(live_broadcast_details, is_live, streaming_data, formats)``.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # _extract_formats is a generator; materialise it into a list here
    format_iter = self._extract_formats(streaming_data, video_id, player_url, is_live, duration)
    formats = list(format_iter)

    return live_broadcast_details, is_live, streaming_data, formats

3274

3275

def _real_extract(self, url):

3276

url, smuggled_data = unsmuggle_url(url, {})

3277

video_id = self._match_id(url)

3278

3279

base_url = self.http_scheme() + '//www.youtube.com/'

3280

webpage_url = base_url + 'watch?v=' + video_id

3281

3282

webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

3283

3284

playability_statuses = traverse_obj(

3285

player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

3286

3287

trailer_video_id = get_first(

3288

playability_statuses,

3289

('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),

3290

expected_type=str)

3291

if trailer_video_id:

3292

return self.url_result(

3293

trailer_video_id, self.ie_key(), trailer_video_id)

3294

3295

search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))

3296

if webpage else (lambda x: None))

3297

3298

video_details = traverse_obj(

3299

player_responses, (..., 'videoDetails'), expected_type=dict, default=[])

3300

microformats = traverse_obj(

3301

player_responses, (..., 'microformat', 'playerMicroformatRenderer'),

3302

expected_type=dict, default=[])

3303

video_title = (

3304

get_first(video_details, 'title')

3305

or self._get_text(microformats, (..., 'title'))

3306

or search_meta(['og:title', 'twitter:title', 'title']))

3307

video_description = get_first(video_details, 'shortDescription')

3308

3309

multifeed_metadata_list = get_first(

3310

player_responses,

3311

('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),

3312

expected_type=str)

3313

if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):

3314

if self.get_param('noplaylist'):

3315

self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

else:

entries = []

feed_ids = []

for feed in multifeed_metadata_list.split(','):

3320

# Unquote should take place before split on comma (,) since textual

3321

# fields may contain comma as well (see

3322

# https://github.com/ytdl-org/youtube-dl/issues/8536)

3323

feed_data = compat_parse_qs(

3324

compat_urllib_parse_unquote_plus(feed))

3325

3326

def feed_entry(name):

3327

return try_get(

3328

feed_data, lambda x: x[name][0], compat_str)

3329

3330

feed_id = feed_entry('id')

3331

if not feed_id:

3332

continue

3333

feed_title = feed_entry('title')

3334

title = video_title

3335

if feed_title:

3336

title += ' (%s)' % feed_title

3337

entries.append({

3338

'_type': 'url_transparent',

3339

'ie_key': 'Youtube',

3340

'url': smuggle_url(

3341

'%swatch?v=%s' % (base_url, feed_data['id'][0]),

3342

{'force_singlefeed': True}),

3343

'title': title,

3344

})

3345

feed_ids.append(feed_id)

3346

self.to_screen(

3347

'Downloading multifeed video (%s) - add --no-playlist to just download video %s'

3348

% (', '.join(feed_ids), video_id))

3349

return self.playlist_result(

3350

entries, video_id, video_title, video_description)

3351

3352

duration = int_or_none(

3353

get_first(video_details, 'lengthSeconds')

3354

or get_first(microformats, 'lengthSeconds')

3355

or parse_duration(search_meta('duration'))) or None

3356

3357

live_broadcast_details, is_live, streaming_data, formats = self._list_formats(

3358

video_id, microformats, video_details, player_responses, player_url, duration)

3359

3360

if not formats:

3361

if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):

3362

self.report_drm(video_id)

3363

pemr = get_first(

3364

playability_statuses,

3365

('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}

3366

reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')

3367

subreason = clean_html(self._get_text(pemr, 'subreason') or '')

3368

if subreason:

3369

if subreason == 'The uploader has not made this video available in your country.':

3370

countries = get_first(microformats, 'availableCountries')

3371

if not countries:

3372

regions_allowed = search_meta('regionsAllowed')

3373

countries = regions_allowed.split(',') if regions_allowed else None

3374

self.raise_geo_restricted(subreason, countries, metadata_available=True)

3375

reason += f'. {subreason}'

3376

if reason:

3377

self.raise_no_formats(reason, expected=True)

3378

3379

keywords = get_first(video_details, 'keywords', expected_type=list) or []

3380

if not keywords and webpage:

3381

keywords = [

3382

unescapeHTML(m.group('content'))

3383

for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]

3384

for keyword in keywords:

3385

if keyword.startswith('yt:stretch='):

3386

mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)

3387

if mobj:

3388

# NB: float is intentional for forcing float division

3389

w, h = (float(v) for v in mobj.groups())

if w > 0 and h > 0:

ratio = w / h

for f in formats:

if f.get('vcodec') != 'none':

3394

f['stretched_ratio'] = ratio

3395

break

3396

thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))

3397

thumbnail_url = search_meta(['og:image', 'twitter:image'])

3398

if thumbnail_url:

3399

thumbnails.append({

3400

'url': thumbnail_url,

3401

})

3402

original_thumbnails = thumbnails.copy()

3403

3404

# The best resolution thumbnails sometimes does not appear in the webpage

3405

# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340

3406

# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>

3407

thumbnail_names = [

3408

'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',

3409

'hqdefault', 'hq1', 'hq2', 'hq3', '0',

3410

'mqdefault', 'mq1', 'mq2', 'mq3',

3411

'default', '1', '2', '3'

3412

]

3413

n_thumbnail_names = len(thumbnail_names)

3414

thumbnails.extend({

3415

'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(

3416

video_id=video_id, name=name, ext=ext,

3417

webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),

3418

} for name in thumbnail_names for ext in ('webp', 'jpg'))

3419

for thumb in thumbnails:

3420

i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)

3421

thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)

3422

self._remove_duplicate_formats(thumbnails)

3423

self._downloader._sort_thumbnails(original_thumbnails)

3424

3425

category = get_first(microformats, 'category') or search_meta('genre')

3426

channel_id = str_or_none(

3427

get_first(video_details, 'channelId')

3428

or get_first(microformats, 'externalChannelId')

3429

or search_meta('channelId'))

3430

owner_profile_url = get_first(microformats, 'ownerProfileUrl')

3431

3432

live_content = get_first(video_details, 'isLiveContent')

3433

is_upcoming = get_first(video_details, 'isUpcoming')

3434

if is_live is None:

3435

if is_upcoming or live_content is False:

3436

is_live = False

3437

if is_upcoming is None and (live_content or is_live):

3438

is_upcoming = False

3439

live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))

3440

live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))

3441

if not duration and live_end_time and live_start_time:

3442

duration = live_end_time - live_start_time

3443

3444

if is_live and self.get_param('live_from_start'):

3445

self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

3446

3447

formats.extend(self._extract_storyboard(player_responses, duration))

3448

3449

# Source is given priority since formats that throttle are given lower source_preference

3450

# When throttling issue is fully fixed, remove this

3451

self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

info = {

'id': video_id,

'title': video_title,

3456

'formats': formats,

3457

'thumbnails': thumbnails,

3458

# The best thumbnail that we are sure exists. Prevents unnecessary

3459

# URL checking if user don't care about getting the best possible thumbnail

3460

'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),

3461

'description': video_description,

3462

'uploader': get_first(video_details, 'author'),

3463

'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,

3464

'uploader_url': owner_profile_url,

3465

'channel_id': channel_id,

3466

'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),

3467

'duration': duration,

3468

'view_count': int_or_none(

3469

get_first((video_details, microformats), (..., 'viewCount'))

3470

or search_meta('interactionCount')),

3471

'average_rating': float_or_none(get_first(video_details, 'averageRating')),

3472

'age_limit': 18 if (

3473

get_first(microformats, 'isFamilySafe') is False

3474

or search_meta('isFamilyFriendly') == 'false'

3475

or search_meta('og:restrictions:age') == '18+') else 0,

3476

'webpage_url': webpage_url,

3477

'categories': [category] if category else None,

3478

'tags': keywords,

3479

'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),

3480

'is_live': is_live,

3481

'was_live': (False if is_live or is_upcoming or live_content is False

3482

else None if is_live is None or is_upcoming is None

3483

else live_content),

3484

'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL

3485

'release_timestamp': live_start_time,

3486

}

3487

3488

pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)

3489

if pctr:

3490

def get_lang_code(track):

3491

return (remove_start(track.get('vssId') or '', '.').replace('.', '-')

3492

or track.get('languageCode'))

3493

3494

# Converted into dicts to remove duplicates

3495

captions = {

3496

get_lang_code(sub): sub

3497

for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}

3498

translation_languages = {

3499

lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)

3500

for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

3501

3502

def process_language(container, base_url, lang_code, sub_name, query):

3503

lang_subs = container.setdefault(lang_code, [])

3504

for fmt in self._SUBTITLE_FORMATS:

query.update({

'fmt': fmt,

})

lang_subs.append({

'ext': fmt,

'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),

'name': sub_name,

})

subtitles, automatic_captions = {}, {}

3515

for lang_code, caption_track in captions.items():

3516

base_url = caption_track.get('baseUrl')

3517

orig_lang = parse_qs(base_url).get('lang', [None])[-1]

3518

if not base_url:

3519

continue

3520

lang_name = self._get_text(caption_track, 'name', max_runs=1)

3521

if caption_track.get('kind') != 'asr':

if not lang_code:

continue

process_language(

subtitles, base_url, lang_code, lang_name, {})

3526

if not caption_track.get('isTranslatable'):

3527

continue

3528

for trans_code, trans_name in translation_languages.items():

3529

if not trans_code:

3530

continue

3531

orig_trans_code = trans_code

3532

if caption_track.get('kind') != 'asr':

3533

if 'translated_subs' in self._configuration_arg('skip'):

3534

continue

3535

trans_code += f'-{lang_code}'

3536

trans_name += format_field(lang_name, template=' from %s')

3537

# Add an "-orig" label to the original language so that it can be distinguished.

3538

# The subs are returned without "-orig" as well for compatibility

3539

if lang_code == f'a-{orig_trans_code}':

3540

process_language(

3541

automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})

3542

# Setting tlang=lang returns damaged subtitles.

3543

process_language(automatic_captions, base_url, trans_code, trans_name,

3544

{} if orig_lang == orig_trans_code else {'tlang': trans_code})

3545

info['automatic_captions'] = automatic_captions

3546

info['subtitles'] = subtitles

3547

3548

parsed_url = compat_urllib_parse_urlparse(url)

3549

for component in [parsed_url.fragment, parsed_url.query]:

3550

query = compat_parse_qs(component)

3551

for k, v in query.items():

3552

for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:

3553

d_k += '_time'

3554

if d_k not in info and k in s_ks:

3555

info[d_k] = parse_duration(query[k][0])

3556

3557

# Youtube Music Auto-generated description

3558

if video_description:

3559

mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)

3560

if mobj:

3561

release_year = mobj.group('release_year')

3562

release_date = mobj.group('release_date')

3563

if release_date:

3564

release_date = release_date.replace('-', '')

3565

if not release_year:

3566

release_year = release_date[:4]

3567

info.update({

3568

'album': mobj.group('album'.strip()),

3569

'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),

3570

'track': mobj.group('track').strip(),

3571

'release_date': release_date,

3572

'release_year': int_or_none(release_year),

})

initial_data = None

if webpage:

initial_data = self._extract_yt_initial_variable(

3578

webpage, self._YT_INITIAL_DATA_RE, video_id,

3579

'yt initial data')

3580

if not initial_data:

3581

query = {'videoId': video_id}

3582

query.update(self._get_checkok_params())

3583

initial_data = self._extract_response(

3584

item_id=video_id, ep='next', fatal=False,

3585

ytcfg=master_ytcfg, query=query,

3586

headers=self.generate_api_headers(ytcfg=master_ytcfg),

3587

note='Downloading initial data API JSON')

3588

3589

try:

3590

# This will error if there is no livechat

3591

initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']

3592

info.setdefault('subtitles', {})['live_chat'] = [{

3593

'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies

3594

'video_id': video_id,

3595

'ext': 'json',

3596

'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',

3597

}]

3598

except (KeyError, IndexError, TypeError):

pass

if initial_data:

info['chapters'] = (

self._extract_chapters_from_json(initial_data, duration)

3604

or self._extract_chapters_from_engagement_panel(initial_data, duration)

3605

or None)

3606

3607

contents = traverse_obj(

3608

initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),

3609

expected_type=list, default=[])

3610

3611

vpir = get_first(contents, 'videoPrimaryInfoRenderer')

3612

if vpir:

3613

stl = vpir.get('superTitleLink')

3614

if stl:

3615

stl = self._get_text(stl)

3616

if try_get(

3617

vpir,

3618

lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':

3619

info['location'] = stl

3620

else:

3621

mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)

3622

if mobj:

3623

info.update({

3624

'series': mobj.group(1),

3625

'season_number': int(mobj.group(2)),

3626

'episode_number': int(mobj.group(3)),

})

for tlb in (try_get(

vpir,

lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],

3631

list) or []):

3632

tbr = tlb.get('toggleButtonRenderer') or {}

3633

for getter, regex in [(

3634

lambda x: x['defaultText']['accessibility']['accessibilityData'],

3635

r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([

3636

lambda x: x['accessibility'],

3637

lambda x: x['accessibilityData']['accessibilityData'],

3638

], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:

3639

label = (try_get(tbr, getter, dict) or {}).get('label')

3640

if label:

3641

mobj = re.match(regex, label)

3642

if mobj:

3643

info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))

3644

break

3645

sbr_tooltip = try_get(

3646

vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])

3647

if sbr_tooltip:

3648

like_count, dislike_count = sbr_tooltip.split(' / ')

3649

info.update({

3650

'like_count': str_to_int(like_count),

3651

'dislike_count': str_to_int(dislike_count),

3652

})

3653

vsir = get_first(contents, 'videoSecondaryInfoRenderer')

3654

if vsir:

3655

vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))

3656

info.update({

3657

'channel': self._get_text(vor, 'title'),

3658

'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

rows = try_get(

vsir,

lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],

3663

list) or []

3664

multiple_songs = False

3665

for row in rows:

3666

if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:

3667

multiple_songs = True

3668

break

3669

for row in rows:

3670

mrr = row.get('metadataRowRenderer') or {}

3671

mrr_title = mrr.get('title')

3672

if not mrr_title:

3673

continue

3674

mrr_title = self._get_text(mrr, 'title')

3675

mrr_contents_text = self._get_text(mrr, ('contents', 0))

3676

if mrr_title == 'License':

3677

info['license'] = mrr_contents_text

3678

elif not multiple_songs:

3679

if mrr_title == 'Album':

3680

info['album'] = mrr_contents_text

3681

elif mrr_title == 'Artist':

3682

info['artist'] = mrr_contents_text

3683

elif mrr_title == 'Song':

3684

info['track'] = mrr_contents_text

3685

3686

fallbacks = {

3687

'channel': 'uploader',

3688

'channel_id': 'uploader_id',

3689

'channel_url': 'uploader_url',

3690

}

3691

3692

# The upload date for scheduled, live and past live streams / premieres in microformats

3693

# may be different from the stream date. Although not in UTC, we will prefer it in this case.

3694

# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139

3695

upload_date = (

3696

unified_strdate(get_first(microformats, 'uploadDate'))

3697

or unified_strdate(search_meta('uploadDate')))

3698

if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):

3699

upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')

3700

info['upload_date'] = upload_date

3701

3702

for to, frm in fallbacks.items():

3703

if not info.get(to):

3704

info[to] = info.get(frm)

3705

3706

for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:

v = info.get(s_k)

if v:

info[d_k] = v

is_private = get_first(video_details, 'isPrivate', expected_type=bool)

3712

is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)

3713

is_membersonly = None

3714

is_premium = None

3715

if initial_data and is_private is not None:

3716

is_membersonly = False

3717

is_premium = False

3718

contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []

3719

badge_labels = set()

3720

for content in contents:

3721

if not isinstance(content, dict):

3722

continue

3723

badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))

3724

for badge_label in badge_labels:

3725

if badge_label.lower() == 'members only':

3726

is_membersonly = True

3727

elif badge_label.lower() == 'premium':

3728

is_premium = True

3729

elif badge_label.lower() == 'unlisted':

3730

is_unlisted = True

3731

3732

info['availability'] = self._availability(

3733

is_private=is_private,

3734

needs_premium=is_premium,

3735

needs_subscription=is_membersonly,

3736

needs_auth=info['age_limit'] >= 18,

3737

is_unlisted=None if is_private is None else is_unlisted)

3738

3739

info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

3740

3741

self.mark_watched(video_id, player_responses)

return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3747

3748

@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorator for extraction methods that are called as func(self, url, smuggled_data).

    The wrapper unsmuggles the URL, marks music URLs in the smuggled data and,
    when there is any smuggled data, smuggles it into the URL of every entry
    of the returned info dict.
    """

    def _resmuggle(items, payload):
        # Lazily rewrites each entry so that its URL carries the payload along
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data or not result.get('entries'):
            return result
        result['entries'] = _resmuggle(result['entries'], smuggled_data)
        return result

    return wrapper

def _extract_channel_id(self, webpage):

3769

channel_id = self._html_search_meta(

3770

'channelId', webpage, 'channel id', default=None)

3771

if channel_id:

3772

return channel_id

3773

channel_url = self._html_search_meta(

3774

('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',

3775

'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',

3776

'twitter:app:url:googleplay'), webpage, 'channel url')

3777

return self._search_regex(

3778

r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',

3779

channel_url, 'channel id')

3780

3781

@staticmethod

3782

def _extract_basic_item_renderer(item):

3783

# Modified from _extract_grid_item_renderer

3784

known_basic_renderers = (

3785

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'

3786

)

3787

for key, renderer in item.items():

3788

if not isinstance(renderer, dict):

3789

continue

3790

elif key in known_basic_renderers:

3791

return renderer

3792

elif key.startswith('grid') and key.endswith('Renderer'):

3793

return renderer

3794

3795

def _grid_entries(self, grid_renderer):
    """
    Yield one entry for each usable item in grid_renderer['items'].

    An item is resolved, in this order, as a playlist, a video, a channel or
    a generic navigation endpoint URL handled by one of the YouTube extractors.
    """
    for raw_item in grid_renderer['items']:
        if not isinstance(raw_item, dict):
            continue
        info = self._extract_basic_item_renderer(raw_item)
        if not isinstance(info, dict):
            continue
        title = self._get_text(info, 'title')

        # playlist
        playlist_id = info.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue

        # video
        if info.get('videoId'):
            yield self._extract_video(info)
            continue

        # channel
        channel_id = info.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            info, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        ep_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not ep_url:
            continue
        # Only the first suitable extractor is used
        matching_ie = next(
            (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
            None)
        if matching_ie is not None:
            yield self.url_result(
                ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)

3834

3835

def _music_reponsive_list_entry(self, renderer):
    """
    Build a url_result for a music list item renderer.

    Tried in order: the item's own video, the watch endpoint's playlist
    (with or without a video) and the browse endpoint. Returns None when
    none of them is available.
    """
    def _endpoint(*path):
        return traverse_obj(renderer, ('navigationEndpoint',) + path)

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = _endpoint('watchEndpoint', 'playlistId')
    if playlist_id:
        video_id = _endpoint('watchEndpoint', 'videoId')
        if video_id:
            url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = _endpoint('browseEndpoint', 'browseId')
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)

3852

3853

def _shelf_entries_from_content(self, shelf_renderer):

3854

content = shelf_renderer.get('content')

3855

if not isinstance(content, dict):

3856

return

3857

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3858

if renderer:

3859

# TODO: add support for nested playlists so each shelf is processed

3860

# as separate playlist

3861

# TODO: this includes only first N items

3862

for entry in self._grid_entries(renderer):

3863

yield entry

3864

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url_result for the shelf's own URL (if it has one), followed by
    the entries extracted from the shelf content.

    With skip_channels set, a shelf whose URL contains '/channels?' yields nothing.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3885

3886

def _playlist_entries(self, video_list_renderer):

3887

for content in video_list_renderer['contents']:

3888

if not isinstance(content, dict):

3889

continue

3890

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3891

if not isinstance(renderer, dict):

3892

continue

3893

video_id = renderer.get('videoId')

3894

if not video_id:

3895

continue

3896

yield self._extract_video(renderer)

3897

3898

def _rich_entries(self, rich_grid_renderer):
    """Yield the video at ['content']['videoRenderer'], if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3905

3906

def _video_entry(self, video_renderer):

3907

video_id = video_renderer.get('videoId')

3908

if video_id:

3909

return self._extract_video(video_renderer)

3910

3911

def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the tile's on-tap URL, or None if there is no URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))

3917

3918

def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a backstage post: its attached video,
    its attached playlist and the YouTube videos linked in the post text.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)

3953

3954

def _post_thread_continuation_entries(self, post_thread_continuation):

3955

contents = post_thread_continuation.get('contents')

3956

if not isinstance(contents, list):

3957

return

3958

for content in contents:

3959

renderer = content.get('backstagePostThreadRenderer')

3960

if not isinstance(renderer, dict):

3961

continue

3962

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3967

for content in contents:

3968

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3969

if video_renderer:

3970

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    continuation_list is an output parameter: it is reset in-place to [None]
    and its single element is set to the continuation that was found, if any.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to the callable that produces its entries
    entry_makers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda r: [self._music_reponsive_list_entry(r)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda r: [self._video_entry(r)],
        'playlistRenderer': lambda r: self._grid_entries({'items': [{'playlistRenderer': r}]}),
        'channelRenderer': lambda r: self._grid_entries({'items': [{'channelRenderer': r}]}),
        'hashtagTileRenderer': lambda r: [self._hashtag_tile_entry(r)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        is_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in entry_makers:
                    continue
                yield from filter(None, entry_makers[key](renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

4023

4024

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab: first those embedded in tab['content'], then
    those of each continuation page, for as long as a continuation is found.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so the rebinding done
        # in the paging loop below is seen here
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for responses carrying 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items: (handler, key the items are wrapped in)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        known_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if known_key is not None:
            continuation_renderer = continuation_contents[known_key]
            continuation_list = [None]
            yield from continuation_handlers[known_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        known_key = next((key for key in first_item if key in item_handlers), None)
        if known_key is None:
            break
        handler, container_key = item_handlers[known_key]
        video_items_renderer = {container_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the (expandable) tab renderer that is marked as selected.

    If no tab is selected, ExtractorError is raised when fatal is set;
    otherwise None is returned.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None

4111

4112

def _extract_uploader(self, data):
    """
    Return a dict with the 'uploader', 'uploader_id' and 'uploader_url' found
    in the secondary playlist sidebar. Fields whose value is None are left out.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def _browse_endpoint(key):
        return try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint'][key], compat_str)

    owner_text = owner.get('text')
    fields = {
        # "by X and N others" is reduced to "X"; any other text is used as-is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': _browse_endpoint('browseId'),
        'uploader_url': urljoin(
            'https://www.youtube.com/', _browse_endpoint('canonicalBaseUrl')),
    }
    return {key: value for key, value in fields.items() if value is not None}

4127

4128

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a page.

    Metadata is read from the channel or playlist metadata renderer, the
    primary sidebar and the header; entries come from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    def _add_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The tab name is appended to the title
    title = (title
             + format_field(selected_tab, 'title', ' - %s')
             + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

4219

4220

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a mix playlist, requesting further pages from the
    'next' endpoint until a stop condition is met.

    Extraction stops when a page has no videos, when a page has nothing
    after the last video already seen, when the first video shows up again,
    or when the response holds no further playlist.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may have no watch endpoint; fall back to the defaults below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response: nothing more to extract
            return

4254

4255

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist shown next to a video.

    If the playlist has its own URL that differs from url, a url_result for
    it is returned; otherwise the playlist is extracted as a mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4272

4273

def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its primary sidebar renderer.

    Labels come from the renderer's badges and, when present, from the selected
    entry of its privacy dropdown.
    Note: nothing is treated as public unless a 'public' label is found
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    # None means "unknown"; only an explicit label changes either flag
    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)

4304

4305

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value found among the playlist sidebar items.

    @param data:          response containing sidebar -> playlistSidebarRenderer -> items
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type handed to try_get for the lookup
    @returns the matching renderer, or None when no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    # Lazily stops at the first truthy candidate
    return next(filter(None, candidates), None)

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again so that unavailable videos are included.

    The browse id and params come from the sidebar menu entry labelled
    'show unavailable videos' when one is found; otherwise default values are sent.
    Returns None without any request when there is no primary sidebar renderer.
    @param item_id: playlist id; also used to build the default browse id
    @param data:    response the sidebar is read from
    @param ytcfg:   used for the API headers and the request
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Stays empty unless the menu entry is found, so the defaults below apply
    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        caption = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if caption and caption.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id,
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')

4349

4350

def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and its initial data, retrying on some failures.

    A retry happens after a network error (other than HTTP 403/429) and when
    the initial data has none of the expected top-level keys. The number of retries
    is read from the 'extractor_retries' param (default 3).
    @param fatal: when false, failures are reported as warnings instead of raised
    @returns (webpage, data) as last obtained; either may be None
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = pending_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if pending_error:
            self.report_warning('%s. Retrying ...' % pending_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage%s' % (retry_suffix,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            is_blocked = isinstance(cause, compat_HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_blocked:
                pending_error = error_to_compat_str(cause or err.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        # Only reached when the download and parsing above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
            break

        pending_error = 'Incomplete yt initial data received'
        if attempt < max_retries:
            continue
        if fatal:
            raise ExtractorError(pending_error)
        self.report_warning(pending_error)
        break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the data for a URL from the webpage, falling back to the API when the webpage gave none.

    The webpage step is skipped when 'webpage' is in the 'skip' configuration arg.
    @param ytcfg:          if falsy, it is extracted from the downloaded webpage
    @param fatal:          raise instead of warn on home page redirect, failed auth check
                           and (passed on) API failure
    @param webpage_fatal:  passed as `fatal` to _extract_webpage
    @param default_client: passed on to _extract_tab_endpoint
    @returns (data, ytcfg)
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'),
            expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            redirect_msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(redirect_msg, expected=True)
            self.report_warning(redirect_msg, only_once=True)

    if data:
        return data, ytcfg

    # No webpage data: go through the API instead
    if not ytcfg and self.is_authenticated:
        auth_msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                auth_msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                           ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(auth_msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg

4422

4423

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and fetch the data it points to.

    A 'browseEndpoint' in the resolve response is requested from 'browse',
    a 'watchEndpoint' from 'next'; the former is tried first.
    @param fatal: raise ExtractorError when the URL cannot be resolved; otherwise
                  a warning is reported and None is returned
    @returns the response of the resolved endpoint
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers,
        ytcfg=ytcfg, fatal=fatal, ep='navigation/resolve_url',
        note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint to query with its value)
    endpoint_map = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for response_key, api_endpoint in endpoint_map:
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][response_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    failure_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(failure_note, item_id)
        return
    raise ExtractorError(failure_note, expected=True)

4440

4441

# Value _search_results falls back to when it is called without `params`;
# a falsy value (such as None) means no 'params' field is added to the search query.
_SEARCH_PARAMS = None

4442

4443

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Generator over the entries of a search, requesting one page after another.

    @param query:          search text; sent as the 'query' field
    @param params:         sent as the 'params' field when truthy;
                           defaults to self._SEARCH_PARAMS when not given
    @param default_client: passed on to _extract_response
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_data = {'query': query}
    if params:
        request_data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    # Distinct first keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})

    # One-element list passed to _extract_entries; its item is read back after each page
    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        request_data.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request_data,
            default_client=default_client, check_get_keys=check_get_keys)
        page_contents = list(variadic(traverse_obj(response, *content_keys)))
        yield from self._extract_entries({'contents': page_contents}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4472

IE_DESC = 'YouTube Tabs'

4473

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4482

(?P<not_channel>

4483

feed/|hashtag/|

4484

(?:playlist|watch)\?.*?\blist=

4485

)|

4486

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4491

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4492

}

4493

IE_NAME = 'youtube:tab'

4494

4495

_TESTS = [{

4496

'note': 'playlists, multipage',

4497

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4498

'playlist_mincount': 94,

4499

'info_dict': {

4500

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4501

'title': 'Igor Kleiner - Playlists',

4502

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4503

'uploader': 'Igor Kleiner',

4504

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4505

'channel': 'Igor Kleiner',

4506

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4507

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4508

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4509

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4510

'channel_follower_count': int

4511

},

4512

}, {

4513

'note': 'playlists, multipage, different order',

4514

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4515

'playlist_mincount': 94,

4516

'info_dict': {

4517

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4518

'title': 'Igor Kleiner - Playlists',

4519

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4520

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4521

'uploader': 'Igor Kleiner',

4522

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4523

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4524

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4525

'channel': 'Igor Kleiner',

4526

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4527

'channel_follower_count': int

4528

},

4529

}, {

4530

'note': 'playlists, series',

4531

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4532

'playlist_mincount': 5,

4533

'info_dict': {

4534

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4535

'title': '3Blue1Brown - Playlists',

4536

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4537

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4538

'uploader': '3Blue1Brown',

4539

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4540

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4541

'channel': '3Blue1Brown',

4542

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4543

'tags': ['Mathematics'],

4544

'channel_follower_count': int

4545

},

4546

}, {

4547

'note': 'playlists, singlepage',

4548

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4549

'playlist_mincount': 4,

4550

'info_dict': {

4551

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4552

'title': 'ThirstForScience - Playlists',

4553

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4554

'uploader': 'ThirstForScience',

4555

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4556

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4557

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4558

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4559

'tags': 'count:13',

4560

'channel': 'ThirstForScience',

4561

'channel_follower_count': int

4562

}

4563

}, {

4564

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4565

'only_matching': True,

4566

}, {

4567

'note': 'basic, single video playlist',

4568

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4569

'info_dict': {

4570

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4571

'uploader': 'Sergey M.',

4572

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4573

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4578

'channel': 'Sergey M.',

4579

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4580

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4581

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4586

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4587

'info_dict': {

4588

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4589

'uploader': 'Sergey M.',

4590

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4591

'title': 'youtube-dl empty playlist',

4592

'tags': [],

4593

'channel': 'Sergey M.',

4594

'description': '',

4595

'modified_date': '20160902',

4596

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4597

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4598

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4604

'info_dict': {

4605

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4606

'title': 'lex will - Home',

4607

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4608

'uploader': 'lex will',

4609

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4610

'channel': 'lex will',

4611

'tags': ['bible', 'history', 'prophesy'],

4612

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4613

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4614

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4615

'channel_follower_count': int

4616

},

4617

'playlist_mincount': 2,

4618

}, {

4619

'note': 'Videos tab',

4620

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4621

'info_dict': {

4622

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4623

'title': 'lex will - Videos',

4624

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4625

'uploader': 'lex will',

4626

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4627

'tags': ['bible', 'history', 'prophesy'],

4628

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4629

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4630

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4631

'channel': 'lex will',

4632

'channel_follower_count': int

4633

},

4634

'playlist_mincount': 975,

4635

}, {

4636

'note': 'Videos tab, sorted by popular',

4637

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4638

'info_dict': {

4639

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4640

'title': 'lex will - Videos',

4641

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4642

'uploader': 'lex will',

4643

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4644

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4645

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4646

'channel': 'lex will',

4647

'tags': ['bible', 'history', 'prophesy'],

4648

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4649

'channel_follower_count': int

4650

},

4651

'playlist_mincount': 199,

4652

}, {

4653

'note': 'Playlists tab',

4654

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4655

'info_dict': {

4656

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4657

'title': 'lex will - Playlists',

4658

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4659

'uploader': 'lex will',

4660

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4661

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4662

'channel': 'lex will',

4663

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4664

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4665

'tags': ['bible', 'history', 'prophesy'],

4666

'channel_follower_count': int

4667

},

4668

'playlist_mincount': 17,

4669

}, {

4670

'note': 'Community tab',

4671

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4672

'info_dict': {

4673

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4674

'title': 'lex will - Community',

4675

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4676

'uploader': 'lex will',

4677

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4678

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4679

'channel': 'lex will',

4680

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4681

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4682

'tags': ['bible', 'history', 'prophesy'],

4683

'channel_follower_count': int

4684

},

4685

'playlist_mincount': 18,

4686

}, {

4687

'note': 'Channels tab',

4688

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4689

'info_dict': {

4690

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4691

'title': 'lex will - Channels',

4692

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4693

'uploader': 'lex will',

4694

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4695

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4696

'channel': 'lex will',

4697

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4698

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4699

'tags': ['bible', 'history', 'prophesy'],

4700

'channel_follower_count': int

4701

},

4702

'playlist_mincount': 12,

4703

}, {

4704

'note': 'Search tab',

4705

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4706

'playlist_mincount': 40,

4707

'info_dict': {

4708

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4709

'title': '3Blue1Brown - Search - linear algebra',

4710

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4711

'uploader': '3Blue1Brown',

4712

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4713

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4714

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4715

'tags': ['Mathematics'],

4716

'channel': '3Blue1Brown',

4717

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4718

'channel_follower_count': int

4719

},

4720

}, {

4721

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4722

'only_matching': True,

4723

}, {

4724

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4725

'only_matching': True,

4726

}, {

4727

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4728

'only_matching': True,

4729

}, {

4730

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4731

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4732

'info_dict': {

4733

'title': '29C3: Not my department',

4734

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4735

'uploader': 'Christiaan008',

4736

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4737

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4738

'tags': [],

4739

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4740

'view_count': int,

4741

'modified_date': '20150605',

4742

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4743

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4744

'channel': 'Christiaan008',

4745

},

4746

'playlist_count': 96,

4747

}, {

4748

'note': 'Large playlist',

4749

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4750

'info_dict': {

4751

'title': 'Uploads from Cauchemar',

4752

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4753

'uploader': 'Cauchemar',

4754

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4755

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4756

'tags': [],

4757

'modified_date': r're:\d{8}',

4758

'channel': 'Cauchemar',

4759

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4760

'view_count': int,

4761

'description': '',

4762

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4763

},

4764

'playlist_mincount': 1123,

4765

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4766

}, {

4767

'note': 'even larger playlist, 8832 videos',

4768

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4769

'only_matching': True,

4770

}, {

4771

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4772

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4773

'info_dict': {

4774

'title': 'Uploads from Interstellar Movie',

4775

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4776

'uploader': 'Interstellar Movie',

4777

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4778

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4779

'tags': [],

4780

'view_count': int,

4781

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4782

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4783

'channel': 'Interstellar Movie',

4784

'description': '',

4785

'modified_date': r're:\d{8}',

4786

},

4787

'playlist_mincount': 21,

4788

}, {

4789

'note': 'Playlist with "show unavailable videos" button',

4790

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4791

'info_dict': {

4792

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4793

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4794

'uploader': 'Phim Siêu Nhân Nhật Bản',

4795

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4796

'view_count': int,

4797

'channel': 'Phim Siêu Nhân Nhật Bản',

4798

'tags': [],

4799

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4800

'description': '',

4801

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4802

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4803

'modified_date': r're:\d{8}',

4804

},

4805

'playlist_mincount': 200,

4806

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4807

}, {

4808

'note': 'Playlist with unavailable videos in page 7',

4809

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4810

'info_dict': {

4811

'title': 'Uploads from BlankTV',

4812

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4813

'uploader': 'BlankTV',

4814

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4815

'channel': 'BlankTV',

4816

'channel_url': 'https://www.youtube.com/c/blanktv',

4817

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4818

'view_count': int,

4819

'tags': [],

4820

'uploader_url': 'https://www.youtube.com/c/blanktv',

4821

'modified_date': r're:\d{8}',

4822

'description': '',

4823

},

4824

'playlist_mincount': 1000,

4825

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4826

}, {

4827

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4828

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4829

'info_dict': {

4830

'title': 'Data Analysis with Dr Mike Pound',

4831

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4832

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4833

'uploader': 'Computerphile',

4834

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4835

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4836

'tags': [],

4837

'view_count': int,

4838

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4839

'channel_url': 'https://www.youtube.com/user/Computerphile',

4840

'channel': 'Computerphile',

4841

},

4842

'playlist_mincount': 11,

4843

}, {

4844

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4845

'only_matching': True,

4846

}, {

4847

'note': 'Playlist URL that does not actually serve a playlist',

4848

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4853

'uploader': 'STREEM',

4854

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4855

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4856

'upload_date': '20150526',

4857

'license': 'Standard YouTube License',

4858

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4859

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4866

},

4867

'skip': 'This video is not available.',

4868

'add_ie': [YoutubeIE.ie_key()],

4869

}, {

4870

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4871

'only_matching': True,

4872

}, {

4873

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4874

'only_matching': True,

4875

}, {

4876

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4877

'info_dict': {

4878

'id': 'GgL890LIznQ', # This will keep changing

4879

'ext': 'mp4',

4880

'title': str,

4881

'uploader': 'Sky News',

4882

'uploader_id': 'skynews',

4883

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4884

'upload_date': r're:\d{8}',

4885

'description': str,

4886

'categories': ['News & Politics'],

4887

'tags': list,

4888

'like_count': int,

4889

'release_timestamp': 1642502819,

4890

'channel': 'Sky News',

4891

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4892

'age_limit': 0,

4893

'view_count': int,

4894

'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',

4895

'playable_in_embed': True,

4896

'release_date': '20220118',

4897

'availability': 'public',

4898

'live_status': 'is_live',

4899

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4900

'channel_follower_count': int

4901

},

4902

'params': {

4903

'skip_download': True,

4904

},

4905

'expected_warnings': ['Ignoring subtitle tracks found in '],

4906

}, {

4907

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4912

'uploader': 'The Young Turks',

4913

'uploader_id': 'TheYoungTurks',

4914

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4915

'upload_date': '20150715',

4916

'license': 'Standard YouTube License',

4917

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4918

'categories': ['News & Politics'],

4919

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4924

},

4925

'only_matching': True,

4926

}, {

4927

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4928

'only_matching': True,

4929

}, {

4930

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4931

'only_matching': True,

4932

}, {

4933

'note': 'A channel that is not live. Should raise error',

4934

'url': 'https://www.youtube.com/user/numberphile/live',

4935

'only_matching': True,

4936

}, {

4937

'url': 'https://www.youtube.com/feed/trending',

4938

'only_matching': True,

4939

}, {

4940

'url': 'https://www.youtube.com/feed/library',

4941

'only_matching': True,

4942

}, {

4943

'url': 'https://www.youtube.com/feed/history',

4944

'only_matching': True,

4945

}, {

4946

'url': 'https://www.youtube.com/feed/subscriptions',

4947

'only_matching': True,

4948

}, {

4949

'url': 'https://www.youtube.com/feed/watch_later',

4950

'only_matching': True,

4951

}, {

4952

'note': 'Recommended - redirects to home page.',

4953

'url': 'https://www.youtube.com/feed/recommended',

4954

'only_matching': True,

4955

}, {

4956

'note': 'inline playlist with not always working continuations',

4957

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4958

'only_matching': True,

4959

}, {

4960

'url': 'https://www.youtube.com/course',

4961

'only_matching': True,

4962

}, {

4963

'url': 'https://www.youtube.com/zsecurity',

4964

'only_matching': True,

4965

}, {

4966

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4967

'only_matching': True,

4968

}, {

4969

'url': 'https://www.youtube.com/TheYoungTurks/live',

4970

'only_matching': True,

4971

}, {

4972

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4979

}, {

4980

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4981

'only_matching': True,

4982

}, {

4983

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4984

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4985

'only_matching': True

4986

}, {

4987

'note': '/browse/ should redirect to /channel/',

4988

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4989

'only_matching': True

4990

}, {

4991

'note': 'VLPL, should redirect to playlist?list=PL...',

4992

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4993

'info_dict': {

4994

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4995

'uploader': 'NoCopyrightSounds',

4996

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4997

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4998

'title': 'NCS Releases',

4999

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5000

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

5001

'modified_date': r're:\d{8}',

5002

'view_count': int,

5003

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

5004

'tags': [],

5005

'channel': 'NoCopyrightSounds',

5006

},

5007

'playlist_mincount': 166,

5008

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

5009

}, {

5010

'note': 'Topic, should redirect to playlist?list=UU...',

5011

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5012

'info_dict': {

5013

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5014

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5015

'title': 'Uploads from Royalty Free Music - Topic',

5016

'uploader': 'Royalty Free Music - Topic',

5017

'tags': [],

5018

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5019

'channel': 'Royalty Free Music - Topic',

5020

'view_count': int,

5021

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5022

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5023

'modified_date': r're:\d{8}',

5024

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5025

'description': '',

5026

},

5027

'expected_warnings': [

5028

'The URL does not have a videos tab',

5029

r'[Uu]navailable videos (are|will be) hidden',

5030

],

5031

'playlist_mincount': 101,

5032

}, {

5033

'note': 'Topic without a UU playlist',

5034

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

5035

'info_dict': {

5036

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

5037

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

5038

'tags': [],

5039

},

5040

'expected_warnings': [

5041

'the playlist redirect gave error',

5042

],

5043

'playlist_mincount': 9,

5044

}, {

5045

'note': 'Youtube music Album',

5046

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

5047

'info_dict': {

5048

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

5049

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

5054

'modified_date': r're:\d{8}',

5055

},

5056

'playlist_count': 50,

5057

}, {

5058

'note': 'unlisted single video playlist',

5059

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5060

'info_dict': {

5061

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5062

'uploader': 'colethedj',

5063

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

5064

'title': 'yt-dlp unlisted playlist test',

5065

'availability': 'unlisted',

5066

'tags': [],

5067

'modified_date': '20211208',

5068

'channel': 'colethedj',

5069

'view_count': int,

5070

'description': '',

5071

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

5072

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

5073

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

5078

'url': 'https://www.youtube.com/feed/recommended',

5079

'info_dict': {

5080

'id': 'recommended',

5081

'title': 'recommended',

5082

'tags': [],

5083

},

5084

'playlist_mincount': 50,

5085

'params': {

5086

'skip_download': True,

5087

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5088

},

5089

}, {

5090

'note': 'API Fallback: /videos tab, sorted by oldest first',

5091

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

5092

'info_dict': {

5093

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5094

'title': 'Cody\'sLab - Videos',

5095

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

5096

'uploader': 'Cody\'sLab',

5097

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5098

'channel': 'Cody\'sLab',

5099

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

5100

'tags': [],

5101

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5102

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

5103

'channel_follower_count': int

5104

},

5105

'playlist_mincount': 650,

5106

'params': {

5107

'skip_download': True,

5108

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5109

},

5110

}, {

5111

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

5112

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

5113

'info_dict': {

5114

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

5115

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5116

'title': 'Uploads from Royalty Free Music - Topic',

5117

'uploader': 'Royalty Free Music - Topic',

5118

'modified_date': r're:\d{8}',

5119

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

5120

'description': '',

5121

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5122

'tags': [],

5123

'channel': 'Royalty Free Music - Topic',

5124

'view_count': int,

5125

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

5126

},

5127

'expected_warnings': [

5128

'does not have a videos tab',

5129

r'[Uu]navailable videos (are|will be) hidden',

5130

],

5131

'playlist_mincount': 101,

5132

'params': {

5133

'skip_download': True,

5134

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

5135

},

5136

}, {

5137

'note': 'non-standard redirect to regional channel',

5138

'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',

5139

'only_matching': True

5140

}, {

5141

'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',

5142

'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5143

'info_dict': {

5144

'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',

5145

'modified_date': '20220407',

5146

'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5147

'tags': [],

5148

'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5149

'uploader': 'pukkandan',

5150

'availability': 'unlisted',

5151

'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',

5152

'channel': 'pukkandan',

5153

'description': 'Test for collaborative playlist',

5154

'title': 'yt-dlp test - collaborative playlist',

5155

'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',

5156

},

5157

'playlist_mincount': 2

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE declares suitable are always rejected here;
    everything else is decided by the parent class' ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

5164

5165

# Splits a URL into three named groups:
#   pre  - the part matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only tries
#          to match it when the `not_channel` group did not participate
#   post - whatever remains up to the end of the string
# NOTE(review): `not_channel` is presumably a named group declared inside _VALID_URL — confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

5166

5167

@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/channel style URL and extract it.

    The URL host is forced to www.youtube.com, the URL is split with
    ``_URL_RE`` and, depending on the pieces found, the request is either
    delegated to another URL via ``url_result`` or the page data is fetched
    and handed to ``_extract_from_tabs`` / ``_extract_from_playlist``.

    Raises ExtractorError when an album cannot be resolved, when a requested
    tab does not exist, when a /live tab did not redirect to a video, or when
    the page cannot be recognised at all.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Named groups of _URL_RE, with unmatched groups normalised to ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def video_result(vid):
        # Delegate a single video to the regular video extractor
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    parts = split_url(url)
    redirect_warning = None
    pre = parts['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = parts['tab'].lower()
    post = parts['post']
    is_channel = not parts['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        if item_id.startswith('VL'):
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif item_id.startswith('MP'):
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif parts['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    parts = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and parts['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        parts = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), parts['tab'], parts['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = parts['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab_name and requested_tab_name != selected_tab_name:
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was explicitly part of the given URL, so this is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id.startswith('UC'):
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if parts['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return video_result(video_id)

5296

5297

5298

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs as well as youtube/invidious URLs carrying a
    # `list=` query parameter, and forwards them to YoutubeTabIE.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that YoutubeTabIE accepts, and URLs whose query string carries a
        non-empty `v` parameter, are rejected; otherwise the parent class'
        ``suitable`` decides.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is already imported at module level; no local import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is kept when present; otherwise only the
        matched playlist ID is passed as `list`. Music URLs are flagged via
        smuggled data.
        """
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5407

5408

5409

class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a `list=` parameter by
    # rebuilding the equivalent watch URL and delegating to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
    }
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the full watch URL (video + playlist) and hand it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5456

5457

5458

class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps /embed/live_stream?channel=<id> URLs onto the channel's /live tab.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)

5471

5472

5473

class YoutubeYtUserIE(InfoExtractor):
    # Expands the "ytuser:<name>" shorthand into the user's /videos URL.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the matched user's videos page."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5487

5488

5489

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style keywords resolve to the "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())

5506

5507

5508

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Declarative search extractor: only configuration lives here, the
    # behaviour comes from the two base classes.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same as the plain search extractor but with date-sorted search params.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the query comes from `search_query`
    # (or `q`) and the optional `sp` parameter is forwarded to the search.
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            'entries': [{
                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                'title': '#cats',
            }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by *url* and return it as a playlist.

        The query string doubles as both the playlist id and its title.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)

5576

5577

5578

class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # music.youtube.com search URLs. A result section can be chosen either
    # with an explicit `sp` parameter or with a "#<section name>" fragment.
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name -> value of the `sp` search parameter selecting it
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search described by *url* and return it as a playlist.

        The playlist id/title is the query, suffixed with " - <section>" when
        a section could be determined.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Prefer the readable section name; fall back to the raw `sp` value
            section = params
            for name, encoded in self._SECTIONS.items():
                if encoded == params:
                    section = name
                    break
        else:
            # No `sp` given: try to interpret the URL fragment as a section name
            fragment = (url.split('#') + [''])[1]
            section = compat_urllib_parse_unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)

5629

5630

5631

class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Every subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # Extractor name is derived from the feed name of the subclass
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5646

5647

5648

class YoutubeWatchLaterIE(InfoExtractor):
    # The ":ytwatchlater" keyword resolves to the "WL" playlist.
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5660

5661

5662

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keyword.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" style keywords map to the "subscriptions" feed.
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" keywords map to the "history" feed.
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a single non-video
    # parameter (typically because an unquoted "&" was eaten by the shell)
    # and reports a helpful error instead of attempting an extraction.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this program (yt-dlp) so they can be
        # copy-pasted; the message previously referred to youtube-dl.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    # Clip URLs are not handled natively: a warning is printed and the URL is
    # passed on to the 'Generic' extractor.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported, then delegate *url* to 'Generic'."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')

5759

5760

5761

class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose `v` value is only 1-10 characters long and
    # reports them as truncated.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)