jfr.im git - yt-dlp.git/blame_incremental - yt

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5	import calendar
	6	import copy
	7	import datetime
	8	import functools
	9	import hashlib
	10	import itertools
	11	import json
	12	import math
	13	import os.path
	14	import random
	15	import re
	16	import sys
	17	import time
	18	import traceback
	19	import threading
	20
	21	from .common import InfoExtractor, SearchInfoExtractor
	22	from ..compat import (
	23	compat_chr,
	24	compat_HTTPError,
	25	compat_parse_qs,
	26	compat_str,
	27	compat_urllib_parse_unquote_plus,
	28	compat_urllib_parse_urlencode,
	29	compat_urllib_parse_urlparse,
	30	compat_urlparse,
	31	)
	32	from ..jsinterp import JSInterpreter
	33	from ..utils import (
	34	bug_reports_message,
	35	clean_html,
	36	datetime_from_str,
	37	dict_get,
	38	error_to_compat_str,
	39	ExtractorError,
	40	float_or_none,
	41	format_field,
	42	int_or_none,
	43	is_html,
	44	join_nonempty,
	45	mimetype2ext,
	46	network_exceptions,
	47	NO_DEFAULT,
	48	orderedSet,
	49	parse_codecs,
	50	parse_count,
	51	parse_duration,
	52	parse_iso8601,
	53	parse_qs,
	54	qualities,
	55	remove_end,
	56	remove_start,
	57	smuggle_url,
	58	str_or_none,
	59	str_to_int,
	60	strftime_or_none,
	61	traverse_obj,
	62	try_get,
	63	unescapeHTML,
	64	unified_strdate,
	65	unified_timestamp,
	66	unsmuggle_url,
	67	update_url_query,
	68	url_or_none,
	69	urljoin,
	70	variadic,
	71	)
	72
	73
	74	def get_first(obj, keys, **kwargs):
	75	return traverse_obj(obj, (..., variadic(keys)), *kwargs, get_all=False)
	76
	77
	78	# any clients starting with _ cannot be explicity requested by the user
	79	INNERTUBE_CLIENTS = {
	80	'web': {
	81	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	82	'INNERTUBE_CONTEXT': {
	83	'client': {
	84	'clientName': 'WEB',
	85	'clientVersion': '2.20211221.00.00',
	86	}
	87	},
	88	'INNERTUBE_CONTEXT_CLIENT_NAME': 1
	89	},
	90	'web_embedded': {
	91	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	92	'INNERTUBE_CONTEXT': {
	93	'client': {
	94	'clientName': 'WEB_EMBEDDED_PLAYER',
	95	'clientVersion': '1.20211215.00.01',
	96	},
	97	},
	98	'INNERTUBE_CONTEXT_CLIENT_NAME': 56
	99	},
	100	'web_music': {
	101	'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
	102	'INNERTUBE_HOST': 'music.youtube.com',
	103	'INNERTUBE_CONTEXT': {
	104	'client': {
	105	'clientName': 'WEB_REMIX',
	106	'clientVersion': '1.20211213.00.00',
	107	}
	108	},
	109	'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
	110	},
	111	'web_creator': {
	112	'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
	113	'INNERTUBE_CONTEXT': {
	114	'client': {
	115	'clientName': 'WEB_CREATOR',
	116	'clientVersion': '1.20211220.02.00',
	117	}
	118	},
	119	'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
	120	},
	121	'android': {
	122	'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
	123	'INNERTUBE_CONTEXT': {
	124	'client': {
	125	'clientName': 'ANDROID',
	126	'clientVersion': '16.49',
	127	}
	128	},
	129	'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
	130	'REQUIRE_JS_PLAYER': False
	131	},
	132	'android_embedded': {
	133	'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
	134	'INNERTUBE_CONTEXT': {
	135	'client': {
	136	'clientName': 'ANDROID_EMBEDDED_PLAYER',
	137	'clientVersion': '16.49',
	138	},
	139	},
	140	'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
	141	'REQUIRE_JS_PLAYER': False
	142	},
	143	'android_music': {
	144	'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
	145	'INNERTUBE_CONTEXT': {
	146	'client': {
	147	'clientName': 'ANDROID_MUSIC',
	148	'clientVersion': '4.57',
	149	}
	150	},
	151	'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
	152	'REQUIRE_JS_PLAYER': False
	153	},
	154	'android_creator': {
	155	'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
	156	'INNERTUBE_CONTEXT': {
	157	'client': {
	158	'clientName': 'ANDROID_CREATOR',
	159	'clientVersion': '21.47',
	160	},
	161	},
	162	'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
	163	'REQUIRE_JS_PLAYER': False
	164	},
	165	# iOS clients have HLS live streams. Setting device model to get 60fps formats.
	166	# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
	167	'ios': {
	168	'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
	169	'INNERTUBE_CONTEXT': {
	170	'client': {
	171	'clientName': 'IOS',
	172	'clientVersion': '16.46',
	173	'deviceModel': 'iPhone14,3',
	174	}
	175	},
	176	'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
	177	'REQUIRE_JS_PLAYER': False
	178	},
	179	'ios_embedded': {
	180	'INNERTUBE_CONTEXT': {
	181	'client': {
	182	'clientName': 'IOS_MESSAGES_EXTENSION',
	183	'clientVersion': '16.46',
	184	'deviceModel': 'iPhone14,3',
	185	},
	186	},
	187	'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
	188	'REQUIRE_JS_PLAYER': False
	189	},
	190	'ios_music': {
	191	'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
	192	'INNERTUBE_CONTEXT': {
	193	'client': {
	194	'clientName': 'IOS_MUSIC',
	195	'clientVersion': '4.57',
	196	},
	197	},
	198	'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
	199	'REQUIRE_JS_PLAYER': False
	200	},
	201	'ios_creator': {
	202	'INNERTUBE_CONTEXT': {
	203	'client': {
	204	'clientName': 'IOS_CREATOR',
	205	'clientVersion': '21.47',
	206	},
	207	},
	208	'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
	209	'REQUIRE_JS_PLAYER': False
	210	},
	211	# mweb has 'ultralow' formats
	212	# See: https://github.com/yt-dlp/yt-dlp/pull/557
	213	'mweb': {
	214	'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
	215	'INNERTUBE_CONTEXT': {
	216	'client': {
	217	'clientName': 'MWEB',
	218	'clientVersion': '2.20211221.01.00',
	219	}
	220	},
	221	'INNERTUBE_CONTEXT_CLIENT_NAME': 2
	222	}
	223	}
	224
	225
	226	def build_innertube_clients():
	227	third_party = {
	228	'embedUrl': 'https://google.com', # Can be any valid URL
	229	}
	230	base_clients = ('android', 'web', 'ios', 'mweb')
	231	priority = qualities(base_clients[::-1])
	232
	233	for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
	234	ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
	235	ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
	236	ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
	237	ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
	238	ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
	239
	240	if client in base_clients:
	241	INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
	242	agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
	243	agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	244	agegate_ytcfg['priority'] -= 1
	245	elif client.endswith('_embedded'):
	246	ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
	247	ytcfg['priority'] -= 2
	248	else:
	249	ytcfg['priority'] -= 3
	250
	251
	252	build_innertube_clients()
	253
	254
	255	class YoutubeBaseInfoExtractor(InfoExtractor):
	256	"""Provide base functions for Youtube extractors"""
	257
	258	_RESERVED_NAMES = (
	259	r'channel\|c\|user\|playlist\|watch\|w\|v\|embed\|e\|watch_popup\|clip\|'
	260	r'shorts\|movies\|results\|shared\|hashtag\|trending\|feed\|feeds\|'
	261	r'browse\|oembed\|get_video_info\|iframe_api\|s/player\|'
	262	r'storefront\|oops\|index\|account\|reporthistory\|t/terms\|about\|upload\|signin\|logout')
	263
	264	_PLAYLIST_ID_RE = r'(?:(?:PL\|LL\|EC\|UU\|FL\|RD\|UL\|TL\|PU\|OLAK5uy_)[0-9A-Za-z-_]{10,}\|RDMM\|WL\|LL\|LM)'
	265
	266	_NETRC_MACHINE = 'youtube'
	267
	268	# If True it will raise an error if no login info is provided
	269	_LOGIN_REQUIRED = False
	270
	271	_INVIDIOUS_SITES = (
	272	# invidious-redirect websites
	273	r'(?:www\.)?redirect\.invidious\.io',
	274	r'(?:(?:www\|dev)\.)?invidio\.us',
	275	# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
	276	r'(?:www\.)?invidious\.pussthecat\.org',
	277	r'(?:www\.)?invidious\.zee\.li',
	278	r'(?:www\.)?invidious\.ethibox\.fr',
	279	r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
	280	# youtube-dl invidious instances list
	281	r'(?:(?:www\|no)\.)?invidiou\.sh',
	282	r'(?:(?:www\|fi)\.)?invidious\.snopyta\.org',
	283	r'(?:www\.)?invidious\.kabi\.tk',
	284	r'(?:www\.)?invidious\.mastodon\.host',
	285	r'(?:www\.)?invidious\.zapashcanon\.fr',
	286	r'(?:www\.)?(?:invidious(?:-us)?\|piped)\.kavin\.rocks',
	287	r'(?:www\.)?invidious\.tinfoil-hat\.net',
	288	r'(?:www\.)?invidious\.himiko\.cloud',
	289	r'(?:www\.)?invidious\.reallyancient\.tech',
	290	r'(?:www\.)?invidious\.tube',
	291	r'(?:www\.)?invidiou\.site',
	292	r'(?:www\.)?invidious\.site',
	293	r'(?:www\.)?invidious\.xyz',
	294	r'(?:www\.)?invidious\.nixnet\.xyz',
	295	r'(?:www\.)?invidious\.048596\.xyz',
	296	r'(?:www\.)?invidious\.drycat\.fr',
	297	r'(?:www\.)?inv\.skyn3t\.in',
	298	r'(?:www\.)?tube\.poal\.co',
	299	r'(?:www\.)?tube\.connect\.cafe',
	300	r'(?:www\.)?vid\.wxzm\.sx',
	301	r'(?:www\.)?vid\.mint\.lgbt',
	302	r'(?:www\.)?vid\.puffyan\.us',
	303	r'(?:www\.)?yewtu\.be',
	304	r'(?:www\.)?yt\.elukerio\.org',
	305	r'(?:www\.)?yt\.lelux\.fi',
	306	r'(?:www\.)?invidious\.ggc-project\.de',
	307	r'(?:www\.)?yt\.maisputain\.ovh',
	308	r'(?:www\.)?ytprivate\.com',
	309	r'(?:www\.)?invidious\.13ad\.de',
	310	r'(?:www\.)?invidious\.toot\.koeln',
	311	r'(?:www\.)?invidious\.fdn\.fr',
	312	r'(?:www\.)?watch\.nettohikari\.com',
	313	r'(?:www\.)?invidious\.namazso\.eu',
	314	r'(?:www\.)?invidious\.silkky\.cloud',
	315	r'(?:www\.)?invidious\.exonip\.de',
	316	r'(?:www\.)?invidious\.riverside\.rocks',
	317	r'(?:www\.)?invidious\.blamefran\.net',
	318	r'(?:www\.)?invidious\.moomoo\.de',
	319	r'(?:www\.)?ytb\.trom\.tf',
	320	r'(?:www\.)?yt\.cyberhost\.uk',
	321	r'(?:www\.)?kgg2m7yk5aybusll\.onion',
	322	r'(?:www\.)?qklhadlycap4cnod\.onion',
	323	r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
	324	r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
	325	r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
	326	r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
	327	r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
	328	r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
	329	r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
	330	r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
	331	r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
	332	r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
	333	)
	334
	335	def _login(self):
	336	"""
	337	Attempt to log in to YouTube.
	338	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	339	"""
	340
	341	if (self._LOGIN_REQUIRED
	342	and self.get_param('cookiefile') is None
	343	and self.get_param('cookiesfrombrowser') is None):
	344	self.raise_login_required(
	345	'Login details are needed to download this content', method='cookies')
	346	username, password = self._get_login_info()
	347	if username:
	348	self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
	349
	350	def _initialize_consent(self):
	351	cookies = self._get_cookies('https://www.youtube.com/')
	352	if cookies.get('__Secure-3PSID'):
	353	return
	354	consent_id = None
	355	consent = cookies.get('CONSENT')
	356	if consent:
	357	if 'YES' in consent.value:
	358	return
	359	consent_id = self._search_regex(
	360	r'PENDING\+(\d+)', consent.value, 'consent', default=None)
	361	if not consent_id:
	362	consent_id = random.randint(100, 999)
	363	self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
	364
	365	def _initialize_pref(self):
	366	cookies = self._get_cookies('https://www.youtube.com/')
	367	pref_cookie = cookies.get('PREF')
	368	pref = {}
	369	if pref_cookie:
	370	try:
	371	pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
	372	except ValueError:
	373	self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
	374	pref.update({'hl': 'en'})
	375	self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
	376
	377	def _real_initialize(self):
	378	self._initialize_pref()
	379	self._initialize_consent()
	380	self._login()
	381
	382	_YT_INITIAL_DATA_RE = r'(?:window\s\[\s["\']ytInitialData["\']\s\]\|ytInitialData)\s=\s({.+?})\s;'
	383	_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s=\s({.+?})\s*;'
	384	_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta\|</script\|\n)'
	385
	386	def _get_default_ytcfg(self, client='web'):
	387	return copy.deepcopy(INNERTUBE_CLIENTS[client])
	388
	389	def _get_innertube_host(self, client='web'):
	390	return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
	391
	392	def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
	393	# try_get but with fallback to default ytcfg client values when present
	394	_func = lambda y: try_get(y, getter, expected_type)
	395	return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
	396
	397	def _extract_client_name(self, ytcfg, default_client='web'):
	398	return self._ytcfg_get_safe(
	399	ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
	400	lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
	401
	402	def _extract_client_version(self, ytcfg, default_client='web'):
	403	return self._ytcfg_get_safe(
	404	ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
	405	lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
	406
	407	def _extract_api_key(self, ytcfg=None, default_client='web'):
	408	return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
	409
	410	def _extract_context(self, ytcfg=None, default_client='web'):
	411	context = get_first(
	412	(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
	413	# Enforce language for extraction
	414	traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
	415	return context
	416
	417	_SAPISID = None
	418
	419	def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
	420	time_now = round(time.time())
	421	if self._SAPISID is None:
	422	yt_cookies = self._get_cookies('https://www.youtube.com')
	423	# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
	424	# See: https://github.com/yt-dlp/yt-dlp/issues/393
	425	sapisid_cookie = dict_get(
	426	yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
	427	if sapisid_cookie and sapisid_cookie.value:
	428	self._SAPISID = sapisid_cookie.value
	429	self.write_debug('Extracted SAPISID cookie')
	430	# SAPISID cookie is required if not already present
	431	if not yt_cookies.get('SAPISID'):
	432	self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
	433	self._set_cookie(
	434	'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
	435	else:
	436	self._SAPISID = False
	437	if not self._SAPISID:
	438	return None
	439	# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
	440	sapisidhash = hashlib.sha1(
	441	f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
	442	return f'SAPISIDHASH {time_now}_{sapisidhash}'
	443
	444	def _call_api(self, ep, query, video_id, fatal=True, headers=None,
	445	note='Downloading API JSON', errnote='Unable to download API page',
	446	context=None, api_key=None, api_hostname=None, default_client='web'):
	447
	448	data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
	449	data.update(query)
	450	real_headers = self.generate_api_headers(default_client=default_client)
	451	real_headers.update({'content-type': 'application/json'})
	452	if headers:
	453	real_headers.update(headers)
	454	return self._download_json(
	455	'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
	456	video_id=video_id, fatal=fatal, note=note, errnote=errnote,
	457	data=json.dumps(data).encode('utf8'), headers=real_headers,
	458	query={'key': api_key or self._extract_api_key()})
	459
	460	def extract_yt_initial_data(self, item_id, webpage, fatal=True):
	461	data = self._search_regex(
	462	(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
	463	self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
	464	if data:
	465	return self._parse_json(data, item_id, fatal=fatal)
	466
	467	@staticmethod
	468	def _extract_session_index(*data):
	469	"""
	470	Index of current account in account list.
	471	See: https://github.com/yt-dlp/yt-dlp/pull/519
	472	"""
	473	for ytcfg in data:
	474	session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
	475	if session_index is not None:
	476	return session_index
	477
	478	# Deprecated?
	479	def _extract_identity_token(self, ytcfg=None, webpage=None):
	480	if ytcfg:
	481	token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
	482	if token:
	483	return token
	484	if webpage:
	485	return self._search_regex(
	486	r'\bID_TOKEN["\']\s:\s["\'](.+?)["\']', webpage,
	487	'identity token', default=None, fatal=False)
	488
	489	@staticmethod
	490	def _extract_account_syncid(*args):
	491	"""
	492	Extract syncId required to download private playlists of secondary channels
	493	@params response and/or ytcfg
	494	"""
	495	for data in args:
	496	# ytcfg includes channel_syncid if on secondary channel
	497	delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
	498	if delegated_sid:
	499	return delegated_sid
	500	sync_ids = (try_get(

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import calendar

import copy

import datetime

import functools

import hashlib

import itertools

import json

import math

import os.path

import random

import re

import sys

import time

import traceback

import threading

from .common import InfoExtractor, SearchInfoExtractor

22

from ..compat import (

compat_chr,

compat_HTTPError,

compat_parse_qs,

compat_str,

compat_urllib_parse_unquote_plus,

28

compat_urllib_parse_urlencode,

29

compat_urllib_parse_urlparse,

30

compat_urlparse,

31

)

32

from ..jsinterp import JSInterpreter

33

from ..utils import (

bug_reports_message,

clean_html,

datetime_from_str,

dict_get,

error_to_compat_str,

ExtractorError,

float_or_none,

format_field,

int_or_none,

is_html,

join_nonempty,

mimetype2ext,

network_exceptions,

NO_DEFAULT,

orderedSet,

parse_codecs,

parse_count,

parse_duration,

parse_iso8601,

parse_qs,

qualities,

remove_end,

remove_start,

smuggle_url,

str_or_none,

str_to_int,

strftime_or_none,

traverse_obj,

try_get,

unescapeHTML,

unified_strdate,

unified_timestamp,

unsmuggle_url,

update_url_query,

url_or_none,

urljoin,

variadic,

)

def get_first(obj, keys, **kwargs):
    """Return the first result of traversing ``keys`` under every element of ``obj``.

    The path handed to ``traverse_obj`` is ``...`` followed by the elements of
    ``variadic(keys)``; extra keyword arguments are forwarded unchanged and
    ``get_all=False`` is always passed.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, get_all=False, **kwargs)

76

77

78

# any clients starting with _ cannot be explicitly requested by the user
# Each entry maps a client id to its base ytcfg. Keys that are absent here
# (API key, host, REQUIRE_JS_PLAYER, 'hl') are filled in by
# build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}

def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place.

    Fills in the default API key, host, REQUIRE_JS_PLAYER flag and 'hl'
    language, assigns a 'priority' to each client, and adds a
    '<client>_agegate' variant for each base client.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: agegate entries are inserted during the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # Priority is derived from the part of the name before the first '_'
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)

        if name in base_clients:
            # The base entry keeps its priority; only the derived copy is lowered
            variant = copy.deepcopy(cfg)
            variant['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            variant['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            variant['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = variant
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3

250

251

252

build_innertube_clients()

253

254

255

class YoutubeBaseInfoExtractor(InfoExtractor):

256

"""Provide base functions for Youtube extractors"""

257

258

_RESERVED_NAMES = (

259

r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

265

266

_NETRC_MACHINE = 'youtube'

267

268

# If True it will raise an error if no login info is provided

269

_LOGIN_REQUIRED = False

270

271

_INVIDIOUS_SITES = (

272

# invidious-redirect websites

273

r'(?:www\.)?redirect\.invidious\.io',

274

r'(?:(?:www|dev)\.)?invidio\.us',

275

# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md

276

r'(?:www\.)?invidious\.pussthecat\.org',

277

r'(?:www\.)?invidious\.zee\.li',

278

r'(?:www\.)?invidious\.ethibox\.fr',

279

r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',

280

# youtube-dl invidious instances list

281

r'(?:(?:www|no)\.)?invidiou\.sh',

282

r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',

283

r'(?:www\.)?invidious\.kabi\.tk',

284

r'(?:www\.)?invidious\.mastodon\.host',

285

r'(?:www\.)?invidious\.zapashcanon\.fr',

286

r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',

287

r'(?:www\.)?invidious\.tinfoil-hat\.net',

288

r'(?:www\.)?invidious\.himiko\.cloud',

289

r'(?:www\.)?invidious\.reallyancient\.tech',

290

r'(?:www\.)?invidious\.tube',

291

r'(?:www\.)?invidiou\.site',

292

r'(?:www\.)?invidious\.site',

293

r'(?:www\.)?invidious\.xyz',

294

r'(?:www\.)?invidious\.nixnet\.xyz',

295

r'(?:www\.)?invidious\.048596\.xyz',

296

r'(?:www\.)?invidious\.drycat\.fr',

297

r'(?:www\.)?inv\.skyn3t\.in',

298

r'(?:www\.)?tube\.poal\.co',

299

r'(?:www\.)?tube\.connect\.cafe',

300

r'(?:www\.)?vid\.wxzm\.sx',

301

r'(?:www\.)?vid\.mint\.lgbt',

302

r'(?:www\.)?vid\.puffyan\.us',

303

r'(?:www\.)?yewtu\.be',

304

r'(?:www\.)?yt\.elukerio\.org',

305

r'(?:www\.)?yt\.lelux\.fi',

306

r'(?:www\.)?invidious\.ggc-project\.de',

307

r'(?:www\.)?yt\.maisputain\.ovh',

308

r'(?:www\.)?ytprivate\.com',

309

r'(?:www\.)?invidious\.13ad\.de',

310

r'(?:www\.)?invidious\.toot\.koeln',

311

r'(?:www\.)?invidious\.fdn\.fr',

312

r'(?:www\.)?watch\.nettohikari\.com',

313

r'(?:www\.)?invidious\.namazso\.eu',

314

r'(?:www\.)?invidious\.silkky\.cloud',

315

r'(?:www\.)?invidious\.exonip\.de',

316

r'(?:www\.)?invidious\.riverside\.rocks',

317

r'(?:www\.)?invidious\.blamefran\.net',

318

r'(?:www\.)?invidious\.moomoo\.de',

319

r'(?:www\.)?ytb\.trom\.tf',

320

r'(?:www\.)?yt\.cyberhost\.uk',

321

r'(?:www\.)?kgg2m7yk5aybusll\.onion',

322

r'(?:www\.)?qklhadlycap4cnod\.onion',

323

r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',

324

r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',

325

r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',

326

r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',

327

r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',

328

r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',

329

r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',

330

r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',

331

r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',

332

r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',

)

def _login(self):

"""

Attempt to log in to YouTube.

338

If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

339

"""

340

341

if (self._LOGIN_REQUIRED

342

and self.get_param('cookiefile') is None

343

and self.get_param('cookiesfrombrowser') is None):

344

self.raise_login_required(

345

'Login details are needed to download this content', method='cookies')

346

username, password = self._get_login_info()

347

if username:

348

self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

349

350

def _initialize_consent(self):

351

cookies = self._get_cookies('https://www.youtube.com/')

352

if cookies.get('__Secure-3PSID'):

353

return

354

consent_id = None

355

consent = cookies.get('CONSENT')

356

if consent:

357

if 'YES' in consent.value:

358

return

359

consent_id = self._search_regex(

360

r'PENDING\+(\d+)', consent.value, 'consent', default=None)

361

if not consent_id:

362

consent_id = random.randint(100, 999)

363

self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

364

365

def _initialize_pref(self):
    """Rewrite the PREF cookie on .youtube.com so that it carries hl=en.

    Existing PREF values are kept when the cookie can be parsed; if parsing
    raises ValueError a warning is reported and only hl=en is written.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if existing:
        try:
            pref = dict(compat_urlparse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    pref['hl'] = 'en'
    encoded = compat_urllib_parse_urlencode(pref)
    self._set_cookie('.youtube.com', name='PREF', value=encoded)

376

377

def _real_initialize(self):

378

self._initialize_pref()

379

self._initialize_consent()

380

self._login()

381

382

_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

383

_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'

384

_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

385

386

def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for ``client``.

    Raises KeyError if ``client`` is not a key of INNERTUBE_CLIENTS.
    """
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)

388

389

def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for ``client``."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']

391

392

def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get() on ``ytcfg``, falling back to the default config of ``default_client``.

    The fallback is used whenever the value obtained from ``ytcfg`` is falsy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    # Only build the (deep-copied) default config when it is actually needed
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)

396

397

def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg``, else from the default config.

    'INNERTUBE_CLIENT_NAME' is tried before the name nested in
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

401

402

def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg``, else from the default config.

    'INNERTUBE_CLIENT_VERSION' is tried before the version nested in
    'INNERTUBE_CONTEXT'.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, compat_str, default_client)

406

407

def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from ``ytcfg``, else from the default config of ``default_client``."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)

409

410

def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict from ``ytcfg`` or the default config.

    The returned context's 'client' dict, when present, is updated in place
    so that 'hl' is 'en'.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language for extraction
    client_info = traverse_obj(context, 'client', expected_type=dict, default={})
    client_info['hl'] = 'en'
    return context

# Cached SAPISID cookie value: None = not looked up yet, False = looked up but absent
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <timestamp>_<sha1>' authorization value for `origin`

    Returns None when no SAPISID cookie value is available. The cookie lookup
    is done only while `self._SAPISID` is None; its outcome is stored there.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest_input = f'{time_now} {self._SAPISID} {origin}'
    sapisidhash = hashlib.sha1(digest_input.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'

443

444

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the client context) as JSON to the
    `/youtubei/v1/<ep>` endpoint and return the result of `_download_json`

    `context`, `api_key` and `api_hostname` override the values that would
    otherwise be derived from the defaults; `headers` is applied on top of
    the generated API headers.
    """
    if not context:
        context = self._extract_context(default_client=default_client)
    payload = {'context': context}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    host = api_hostname or self._get_innertube_host(default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (host, ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key or self._extract_api_key()})

459

460

def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData assignment in `webpage` and return it parsed as JSON

    Returns None when nothing was found.
    """
    # The boundary-anchored pattern is tried first, the bare pattern second
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)

466

467

@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    The first of `data` with an integer-like 'SESSION_INDEX' wins; None if there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)

# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token: 'ID_TOKEN' from `ytcfg` if present,
    otherwise searched for in `webpage`; None if neither yields one
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)

488

489

@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None

@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried for each of `args`
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, visitor_data_paths, expected_type=str)

@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False

521

522

def extract_ytcfg(self, video_id, webpage):
    """
    Extract the object passed to `ytcfg.set({...});` in `webpage`

    @param video_id  id passed on to `_parse_json`
    @param webpage   page source to search; may be empty or None
    @returns         the parsed dict, or {} if `webpage` is empty, no
                     `ytcfg.set(...)` call is found or the JSON is unparsable
    """
    if not webpage:
        return {}
    # The parentheses of the call must be escaped as literals: `\(` and `\)`.
    # (A `$` in their place is an end-of-string anchor and can never be
    # followed by the `{...}` payload.)
    ytcfg_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(ytcfg_json, video_id, fatal=False) or {}

529

530

def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request

    Explicitly passed values take precedence over what can be extracted
    from `ytcfg`. Headers whose value ends up as None are left out.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Falls back to index 0 when only the syncid is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}

554

555

@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Return the query dict for a continuation token, with click tracking params if given."""
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }

@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    or 'reloadContinuationData'; None if there is no continuation token
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))

579

580

@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict;
    None if it is not a dict or carries no 'continuationCommand' token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))

589

590

@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`

    The renderer's own continuation data is preferred; otherwise the
    entries under 'contents' and 'items' are scanned for a
    'continuationItemRenderer'. None if nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x: x[key], list) or [])]

    for entry in entries:
        if isinstance(entry, dict):
            found = cls._extract_continuation_ep_data(try_get(entry, endpoint_getters, dict))
            if found:
                return found
    return None

@classmethod
def _extract_alerts(cls, data):
    """
    Yield an (alert_type, message) tuple for every alert in `data['alerts']`
    that has both a 'type' and a non-empty 'text'

    Entries that are not dicts are skipped at both nesting levels.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same guard as for the outer entry: a non-dict value
            # would otherwise raise AttributeError on `.get`
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message

623

624

def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs

    With `fatal`, alerts of type 'error' (case-insensitive) are errors:
    the last one is raised as ExtractorError, all others are warned about.
    Everything else is reported as a warning.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_fatal_error = alert_type.lower() == 'error' and fatal
        (errors if is_fatal_error else warnings).append([alert_type, alert_message])

    # The last error is kept back for the exception below
    for alert_type, alert_message in (warnings + errors[:-1]):
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

637

638

def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to `_report_alerts` with the given options."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)

640

641

def _extract_badges(self, renderer: dict):
    """Return the set of lowercased 'metadataBadgeRenderer' labels found in `renderer['badges']`."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}

@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of `path_list` in `data`
    (or in `data` itself when no path is given)

    An object yields its 'simpleText', or else the joined 'text' of its
    'runs' (at most `max_runs` of them). None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a path containing `...` or alternatives is treated as yielding several objects
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                # The object itself may already be the list of runs
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None

def _get_count(self, data, *path_list):
    """
    Return the count parsed from the text at `path_list` in `data`

    `parse_count` is tried first; failing that, the leading run of
    digits and commas (whitespace removed) is converted.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)

@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    thumbnails = []
    for path in path_list or [()]:
        thumbnail_path = (*variadic(path), 'thumbnails', ...)
        for thumbnail in traverse_obj(data, thumbnail_path, default=[]):
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if thumbnail_url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in thumbnail_url:
                    thumbnail_url = thumbnail_url.partition('?')[0]
                thumbnails.append({
                    'url': thumbnail_url,
                    'height': int_or_none(thumbnail.get('height')),
                    'width': int_or_none(thumbnail.get('width')),
                })
    return thumbnails

@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns the result of `datetime_from_str`, or None if the text holds
             no relative time or the time cannot be converted
    """
    # Either a keyword ('start') or '<time> <unit>[s] ago'; these are
    # the named groups read below
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None

def _extract_time_text(self, renderer, *path_list):
    """
    Return (timestamp, text) for the time text at `path_list` in `renderer`

    The text is first read as a relative time, then as an absolute date.
    A warning is issued (once) if a non-empty text cannot be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    if isinstance(dt, datetime.datetime):
        timestamp = calendar.timegm(dt.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            # Retry with just the date-like part of the (lowercased) text
            date_part = self._search_regex(
                (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text

735

736

def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying on transient failures

    The request is attempted up to `extractor_retries` + 1 times. It is
    retried on network errors (except HTTP 403 and 429), on an
    "unknown error" alert in the response and when none of
    `check_get_keys` is present in the response.

    @param check_get_keys  keys of which at least one must be present in
                           the response; no check if empty or None
    @param fatal           raise on failure instead of warning and returning None
    @returns               the API response, or None on non-fatal failure
    @raises ExtractorError on failure when `fatal`
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # Non-HTML error body: rewind and report the message it may carry
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    # Only retry while attempts remain. On the last attempt fall
                    # through, so that the error is raised/warned about instead
                    # of the erroneous response being returned as if valid
                    if count < retries:
                        continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response

@staticmethod
def is_music_url(url):
    """Tell whether `url` starts with http(s)://music.youtube.com/"""
    music_match = re.match(r'https?://music\.youtube\.com/', url)
    return bool(music_match)

809

810

def _extract_video(self, renderer):
    """Build a 'url' type result for `YoutubeIE` from a video renderer dict."""
    video_id = renderer.get('videoId')
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    # Checked in this order: scheduled, already streamed, currently live
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    duration_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': self._get_text(renderer, 'title'),
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'duration': parse_duration(duration_text),
        'view_count': self._get_count(renderer, 'viewCountText'),
        'uploader': self._get_text(renderer, 'ownerText', 'shortBylineText'),
        'channel_id': channel_id,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }

class YoutubeIE(YoutubeBaseInfoExtractor):

851

IE_DESC = 'YouTube'

852

_VALID_URL = r"""(?x)^

853

(

854

(?:https?://|//) # http(s):// or protocol-independent URL

855

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|

856

(?:www\.)?deturl\.com/www\.youtube\.com|

857

(?:www\.)?pwnyoutube\.com|

858

(?:www\.)?hooktube\.com|

859

(?:www\.)?yourepeat\.com|

860

tube\.majestyc\.net|

861

%(invidious)s|

862

youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains

863

(?:.*?\#/)? # handle anchor (#/) redirect urls

864

(?: # the various things that can precede the ID:

865

(?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/

866

|(?: # or the v= param in all its forms

867

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

868

(?:\?|\#!?) # the params delimiter ? or # or #!

869

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

875

vid\.plus| # or vid.plus/xxxx

876

zwearz\.com/watch| # or zwearz.com/watch/xxxx

877

%(invidious)s

878

)/

879

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

880

)

881

)? # all until now is optional -> you can pass the naked ID

882

(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

883

(?(1).+)? # if we found the ID, everything can follow

884

(?:\#|$)""" % {

885

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

886

}

887

_PLAYER_INFO_RE = (

888

r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',

889

r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',

890

r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',

891

)

892

_formats = {

893

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

894

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

895

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

896

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

897

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

898

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

899

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

900

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

901

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

902

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

903

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

904

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

905

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

906

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

907

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

908

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

909

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

910

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

915

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

916

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

917

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

918

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

919

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

920

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

921

922

# Apple HTTP Live Streaming

923

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

924

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

925

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

926

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

927

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

928

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

929

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

930

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

931

932

# DASH mp4 video

933

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},

934

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},

935

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

936

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},

937

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},

938

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)

939

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},

940

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},

941

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},

942

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

943

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},

944

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

945

946

# Dash mp4 audio

947

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},

948

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},

949

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},

950

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

951

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},

952

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},

953

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

954

955

# Dash webm

956

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

957

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

958

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

959

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

960

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

961

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},

962

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},

963

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},

964

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},

965

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

966

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

967

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},

968

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},

969

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},

970

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},

971

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

972

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

973

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

974

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

975

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

976

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},

977

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

978

979

# Dash webm audio

980

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},

981

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

982

983

# Dash webm audio with opus inside

984

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},

985

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},

986

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

987

988

# RTMP (unnamed)

989

'_rtmp': {'protocol': 'rtmp'},

990

991

# av01 video only formats sometimes served with "unknown" codecs

992

'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

993

'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},

994

'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},

995

'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},

996

'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},

997

'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},

998

'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

999

'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},

1000

}

1001

_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

_GEO_BYPASS = False

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1013

'uploader': 'Philipp Hagemeister',

1014

'uploader_id': 'phihag',

1015

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1016

'channel': 'Philipp Hagemeister',

1017

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1018

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1019

'upload_date': '20121002',

1020

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1021

'categories': ['Science & Technology'],

1022

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1027

'playable_in_embed': True,

1028

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1029

'live_status': 'not_live',

'age_limit': 0,

'start_time': 1,

'end_time': 9,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

1037

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

1042

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

1043

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

1044

'uploader': 'SET India',

1045

'uploader_id': 'setindia',

1046

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

1047

'age_limit': 18,

1048

},

1049

'skip': 'Private video',

1050

},

1051

{

1052

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',

1053

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

1058

'uploader': 'Philipp Hagemeister',

1059

'uploader_id': 'phihag',

1060

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

1061

'channel': 'Philipp Hagemeister',

1062

'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',

1063

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',

1064

'upload_date': '20121002',

1065

'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',

1066

'categories': ['Science & Technology'],

1067

'tags': ['youtube-dl'],

'duration': 10,

'view_count': int,

'like_count': int,

'availability': 'public',

1072

'playable_in_embed': True,

1073

'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',

1074

'live_status': 'not_live',

'age_limit': 0,

},

'params': {

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

1083

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

1088

'uploader_id': '8KVIDEO',

1089

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

1090

'description': '',

1091

'uploader': '8KVIDEO',

1092

'title': 'UHDTV TEST 8K VIDEO.mp4'

1093

},

1094

'params': {

1095

'youtube_include_dash_manifest': True,

1096

'format': '141',

1097

},

1098

'skip': 'format 141 not served anymore',

1099

},

1100

# DASH manifest with encrypted signature

1101

{

1102

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',

1107

'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',

1108

'duration': 244,

1109

'uploader': 'AfrojackVEVO',

1110

'uploader_id': 'AfrojackVEVO',

1111

'upload_date': '20131011',

1112

'abr': 129.495,

1113

'like_count': int,

1114

'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',

1115

'playable_in_embed': True,

1116

'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',

1117

'view_count': int,

1118

'track': 'The Spark',

1119

'live_status': 'not_live',

1120

'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',

1121

'channel': 'Afrojack',

1122

'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',

1123

'tags': 'count:19',

1124

'availability': 'public',

1125

'categories': ['Music'],

1126

'age_limit': 0,

1127

'alt_title': 'The Spark',

1128

},

1129

'params': {

1130

'youtube_include_dash_manifest': True,

1131

'format': '141/bestaudio[ext=m4a]',

1132

},

1133

},

1134

# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000

1135

{

1136

'note': 'Embed allowed age-gate video',

1137

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

1142

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

1143

'duration': 142,

1144

'uploader': 'The Witcher',

1145

'uploader_id': 'WitcherGame',

1146

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

1147

'upload_date': '20140605',

1148

'age_limit': 18,

1149

'categories': ['Gaming'],

1150

'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',

1151

'availability': 'needs_auth',

1152

'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',

1153

'like_count': int,

1154

'channel': 'The Witcher',

1155

'live_status': 'not_live',

1156

'tags': 'count:17',

1157

'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',

1158

'playable_in_embed': True,

'view_count': int,

},

},

{

'note': 'Age-gate video with embed allowed in public site',

1164

'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',

'info_dict': {

'id': 'HsUATh_Nc2U',

'ext': 'mp4',

'title': 'Godzilla 2 (Official Video)',

1169

'description': 'md5:bf77e03fcae5529475e500129b05668a',

1170

'upload_date': '20200408',

1171

'uploader_id': 'FlyingKitty900',

1172

'uploader': 'FlyingKitty',

1173

'age_limit': 18,

1174

'availability': 'needs_auth',

1175

'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',

1176

'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',

1177

'channel': 'FlyingKitty',

1178

'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

1179

'view_count': int,

1180

'categories': ['Entertainment'],

1181

'live_status': 'not_live',

1182

'tags': ['Flyingkitty', 'godzilla 2'],

1183

'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',

1184

'like_count': int,

1185

'duration': 177,

1186

'playable_in_embed': True,

},

},

{

'note': 'Age-gate video embedable only with clientScreen=EMBED',

1191

'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',

1192

'info_dict': {

1193

'id': 'Tq92D6wQ1mg',

1194

'title': '[MMD] Adios - EVERGLOW [+Motion DL]',

1195

'ext': 'mp4',

1196

'upload_date': '20191227',

1197

'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1198

'uploader': 'Projekt Melody',

1199

'description': 'md5:17eccca93a786d51bc67646756894066',

1200

'age_limit': 18,

1201

'like_count': int,

1202

'availability': 'needs_auth',

1203

'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

1204

'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',

1205

'view_count': int,

1206

'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',

1207

'channel': 'Projekt Melody',

1208

'live_status': 'not_live',

1209

'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],

1210

'playable_in_embed': True,

1211

'categories': ['Entertainment'],

1212

'duration': 106,

1213

'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',

},

},

{

'note': 'Non-Agegated non-embeddable video',

1218

'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',

'info_dict': {

'id': 'MeJVWBSsPAY',

'ext': 'mp4',

'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',

1223

'uploader': 'Herr Lurik',

1224

'uploader_id': 'st3in234',

1225

'description': 'Fan Video. Music & Lyrics by OOMPH!.',

1226

'upload_date': '20130730',

1227

'track': 'Such mich find mich',

1228

'age_limit': 0,

1229

'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],

1230

'like_count': int,

1231

'playable_in_embed': False,

1232

'creator': 'OOMPH!',

1233

'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',

1234

'view_count': int,

1235

'alt_title': 'Such mich find mich',

1236

'duration': 210,

1237

'channel': 'Herr Lurik',

1238

'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',

1239

'categories': ['Music'],

1240

'availability': 'public',

1241

'uploader_url': 'http://www.youtube.com/user/st3in234',

1242

'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',

1243

'live_status': 'not_live',

'artist': 'OOMPH!',

},

},

{

'note': 'Non-bypassable age-gated video',

1249

'url': 'https://youtube.com/watch?v=Cr381pDsSsA',

1250

'only_matching': True,

1251

},

1252

# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)

1253

# YouTube Red ad is not captured for creator

1254

{

1255

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

1261

'uploader_id': 'deadmau5',

1262

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

1263

'creator': 'deadmau5',

1264

'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',

1265

'uploader': 'deadmau5',

1266

'title': 'Deadmau5 - Some Chords (HD)',

1267

'alt_title': 'Some Chords',

1268

'availability': 'public',

1269

'tags': 'count:14',

1270

'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',

1271

'view_count': int,

1272

'live_status': 'not_live',

1273

'channel': 'deadmau5',

1274

'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',

1275

'like_count': int,

1276

'track': 'Some Chords',

1277

'artist': 'deadmau5',

1278

'playable_in_embed': True,

1279

'age_limit': 0,

1280

'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',

1281

'categories': ['Music'],

1282

'album': 'Some Chords',

1283

},

1284

'expected_warnings': [

1285

'DASH manifest missing',

1286

]

1287

},

1288

# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)

1289

{

1290

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

1296

'uploader_id': 'olympic',

1297

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

1298

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

1299

'uploader': 'Olympics',

1300

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

1301

'like_count': int,

1302

'release_timestamp': 1343767800,

1303

'playable_in_embed': True,

1304

'categories': ['Sports'],

1305

'release_date': '20120731',

1306

'channel': 'Olympics',

1307

'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],

1308

'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',

1309

'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',

1310

'age_limit': 0,

1311

'availability': 'public',

1312

'live_status': 'was_live',

1313

'view_count': int,

1314

'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',

1315

},

1316

'params': {

1317

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

1327

'duration': 85,

1328

'upload_date': '20110310',

1329

'uploader_id': 'AllenMeow',

1330

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

1331

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

1332

'uploader': '孫ᄋᄅ',

1333

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

1334

'playable_in_embed': True,

'channel': '孫ᄋᄅ',

'age_limit': 0,

'tags': 'count:11',

'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',

1339

'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',

1340

'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',

1341

'view_count': int,

1342

'categories': ['People & Blogs'],

1343

'like_count': int,

1344

'live_status': 'not_live',

1345

'availability': 'unlisted',

1346

},

1347

},

1348

# url_encoded_fmt_stream_map is empty string

1349

{

1350

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

1355

'description': '',

1356

'upload_date': '20150404',

1357

'uploader_id': 'spbelect',

1358

'uploader': 'Наблюдатели Петербурга',

1359

},

1360

'params': {

1361

'skip_download': 'requires avconv',

1362

},

1363

'skip': 'This live event has ended.',

1364

},

1365

# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)

1366

{

1367

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'webm',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

1372

'description': 'md5:116377fd2963b81ec4ce64b542173306',

1373

'duration': 220,

1374

'upload_date': '20150625',

1375

'uploader_id': 'dorappi2000',

1376

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

1377

'uploader': 'dorappi2000',

1378

'formats': 'mincount:31',

1379

},

1380

'skip': 'not actual anymore',

1381

},

1382

# DASH manifest with segment_list

1383

{

1384

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

1385

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

1390

'uploader': 'Airtek',

1391

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

1392

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

1393

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

1394

},

1395

'params': {

1396

'youtube_include_dash_manifest': True,

1397

'format': '135', # bestvideo

1398

},

1399

'skip': 'This live event has ended.',

1400

},

1401

{

1402

# Multifeed videos (multiple cameras), URL is for Main Camera

1403

'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',

1404

'info_dict': {

1405

'id': 'jvGDaLqkpTg',

1406

'title': 'Tom Clancy Free Weekend Rainbow Whatever',

1407

'description': 'md5:e03b909557865076822aa169218d6a5d',

},

'playlist': [{

'info_dict': {

'id': 'jvGDaLqkpTg',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',

1414

'description': 'md5:e03b909557865076822aa169218d6a5d',

1415

'duration': 10643,

1416

'upload_date': '20161111',

1417

'uploader': 'Team PGP',

1418

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1419

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '3AKt1R1aDnw',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',

1426

'description': 'md5:e03b909557865076822aa169218d6a5d',

1427

'duration': 10991,

1428

'upload_date': '20161111',

1429

'uploader': 'Team PGP',

1430

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1431

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': 'RtAMM00gpVc',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',

1438

'description': 'md5:e03b909557865076822aa169218d6a5d',

1439

'duration': 10995,

1440

'upload_date': '20161111',

1441

'uploader': 'Team PGP',

1442

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1443

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}, {

'info_dict': {

'id': '6N2fdlP3C5U',

'ext': 'mp4',

'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',

1450

'description': 'md5:e03b909557865076822aa169218d6a5d',

1451

'duration': 10990,

1452

'upload_date': '20161111',

1453

'uploader': 'Team PGP',

1454

'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',

1455

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',

},

}],

'params': {

'skip_download': True,

1460

},

1461

'skip': 'Not multifeed anymore',

1462

},

1463

{

1464

# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)

1465

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

1466

'info_dict': {

1467

'id': 'gVfLd0zydlo',

1468

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

1469

},

1470

'playlist_count': 2,

1471

'skip': 'Not multifeed anymore',

1472

},

1473

{

1474

'url': 'https://vid.plus/FlRa-iH7PGw',

1475

'only_matching': True,

1476

},

1477

{

1478

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

1479

'only_matching': True,

1480

},

1481

{

1482

# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1483

# Also tests cut-off URL expansion in video description (see

1484

# https://github.com/ytdl-org/youtube-dl/issues/1892,

1485

# https://github.com/ytdl-org/youtube-dl/issues/8164)

1486

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

1491

'alt_title': 'Dark Walk',

1492

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

1493

'duration': 133,

1494

'upload_date': '20151119',

1495

'uploader_id': 'IronSoulElf',

1496

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

1497

'uploader': 'IronSoulElf',

1498

'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1499

'track': 'Dark Walk',

1500

'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',

1501

'album': 'Position Music - Production Music Vol. 143 - Dark Walk',

1502

'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',

1503

'categories': ['Film & Animation'],

1504

'view_count': int,

1505

'live_status': 'not_live',

1506

'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',

1507

'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',

1508

'tags': 'count:13',

1509

'availability': 'public',

1510

'channel': 'IronSoulElf',

1511

'playable_in_embed': True,

'like_count': int,

'age_limit': 0,

},

'params': {

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)

1521

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

1522

'only_matching': True,

1523

},

1524

{

1525

# Video with yt:stretch=17:0

1526

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

1531

'description': 'md5:ee18a25c350637c8faff806845bddee9',

1532

'upload_date': '20151107',

1533

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

1534

'uploader': 'CH GAMER DROID',

1535

},

1536

'params': {

1537

'skip_download': True,

1538

},

1539

'skip': 'This video does not exist.',

1540

},

1541

{

1542

# Video with incomplete 'yt:stretch=16:'

1543

'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',

1544

'only_matching': True,

1545

},

1546

{

1547

# Video licensed under Creative Commons

1548

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

1553

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

1554

'duration': 721,

1555

'upload_date': '20150127',

1556

'uploader_id': 'BerkmanCenter',

1557

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

1558

'uploader': 'The Berkman Klein Center for Internet & Society',

1559

'license': 'Creative Commons Attribution license (reuse allowed)',

1560

'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',

1561

'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',

1562

'like_count': int,

1563

'age_limit': 0,

1564

'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],

1565

'channel': 'The Berkman Klein Center for Internet & Society',

1566

'availability': 'public',

1567

'view_count': int,

1568

'categories': ['Education'],

1569

'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',

1570

'live_status': 'not_live',

1571

'playable_in_embed': True,

1572

},

1573

'params': {

1574

'skip_download': True,

},

},

{

# Channel-like uploader_url

1579

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

1584

'description': 'md5:13a2503d7b5904ef4b223aa101628f39',

1585

'duration': 4060,

1586

'upload_date': '20151119',

1587

'uploader': 'Bernie Sanders',

1588

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1589

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1590

'license': 'Creative Commons Attribution license (reuse allowed)',

1591

'playable_in_embed': True,

1592

'tags': 'count:12',

1593

'like_count': int,

1594

'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

1595

'age_limit': 0,

1596

'availability': 'public',

1597

'categories': ['News & Politics'],

1598

'channel': 'Bernie Sanders',

1599

'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',

1600

'view_count': int,

1601

'live_status': 'not_live',

1602

'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

1603

},

1604

'params': {

1605

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

1610

'only_matching': True,

1611

},

1612

{

1613

# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)

1614

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

1615

'only_matching': True,

1616

},

1617

{

1618

# Rental video preview

1619

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

1624

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

1625

'upload_date': '20150811',

1626

'uploader': 'FlixMatrix',

1627

'uploader_id': 'FlixMatrixKaravan',

1628

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

1629

'license': 'Standard YouTube License',

1630

},

1631

'params': {

1632

'skip_download': True,

1633

},

1634

'skip': 'This video is not available.',

1635

},

1636

{

1637

# YouTube Red video with episode data

1638

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

1643

'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',

1644

'duration': 2085,

1645

'upload_date': '20170118',

1646

'uploader': 'Vsauce',

1647

'uploader_id': 'Vsauce',

1648

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

1649

'series': 'Mind Field',

1650

'season_number': 1,

1651

'episode_number': 1,

1652

'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',

1653

'tags': 'count:12',

1654

'view_count': int,

1655

'availability': 'public',

1656

'age_limit': 0,

1657

'channel': 'Vsauce',

1658

'episode': 'Episode 1',

1659

'categories': ['Entertainment'],

1660

'season': 'Season 1',

1661

'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',

1662

'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',

1663

'like_count': int,

1664

'playable_in_embed': True,

1665

'live_status': 'not_live',

1666

},

1667

'params': {

1668

'skip_download': True,

1669

},

1670

'expected_warnings': [

1671

'Skipping DASH manifest',

],

},

{

# The following content has been identified by the YouTube community

1676

# as inappropriate or offensive to some audiences.

1677

'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',

'info_dict': {

'id': '6SJNVb0GnPI',

'ext': 'mp4',

'title': 'Race Differences in Intelligence',

1682

'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',

1683

'duration': 965,

1684

'upload_date': '20140124',

1685

'uploader': 'New Century Foundation',

1686

'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',

1687

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',

1688

},

1689

'params': {

1690

'skip_download': True,

1691

},

1692

'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',

},

{

# itag 212

'url': '1t24XAntNCY',

1697

'only_matching': True,

1698

},

1699

{

1700

# geo restricted to JP

1701

'url': 'sJL6WA-aGkQ',

1702

'only_matching': True,

1703

},

1704

{

1705

'url': 'https://invidio.us/watch?v=BaW_jenozKc',

1706

'only_matching': True,

1707

},

1708

{

1709

'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',

1710

'only_matching': True,

1711

},

1712

{

1713

# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m

1714

'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',

1715

'only_matching': True,

},

{

# DRM protected

'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',

1720

'only_matching': True,

1721

},

1722

{

1723

# Video with unsupported adaptive stream type formats

1724

'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',

'info_dict': {

'id': 'Z4Vy8R84T1U',

'ext': 'mp4',

'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',

1729

'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',

1730

'duration': 433,

1731

'upload_date': '20130923',

1732

'uploader': 'Amelia Putri Harwita',

1733

'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',

1734

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',

1735

'formats': 'maxcount:10',

1736

},

1737

'params': {

1738

'skip_download': True,

1739

'youtube_include_dash_manifest': False,

1740

},

1741

'skip': 'not actual anymore',

1742

},

1743

{

1744

# YouTube Music auto-generated description

1745

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

'info_dict': {

'id': 'MgNrAu2pzNs',

'ext': 'mp4',

'title': 'Voyeur Girl',

1750

'description': 'md5:7ae382a65843d6df2685993e90a8628f',

1751

'upload_date': '20190312',

1752

'uploader': 'Stephen - Topic',

1753

'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1754

'artist': 'Stephen',

1755

'track': 'Voyeur Girl',

1756

'album': 'it\'s too much love to know my dear',

1757

'release_date': '20190313',

1758

'release_year': 2019,

1759

'alt_title': 'Voyeur Girl',

1760

'view_count': int,

1761

'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1762

'playable_in_embed': True,

1763

'like_count': int,

1764

'categories': ['Music'],

1765

'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',

1766

'channel': 'Stephen',

1767

'availability': 'public',

1768

'creator': 'Stephen',

1769

'duration': 169,

1770

'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',

1771

'age_limit': 0,

1772

'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',

1773

'tags': 'count:11',

1774

'live_status': 'not_live',

1775

},

1776

'params': {

1777

'skip_download': True,

},

},

{

'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',

1782

'only_matching': True,

1783

},

1784

{

1785

# invalid -> valid video id redirection

1786

'url': 'DJztXj2GPfl',

'info_dict': {

'id': 'DJztXj2GPfk',

'ext': 'mp4',

'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',

1791

'description': 'md5:bf577a41da97918e94fa9798d9228825',

1792

'upload_date': '20090125',

1793

'uploader': 'Prochorowka',

1794

'uploader_id': 'Prochorowka',

1795

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',

1796

'artist': 'Panjabi MC',

1797

'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',

1798

'album': 'Beware of the Boys (Mundian To Bach Ke)',

1799

},

1800

'params': {

1801

'skip_download': True,

1802

},

1803

'skip': 'Video unavailable',

1804

},

1805

{

1806

# empty description results in an empty string

1807

'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',

'info_dict': {

'id': 'x41yOUIvK2k',

'ext': 'mp4',

'title': 'IMG 3456',

'description': '',

'upload_date': '20170613',

1814

'uploader_id': 'ElevageOrVert',

1815

'uploader': 'ElevageOrVert',

1816

'view_count': int,

1817

'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',

1818

'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',

1819

'like_count': int,

1820

'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',

1821

'tags': [],

1822

'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',

1823

'availability': 'public',

1824

'age_limit': 0,

1825

'categories': ['Pets & Animals'],

1826

'duration': 7,

1827

'playable_in_embed': True,

1828

'live_status': 'not_live',

1829

'channel': 'ElevageOrVert',

1830

},

1831

'params': {

1832

'skip_download': True,

},

},

{

# with '};' inside yt initial data (see [1])

1837

# see [2] for an example with '};' inside ytInitialPlayerResponse

1838

# 1. https://github.com/ytdl-org/youtube-dl/issues/27093

1839

# 2. https://github.com/ytdl-org/youtube-dl/issues/27216

1840

'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',

'info_dict': {

'id': 'CHqg6qOn4no',

'ext': 'mp4',

'title': 'Part 77 Sort a list of simple types in c#',

1845

'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',

1846

'upload_date': '20130831',

1847

'uploader_id': 'kudvenkat',

1848

'uploader': 'kudvenkat',

1849

'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',

1850

'like_count': int,

1851

'uploader_url': 'http://www.youtube.com/user/kudvenkat',

1852

'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',

1853

'live_status': 'not_live',

1854

'categories': ['Education'],

1855

'availability': 'public',

1856

'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',

1857

'tags': 'count:12',

1858

'playable_in_embed': True,

'age_limit': 0,

'view_count': int,

'duration': 522,

'channel': 'kudvenkat',

1863

},

1864

'params': {

1865

'skip_download': True,

},

},

{

# another example of '};' in ytInitialData

1870

'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',

1871

'only_matching': True,

1872

},

1873

{

1874

'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',

1875

'only_matching': True,

1876

},

1877

{

1878

# https://github.com/ytdl-org/youtube-dl/pull/28094

1879

'url': 'OtqTfy26tG0',

'info_dict': {

'id': 'OtqTfy26tG0',

'ext': 'mp4',

'title': 'Burn Out',

'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',

1885

'upload_date': '20141120',

1886

'uploader': 'The Cinematic Orchestra - Topic',

1887

'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1888

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1889

'artist': 'The Cinematic Orchestra',

1890

'track': 'Burn Out',

1891

'album': 'Every Day',

1892

'like_count': int,

1893

'live_status': 'not_live',

1894

'alt_title': 'Burn Out',

'duration': 614,

'age_limit': 0,

'view_count': int,

'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',

1899

'creator': 'The Cinematic Orchestra',

1900

'channel': 'The Cinematic Orchestra',

1901

'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],

1902

'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',

1903

'availability': 'public',

1904

'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',

1905

'categories': ['Music'],

1906

'playable_in_embed': True,

1907

},

1908

'params': {

1909

'skip_download': True,

},

},

{

# controversial video, only works with bpctr when authenticated with cookies

1914

'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',

1915

'only_matching': True,

1916

},

1917

{

1918

# controversial video, requires bpctr/contentCheckOk

1919

'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',

'info_dict': {

'id': 'SZJvDhaSDnc',

'ext': 'mp4',

'title': 'San Diego teen commits suicide after bullying over embarrassing video',

1924

'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',

1925

'uploader': 'CBS Mornings',

1926

'uploader_id': 'CBSThisMorning',

1927

'upload_date': '20140716',

1928

'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',

1929

'duration': 170,

1930

'categories': ['News & Politics'],

1931

'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',

1932

'view_count': int,

1933

'channel': 'CBS Mornings',

1934

'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],

1935

'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',

1936

'age_limit': 18,

1937

'availability': 'needs_auth',

1938

'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',

1939

'like_count': int,

1940

'live_status': 'not_live',

1941

'playable_in_embed': True,

}

},

{

# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685

1946

'url': 'cBvYw8_A0vQ',

'info_dict': {

'id': 'cBvYw8_A0vQ',

'ext': 'mp4',

'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',

1951

'description': 'md5:ea770e474b7cd6722b4c95b833c03630',

1952

'upload_date': '20201120',

1953

'uploader': 'Walk around Japan',

1954

'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1955

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1956

'duration': 1456,

1957

'categories': ['Travel & Events'],

1958

'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',

1959

'view_count': int,

1960

'channel': 'Walk around Japan',

1961

'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],

1962

'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',

1963

'age_limit': 0,

1964

'availability': 'public',

1965

'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',

1966

'live_status': 'not_live',

1967

'playable_in_embed': True,

1968

},

1969

'params': {

1970

'skip_download': True,

1971

},

1972

}, {

1973

# Has multiple audio streams

1974

'url': 'WaOKSUlf4TM',

1975

'only_matching': True

1976

}, {

1977

# Requires Premium: has format 141 when requested using YTM url

1978

'url': 'https://music.youtube.com/watch?v=XclachpHxis',

1979

'only_matching': True

1980

}, {

1981

# multiple subtitles with same lang_code

1982

'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',

1983

'only_matching': True,

1984

}, {

1985

# Force use of the android client fallback

1986

'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',

1987

'info_dict': {

1988

'id': 'YOelRv7fMxY',

1989

'title': 'DIGGING A SECRET TUNNEL Part 1',

1990

'ext': '3gp',

1991

'upload_date': '20210624',

1992

'channel_id': 'UCp68_FLety0O-n9QU6phsgw',

1993

'uploader': 'colinfurze',

1994

'uploader_id': 'colinfurze',

1995

'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',

1996

'description': 'md5:5d5991195d599b56cd0c4148907eec50',

1997

'duration': 596,

1998

'categories': ['Entertainment'],

1999

'uploader_url': 'http://www.youtube.com/user/colinfurze',

2000

'view_count': int,

2001

'channel': 'colinfurze',

2002

'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],

2003

'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',

2004

'age_limit': 0,

2005

'availability': 'public',

2006

'like_count': int,

2007

'live_status': 'not_live',

2008

'playable_in_embed': True,

2009

},

2010

'params': {

2011

'format': '17', # 3gp format available on android

2012

'extractor_args': {'youtube': {'player_client': ['android']}},

},

},

{

# Skip download of additional client configs (remix client config in this case)

2017

'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',

2018

'only_matching': True,

2019

'params': {

2020

'extractor_args': {'youtube': {'player_skip': ['configs']}},

},

}, {

# shorts

'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',

2025

'only_matching': True,

2026

}, {

2027

'note': 'Storyboards',

2028

'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',

'info_dict': {

'id': '5KLPxDtMqe8',

'ext': 'mhtml',

'format_id': 'sb0',

'title': 'Your Brain is Plastic',

2034

'uploader_id': 'scishow',

2035

'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',

2036

'upload_date': '20140324',

2037

'uploader': 'SciShow',

2038

'like_count': int,

2039

'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',

2040

'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',

2041

'view_count': int,

2042

'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',

2043

'playable_in_embed': True,

2044

'tags': 'count:12',

2045

'uploader_url': 'http://www.youtube.com/user/scishow',

2046

'availability': 'public',

2047

'channel': 'SciShow',

2048

'live_status': 'not_live',

2049

'duration': 248,

2050

'categories': ['Education'],

2051

'age_limit': 0,

2052

}, 'params': {'format': 'mhtml', 'skip_download': True}

}

]

@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query value.

    Every other URL is handed to the parent class' ``suitable``.
    """
    # NOTE(review): parse_qs is also imported at module level; this local
    # import looks redundant but is presumably deliberate - confirm before
    # removing it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)

2064

2065

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (see _load_player)
    # _player_cache: assorted keys -> signature/nsig functions and results
    self._code_cache, self._player_cache = {}, {}

2069

2070

def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):

2071

lock = threading.Lock()

2072

2073

is_live = True

2074

start_time = time.time()

2075

formats = [f for f in formats if f.get('is_from_start')]

2076

2077

def refetch_manifest(format_id, delay):

2078

nonlocal formats, start_time, is_live

2079

if time.time() <= start_time + delay:

2080

return

2081

2082

_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

2083

video_details = traverse_obj(

2084

prs, (..., 'videoDetails'), expected_type=dict, default=[])

2085

microformats = traverse_obj(

2086

prs, (..., 'microformat', 'playerMicroformatRenderer'),

2087

expected_type=dict, default=[])

2088

_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)

2089

start_time = time.time()

2090

2091

def mpd_feed(format_id, delay):

2092

"""

2093

@returns (manifest_url, manifest_stream_number, is_live) or None

2094

"""

2095

with lock:

2096

refetch_manifest(format_id, delay)

2097

2098

f = next((f for f in formats if f['format_id'] == format_id), None)

2099

if not f:

2100

if not is_live:

2101

self.to_screen(f'{video_id}: Video is no longer live')

2102

else:

2103

self.report_warning(

2104

f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')

2105

return None

2106

return f['manifest_url'], f['manifest_stream_number'], is_live

2107

2108

for f in formats:

2109

f['protocol'] = 'http_dash_segments_generator'

2110

f['fragments'] = functools.partial(

2111

self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)

2112

2113

def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):

2114

FETCH_SPAN, MAX_DURATION = 5, 432000

2115

2116

mpd_url, stream_number, is_live = None, None, True

2117

2118

begin_index = 0

2119

download_start_time = ctx.get('start') or time.time()

2120

2121

lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION

2122

if lack_early_segments:

2123

self.report_warning(bug_reports_message(

2124

'Starting download from the last 120 hours of the live stream since '

2125

'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

2126

lack_early_segments = True

2127

2128

known_idx, no_fragment_score, last_segment_url = begin_index, 0, None

2129

fragments, fragment_base_url = None, None

2130

2131

def _extract_sequence_from_mpd(refresh_sequence):

2132

nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url

2133

# Obtain from MPD's maximum seq value

2134

old_mpd_url = mpd_url

2135

last_error = ctx.pop('last_error', None)

2136

expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403

2137

mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)

2138

or (mpd_url, stream_number, False))

2139

if not refresh_sequence:

2140

if expire_fast and not is_live:

2141

return False, last_seq

2142

elif old_mpd_url == mpd_url:

2143

return True, last_seq

2144

try:

2145

fmts, _ = self._extract_mpd_formats_and_subtitles(

2146

mpd_url, None, note=False, errnote=False, fatal=False)

2147

except ExtractorError:

2148

fmts = None

2149

if not fmts:

2150

no_fragment_score += 1

2151

return False, last_seq

2152

fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)

2153

fragments = fmt_info['fragments']

2154

fragment_base_url = fmt_info['fragment_base_url']

2155

assert fragment_base_url

2156

2157

_last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))

2158

return True, _last_seq

2159

2160

while is_live:

2161

fetch_time = time.time()

2162

if no_fragment_score > 30:

2163

return

2164

if last_segment_url:

2165

# Obtain from "X-Head-Seqnum" header value from each segment

2166

try:

2167

urlh = self._request_webpage(

2168

last_segment_url, None, note=False, errnote=False, fatal=False)

2169

except ExtractorError:

2170

urlh = None

2171

last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))

2172

if last_seq is None:

2173

no_fragment_score += 1

2174

last_segment_url = None

2175

continue

2176

else:

2177

should_continue, last_seq = _extract_sequence_from_mpd(True)

2178

if not should_continue:

2179

continue

2180

2181

if known_idx > last_seq:

2182

last_segment_url = None

continue

last_seq += 1

if begin_index < 0 and known_idx < 0:

2188

# skip from the start when it's negative value

2189

known_idx = last_seq + begin_index

2190

if lack_early_segments:

2191

known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))

2192

try:

2193

for idx in range(known_idx, last_seq):

2194

# do not update sequence here or you'll get skipped some part of it

2195

should_continue, _ = _extract_sequence_from_mpd(False)

2196

if not should_continue:

2197

known_idx = idx - 1

2198

raise ExtractorError('breaking out of outer loop')

2199

last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)

2200

yield {

2201

'url': last_segment_url,

2202

}

2203

if known_idx == last_seq:

2204

no_fragment_score += 5

2205

else:

2206

no_fragment_score = 0

2207

known_idx = last_seq

2208

except ExtractorError:

2209

continue

2210

2211

time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))

2212

2213

def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL taken from *ytcfgs*, or None.

    The first string found under ``PLAYER_JS_URL`` or
    ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl`` is used. *webpage* is accepted
    but not read here.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not candidate:
        return
    if candidate.startswith('//'):
        # Protocol-relative URL
        return 'https:' + candidate
    if re.match(r'https?://', candidate):
        return candidate
    return compat_urlparse.urljoin('https://www.youtube.com', candidate)

2225

2226

def _download_player_url(self, video_id, fatal=False):

2227

res = self._download_webpage(

2228

'https://www.youtube.com/iframe_api',

2229

note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

2230

if res:

2231

player_version = self._search_regex(

2232

r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)

2233

if player_version:

2234

return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'

2235

2236

def _signature_cache_id(self, example_sig):

2237

""" Return a string representation of a signature """

2238

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

2239

2240

@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next((m for m in attempts if m), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')

2249

2250

def _load_player(self, video_id, player_url, fatal=True):

2251

player_id = self._extract_player_info(player_url)

2252

if player_id not in self._code_cache:

2253

code = self._download_webpage(

2254

player_url, video_id, fatal=fatal,

2255

note='Downloading player ' + player_id,

2256

errnote='Download of %s failed' % player_url)

2257

if code:

2258

self._code_cache[player_id] = code

2259

return self._code_cache.get(player_id)

2260

2261

def _extract_signature_function(self, video_id, player_url, example_sig):

2262

player_id = self._extract_player_info(player_url)

2263

2264

# Read from filesystem cache

2265

func_id = 'js_%s_%s' % (

2266

player_id, self._signature_cache_id(example_sig))

2267

assert os.path.basename(func_id) == func_id

2268

2269

cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)

2270

if cache_spec is not None:

2271

return lambda s: ''.join(s[i] for i in cache_spec)

2272

2273

code = self._load_player(video_id, player_url)

2274

if code:

2275

res = self._parse_sig_js(code)

2276

2277

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2278

cache_res = res(test_string)

2279

cache_spec = [ord(c) for c in cache_res]

2280

2281

self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)

2282

return res

2283

2284

def _print_sig_code(self, func, example_sig):

2285

if not self.get_param('youtube_print_sig_code'):

2286

return

2287

2288

def gen_sig_code(idxs):

2289

def _genslice(start, end, step):

2290

starts = '' if start == 0 else str(start)

2291

ends = (':%d' % (end + step)) if end + step >= 0 else ':'

2292

steps = '' if step == 1 else (':%d' % step)

2293

return 's[%s%s%s]' % (starts, ends, steps)

2294

2295

step = None

2296

# Quelch pyflakes warnings - start will be set when step is set

2297

start = '(Never used)'

2298

for i, prev in zip(idxs[1:], idxs[:-1]):

if step is not None:

if i - prev == step:

continue

yield _genslice(start, prev, step)

2303

step = None

2304

continue

2305

if i - prev in [-1, 1]:

step = i - prev

start = prev

continue

else:

yield 's[%d]' % prev

if step is None:

yield 's[%d]' % i

else:

yield _genslice(start, i, step)

2315

2316

test_string = ''.join(map(compat_chr, range(len(example_sig))))

2317

cache_res = func(test_string)

2318

cache_spec = [ord(c) for c in cache_res]

2319

expr_code = ' + '.join(gen_sig_code(cache_spec))

2320

signature_id_tuple = '(%s)' % (

2321

', '.join(compat_str(len(p)) for p in example_sig.split('.')))

2322

code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'

2323

' return %s\n') % (signature_id_tuple, expr_code)

2324

self.to_screen('Extracted signature function:\n' + code)

2325

2326

def _parse_sig_js(self, jscode):
    """Locate the signature function in *jscode* and return a Python wrapper.

    The function name is searched with the patterns below (first match
    wins); the function is then extracted with JSInterpreter. The returned
    callable takes the signature string and passes it as the only argument.
    """
    # Escaped parentheses were restored in the patterns that contained a
    # bare ``$`` outside a character class; as written those patterns
    # anchored at end-of-string mid-pattern and could never match.
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])

2349

2350

def _decrypt_signature(self, s, video_id, player_url):

2351

"""Turn the encrypted s field into a working signature"""

2352

2353

if player_url is None:

2354

raise ExtractorError('Cannot decrypt signature without player_url')

2355

2356

try:

2357

player_id = (player_url, self._signature_cache_id(s))

2358

if player_id not in self._player_cache:

2359

func = self._extract_signature_function(

2360

video_id, player_url, s

2361

)

2362

self._player_cache[player_id] = func

2363

func = self._player_cache[player_id]

2364

self._print_sig_code(func, s)

2365

return func(s)

2366

except Exception as e:

2367

raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)

2368

2369

def _decrypt_nsig(self, s, video_id, player_url):

2370

"""Turn the encrypted n field into a working signature"""

2371

if player_url is None:

2372

raise ExtractorError('Cannot decrypt nsig without player_url')

2373

if player_url.startswith('//'):

2374

player_url = 'https:' + player_url

2375

elif not re.match(r'https?://', player_url):

2376

player_url = compat_urlparse.urljoin(

2377

'https://www.youtube.com', player_url)

2378

2379

sig_id = ('nsig_value', s)

2380

if sig_id in self._player_cache:

2381

return self._player_cache[sig_id]

2382

2383

try:

2384

player_id = ('nsig', player_url)

2385

if player_id not in self._player_cache:

2386

self._player_cache[player_id] = self._extract_n_function(video_id, player_url)

2387

func = self._player_cache[player_id]

2388

self._player_cache[sig_id] = func(s)

2389

self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')

2390

return self._player_cache[sig_id]

2391

except Exception as e:

2392

raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)

2393

2394

def _extract_n_function_name(self, jscode):

2395

return self._search_regex(

2396

(r'\.get$"n"$\)&&$b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]$',),

2397

jscode, 'Initial JS player n function name', group='nfunc')

2398

2399

def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's "n" function to a string.

    The function code is read from the ``youtube-nsig`` cache when present;
    otherwise it is extracted from the player code and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function

2416

2417

def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):

2418

"""

2419

Extract signatureTimestamp (sts)

2420

Required to tell API what sig/player version is in use.

2421

"""

2422

sts = None

2423

if isinstance(ytcfg, dict):

2424

sts = int_or_none(ytcfg.get('STS'))

2425

2426

if not sts:

2427

# Attempt to extract from player

2428

if player_url is None:

2429

error_msg = 'Cannot extract signature timestamp without player_url.'

2430

if fatal:

2431

raise ExtractorError(error_msg)

2432

self.report_warning(error_msg)

2433

return

2434

code = self._load_player(video_id, player_url, fatal=fatal)

2435

if code:

2436

sts = int_or_none(self._search_regex(

2437

r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,

2438

'JS player signature timestamp', group='sts', fatal=fatal))

2439

return sts

2440

2441

def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video counts as watched.

    The URL is taken from ``playbackTracking/videostatsPlaybackUrl/baseUrl``
    of the first player response that has one; ``ver`` and a random ``cpn``
    are added to its query. A warning is reported when no URL is found.
    The request itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # randrange(256) draws from 0..255, so after masking every alphabet
    # index is equally likely. randint(0, 256) included 256, which
    # over-weighted index 0.
    cpn = ''.join(CPN_ALPHABET[random.randrange(256) & 63] for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

2466

2467

@staticmethod
def _extract_urls(webpage):
    """Return YouTube embed URLs/ids found in the HTML string *webpage*.

    Three sources are scanned, in this order: generic player embeds,
    ``lazyYT`` elements, and the Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [
        unescapeHTML(match.group('url'))
        for match in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall returns one tuple per match; the video id is the last group
    entries.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return entries

@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]

2503

2504

@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)

2510

2511

def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` list in *data*.

    Start times come from ``timeRangeStartMillis`` (scaled to seconds) and
    titles from ``title/simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)

2525

2526

def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the engagement panels.

    Each ``macroMarkersListRenderer`` contents list is converted in turn;
    an empty list is returned when none yields any chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Later panels are not evaluated once one produced chapters
    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []

2541

2542

def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):

2543

chapters = []

2544

last_chapter = {'start_time': 0}

2545

for idx, chapter in enumerate(chapter_list or []):

2546

title = chapter_title(chapter)

2547

start_time = chapter_time(chapter)

2548

if start_time is None:

2549

continue

2550

last_chapter['end_time'] = start_time

2551

if start_time < last_chapter['start_time']:

2552

if idx == 1:

2553

chapters.pop()

2554

self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])

2555

else:

2556

self.report_warning(f'Invalid start time for chapter "{title}"')

2557

continue

2558

last_chapter = {'start_time': start_time, 'title': title}

2559

chapters.append(last_chapter)

2560

last_chapter['end_time'] = duration

2561

return chapters

2562

2563

def _extract_yt_initial_variable(self, webpage, regex, video_id, name):

2564

return self._parse_json(self._search_regex(

2565

(r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),

2566

regex), webpage, name, default='{}'), video_id, fatal=False)

2567

2568

def _extract_comment(self, comment_renderer, parent=None):

2569

comment_id = comment_renderer.get('commentId')

if not comment_id:

return

text = self._get_text(comment_renderer, 'contentText')

2574

2575

# note: timestamp is an estimate calculated from the current time and time_text

2576

timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')

2577

author = self._get_text(comment_renderer, 'authorText')

2578

author_id = try_get(comment_renderer,

2579

lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

2580

2581

votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],

2582

lambda x: x['likeCount']), compat_str)) or 0

2583

author_thumbnail = try_get(comment_renderer,

2584

lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

2585

2586

author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

2587

is_favorited = 'creatorHeart' in (try_get(

2588

comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})

return {

'id': comment_id,

'text': text,

'timestamp': timestamp,

2593

'time_text': time_text,

2594

'like_count': votes,

2595

'is_favorited': is_favorited,

2596

'author': author,

2597

'author_id': author_id,

2598

'author_thumbnail': author_thumbnail,

2599

'author_is_uploader': author_is_uploader,

2600

'parent': parent or 'root'

2601

}

2602

2603

def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following continuations page by page.

    Called without *parent* for top-level comments; it calls itself with
    *parent* set to a comment id to fetch that comment's replies.
    *tracker* is a dict of running counters shared across those recursive
    calls and is created on the first call.
    """

    def config_arg(key):
        return self._configuration_arg(key, [''])[0]

    def extract_header(contents):
        """Return the continuation of the selected sort order, if any."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield each comment in *contents*, followed by its replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                reply_budget = min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
                yield from itertools.islice(comment_entries_iter, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over at most ``max_comments`` comments (the
    first value of that extractor argument; unlimited when it is not a
    number). *webpage* is accepted but not read here.
    """
    def _real_comment_extract(contents):
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        renderer = None
        for item in sections:
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)

2750

2751

@staticmethod

2752

def _get_checkok_params():

2753

return {'contentCheckOk': True, 'racyCheckOk': True}

2754

2755

@classmethod

2756

def _generate_player_context(cls, sts=None):

2757

context = {

2758

'html5Preference': 'HTML5_PREF_WANTS',

2759

}

2760

if sts is not None:

2761

context['signatureTimestamp'] = sts

2762

return {

2763

'playbackContext': {

2764

'contentPlaybackContext': context

2765

},

2766

**cls._get_checkok_params()

}

@staticmethod
def _is_agegated(player_response):
    """Return True when *player_response* indicates an age gate.

    True if ``desktopLegacyAgeGateReason`` is set, or if the playability
    ``status``/``reason`` contains one of the known marker substrings.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False

2780

2781

@staticmethod
def _is_unplayable(player_response):
    """Return True when the playability status is exactly ``'UNPLAYABLE'``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'

2784

2785

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):

2786

2787

session_index = self._extract_session_index(player_ytcfg, master_ytcfg)

2788

syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

2789

sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None

2790

headers = self.generate_api_headers(

2791

ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

2792

2793

yt_query = {'videoId': video_id}

2794

yt_query.update(self._generate_player_context(sts))

2795

return self._extract_response(

2796

item_id=video_id, ep='player', query=yt_query,

2797

ytcfg=player_ytcfg, headers=headers, fatal=True,

2798

default_client=client,

2799

note='Downloading %s player API JSON' % client.replace('_', ' ').strip()

2800

) or None

2801

2802

def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Values of the ``player_client`` extractor argument are accepted when
    they name a client not starting with ``_``; ``default`` and ``all``
    expand to the default list and to every allowed client respectively.
    Unknown values are skipped with a warning. For music URLs the
    ``<client>_music`` variant of each requested client is appended when
    such a client exists.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list from a generator
        # that iterates the same list would also visit the new entries.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)

2825

2826

def _extract_player_ytcfg(self, client, video_id):

2827

url = {

2828

'web_music': 'https://music.youtube.com',

2829

'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'

}.get(client)

if not url:

return {}

webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())

2834

return self.extract_ytcfg(video_id, webpage) or {}

2835

2836

def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for every requested client.

    Returns a `(player_responses, player_url)` tuple.  Clients are processed
    in the order given; age-gate / creator variants may be pushed while
    iterating and are then processed next.  An ExtractorError from one client
    is remembered and reported as a warning; it is re-raised only when no
    player response at all could be collected.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack: pop() yields clients in requested order
    clients = clients[::-1]
    responses = []

    def append_client(client_name):
        known = client_name in INNERTUBE_CLIENTS
        if known and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        responses.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per extraction
        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error may become fatal; older ones are warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            responses.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not responses:
            raise last_error
        self.report_warning(last_error)
    return responses, player_url

2902

2903

def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the given streamingData entries.

    First the directly listed `formats`/`adaptiveFormats` are yielded
    (deciphering the signature and the `n` parameter when needed), then the
    formats found in the HLS and DASH manifests referenced by each entry.

    Changes compared to the previous implementation:
      * a format without `quality`, `audioQuality` and `qualityLabel` no
        longer raises AttributeError while building `format_note`;
      * an explicit `audioQuality: None` no longer raises AttributeError;
      * the already extracted `encrypted_sig` is reused instead of
        re-indexing the parsed signature cipher.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may appear once per audio track, hence the composite id
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            # Without a player the signature cannot be deciphered
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` may still be None here, so guard before .replace()
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign id/quality to a manifest format; return False for duplicates."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Prefer the quality recorded for the itag, then the one for the height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f

def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` storyboard format per level of the storyboard spec.

    The spec string is split on `|`: the first field is the base URL and
    each remaining field describes one storyboard level as 8 `#`-separated
    values.  Malformed levels are reported with a warning and skipped.

    Changes compared to the previous implementation:
      * the "no storyboard" check now tests the base URL; the old
        `if not spec` could never trigger because `''.split('|')` is `['']`;
      * a missing `duration` (None) is treated as 0 instead of raising
        TypeError in the fragment duration arithmetic.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so the base URL (first field) is popped from the end
    base_url = spec.pop()
    if not base_url:
        return
    # Callers may pass None when the duration is unknown
    duration = duration or 0
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` is reversed, so index i maps to level (last_level - i)
        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment_duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }

3090

3091

def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and all player responses.

    Returns `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` stays None when `webpage` is listed in the `player_skip`
    configuration argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the built-in default config when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url

3104

3105

def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine liveness and build the format list from the player responses.

    Returns `(live_broadcast_details, is_live, streaming_data, formats)`.
    `is_live` comes from `videoDetails.isLive`, falling back to
    `liveBroadcastDetails.isLiveNow` when the former is absent.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])

    # _extract_formats is a generator; materialize it once for the caller
    formats = []
    formats.extend(self._extract_formats(streaming_data, video_id, player_url, is_live))

    return live_broadcast_details, is_live, streaming_data, formats

3115

3116

def _real_extract(self, url):
    """Extract the info dict (or a playlist / url result) for a video URL.

    Steps, in order: download the player responses, short-circuit to a
    trailer or a multifeed playlist when applicable, build the formats and
    raise a descriptive error when none are available, then collect
    thumbnails, metadata, subtitles, chapters, like counts, music metadata
    and availability.

    Changes compared to the previous implementation:
      * `album` is stripped correctly (`.strip()` used to be applied to the
        literal group name instead of the matched text);
      * `contents` is always bound before being handed to
        `extract_comments`, even when no initial data could be obtained;
      * a sub-reason without a main reason no longer raises TypeError;
      * non-dict items in the watch-next contents are skipped in the
        metadata loop as well (the badge loop already did so);
      * the `tlang` query parameter for translated subtitles uses the
        plain translation language code rather than the composed
        `<trans>-<lang>` key.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None; never concatenate onto it blindly
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # Keep the plain code for the request: the "-<lang_code>" suffix
                # added below only disambiguates the key in `automatic_captions`
                # NOTE(review): assumes `tlang` expects a plain translationLanguages code - confirm
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': orig_trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, values in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(values[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text, not the group name literal
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Bound unconditionally: it is also passed to extract_comments() below,
    # which used to fail with UnboundLocalError when there was no initial data
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line means several songs are listed; in that case
                # the album/artist/track rows are not attributed to the video
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info

class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):

3565

3566

def _extract_channel_id(self, webpage):
    """Return the channel id found in the given channel webpage.

    The `channelId` meta tag is preferred.  Otherwise the id is taken from
    the `/channel/<id>` part of the first URL-bearing meta tag found.

    Fix: the quantifier used to sit outside the capturing group
    (`([^/?#&])+`), so only the last character of the id was captured.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    return self._search_regex(
        # The "+" must be inside the group to capture the whole id
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')

3578

3579

@staticmethod

3580

def _extract_basic_item_renderer(item):

3581

# Modified from _extract_grid_item_renderer

3582

known_basic_renderers = (

3583

'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'

3584

)

3585

for key, renderer in item.items():

3586

if not isinstance(renderer, dict):

3587

continue

3588

elif key in known_basic_renderers:

3589

return renderer

3590

elif key.startswith('grid') and key.endswith('Renderer'):

3591

return renderer

3592

3593

def _grid_entries(self, grid_renderer):

3594

for item in grid_renderer['items']:

3595

if not isinstance(item, dict):

3596

continue

3597

renderer = self._extract_basic_item_renderer(item)

3598

if not isinstance(renderer, dict):

3599

continue

3600

title = self._get_text(renderer, 'title')

3601

3602

# playlist

3603

playlist_id = renderer.get('playlistId')

3604

if playlist_id:

3605

yield self.url_result(

3606

'https://www.youtube.com/playlist?list=%s' % playlist_id,

3607

ie=YoutubeTabIE.ie_key(), video_id=playlist_id,

video_title=title)

continue

# video

video_id = renderer.get('videoId')

3612

if video_id:

3613

yield self._extract_video(renderer)

3614

continue

3615

# channel

3616

channel_id = renderer.get('channelId')

3617

if channel_id:

3618

yield self.url_result(

3619

'https://www.youtube.com/channel/%s' % channel_id,

3620

ie=YoutubeTabIE.ie_key(), video_title=title)

3621

continue

3622

# generic endpoint URL support

3623

ep_url = urljoin('https://www.youtube.com/', try_get(

3624

renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],

3625

compat_str))

3626

if ep_url:

3627

for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):

3628

if ie.suitable(ep_url):

3629

yield self.url_result(

3630

ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)

3631

break

3632

3633

def _shelf_entries_from_content(self, shelf_renderer):

3634

content = shelf_renderer.get('content')

3635

if not isinstance(content, dict):

3636

return

3637

renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')

3638

if renderer:

3639

# TODO: add support for nested playlists so each shelf is processed

3640

# as separate playlist

3641

# TODO: this includes only first N items

3642

for entry in self._grid_entries(renderer):

3643

yield entry

3644

renderer = content.get('horizontalListRenderer')

if renderer:

# TODO

pass

def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for one shelf.

    If the shelf has an endpoint URL, a URL result pointing at it is yielded
    first (titled with the shelf title). The entries embedded in the shelf
    content are then yielded as well.

    @param shelf_renderer: shelf renderer dict
    @param skip_channels: when true, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)

3665

3666

def _playlist_entries(self, video_list_renderer):

3667

for content in video_list_renderer['contents']:

3668

if not isinstance(content, dict):

3669

continue

3670

renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')

3671

if not isinstance(renderer, dict):

3672

continue

3673

video_id = renderer.get('videoId')

3674

if not video_id:

3675

continue

3676

yield self._extract_video(renderer)

3677

3678

def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``rich_grid_renderer['content']['videoRenderer']``.

    Yields nothing when that renderer is absent, is not a dict, or has no
    'videoId'.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)

3685

3686

def _video_entry(self, video_renderer):

3687

video_id = video_renderer.get('videoId')

3688

if video_id:

3689

return self._extract_video(video_renderer)

3690

3691

def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    and every link in the post text that YoutubeIE accepts - except a link
    to the same video that is already attached.

    @param post_thread_renderer: dict expected to hold
                                 ['post']['backstagePostRenderer']
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id == video_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)

3726

3727

def _post_thread_continuation_entries(self, post_thread_continuation):

3728

contents = post_thread_continuation.get('contents')

3729

if not isinstance(contents, list):

3730

return

3731

for content in contents:

3732

renderer = content.get('backstagePostThreadRenderer')

3733

if not isinstance(renderer, dict):

3734

continue

3735

for entry in self._post_thread_entries(renderer):

yield entry

r''' # unused

def _rich_grid_entries(self, contents):

3740

for content in contents:

3741

video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)

3742

if video_renderer:

3743

entry = self._video_entry(video_renderer)

if entry:

yield entry

'''

def _extract_entries(self, parent_renderer, continuation_list):
    """Yield all entries under ``parent_renderer['contents']``.

    Top-level contents are either 'itemSectionRenderer' dicts, whose own
    contents are dispatched by renderer key to the matching entry
    extractor, or 'richItemRenderer' dicts handled by _rich_entries.

    @param parent_renderer: dict with a 'contents' list
    @param continuation_list: one-element list used as an output parameter.
                              It is reset to [None] on entry and, as the
                              generator is consumed, its first element is
                              set to the result of _extract_continuation
                              (renderer first, then section, then parent).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each section item is used
            for key, renderer in section_item.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)

3790

3791

def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of a tab, following continuations page by page.

    The first page comes from the tab's 'sectionListRenderer' or
    'richGridRenderer'. While a continuation is available, the next page is
    requested through _extract_response and parsed either from
    'continuationContents' or from the 'appendContinuationItemsAction' of
    the first 'onResponseReceivedActions'/'onResponseReceivedEndpoints'
    element. Iteration stops when there is no continuation, the response is
    empty, or the response holds no recognised renderer.

    @param tab: tab renderer dict
    @param item_id: id used in the per-page download notes
    @param ytcfg: passed to header generation and the API request
    @param account_syncid: passed to header generation
    @param visitor_data: initial visitor data; replaced by the value found
                         in each response when there is one
    """
    # Shared with _extract_entries, which stores the continuation it finds here
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for response['continuationContents'][key]
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Keyed by the renderer type of the first continuation item:
    # (handler, name of the list key the handler expects)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            yield from continuation_handlers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(
            response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received,
            lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key not in item_handlers:
                continue
            handler, list_key = item_handlers[key]
            # Wrap the raw item list in the shape the handler expects
            video_items_renderer = {list_key: continuation_items}
            continuation_list[0] = None
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break

@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab marked as selected.

    For each tab, the 'tabRenderer' or 'expandableTabRenderer' value is
    inspected; the first one whose 'selected' is exactly True is returned.

    @param tabs: iterable of tab dicts
    @raises ExtractorError: if no tab is selected
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')

3876

3877

@classmethod
def _extract_uploader(cls, data):
    """Build uploader fields from the playlist sidebar's video owner.

    @param data: response dict
    @returns: dict with any of 'uploader', 'uploader_id', 'uploader_url'
              that could be found (None values are left out); empty when the
              sidebar has no video owner
    """
    secondary_info = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        secondary_info,
        lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}

3891

3892

def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed page (channel tab, playlist, feed, ...).

    Metadata is read from 'channelMetadataRenderer' when present, otherwise
    from 'playlistMetadataRenderer'; the playlist sidebar supplies stats and
    a fallback thumbnail. Entries come from the selected tab via _entries.

    @param item_id: id used when the metadata provides no playlist id
    @param ytcfg: passed on to entry extraction
    @param data: response dict
    @param tabs: tab list handed to _extract_selected_tab
    @returns: result of playlist_result()
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Only set for channels; playlists fall back to item_id below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    thumbnails = (
        self._extract_thumbnails(metadata_renderer, 'avatar')
        or self._extract_thumbnails(
            primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    tab_suffix = (
        format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))
    title += tab_suffix

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0)
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    for uploader_key, channel_key in (
            ('uploader', 'channel'),
            ('uploader_id', 'channel_id'),
            ('uploader_url', 'channel_url')):
        metadata[channel_key] = metadata[uploader_key]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)

3955

3956

def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):

3957

first_id = last_id = response = None

3958

for page_num in itertools.count(1):

3959

videos = list(self._playlist_entries(playlist))

3960

if not videos:

3961

return

3962

start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1

3963

if start >= len(videos):

3964

return

3965

for video in videos[start:]:

3966

if video['id'] == first_id:

3967

self.to_screen('First video %s found again; Assuming end of Mix' % first_id)

3968

return

3969

yield video

3970

first_id = first_id or videos[0]['id']

3971

last_id = videos[-1]['id']

3972

watch_endpoint = try_get(

3973

playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])

3974

headers = self.generate_api_headers(

3975

ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),

3976

visitor_data=self._extract_visitor_data(response, data, ytcfg))

3977

query = {

3978

'playlistId': playlist_id,

3979

'videoId': watch_endpoint.get('videoId') or last_id,

3980

'index': watch_endpoint.get('index') or len(videos),

3981

'params': watch_endpoint.get('params') or 'OAE%3D'

3982

}

3983

response = self._extract_response(

3984

item_id='%s page %d' % (playlist_id, page_num),

3985

query=query, ep='next', headers=headers, ytcfg=ytcfg,

3986

check_get_keys='contents'

3987

)

3988

playlist = try_get(

3989

response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)

3990

3991

def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Build the result for a playlist shown next to a watch page.

    When the playlist has its own endpoint URL that differs from *url*, a
    URL result for YoutubeTabIE is returned. Otherwise the playlist is
    extracted in place via _extract_mix_playlist.

    @param item_id: fallback playlist id
    @param url: URL being extracted
    @param data: response dict, used for the fallback title
    @param playlist: playlist renderer dict
    @param ytcfg: passed on to mix extraction
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)

4008

4009

def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    The badge labels of the primary sidebar renderer are examined; the label
    of the selected entry of the privacy dropdown (if there is one) is added
    to them in lower case.
    @param data: response
    @returns: result of _availability() for the detected private/unlisted state
    """
    sidebar = self._extract_sidebar_info_renderer(
        data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if not selected:
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)

4040

4041

@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* value among the playlist sidebar items.

    @param data: response dict; items are read from
                 ['sidebar']['playlistSidebarRenderer']['items']
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have (passed to try_get)
    @returns: the renderer, or None if no item has one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
    return None

def _reload_with_unavailable_videos(self, item_id, data, ytcfg):

4051

"""

4052

Get playlist with unavailable videos if the 'show unavailable videos' button exists.

4053

"""

4054

browse_id = params = None

4055

renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

4056

if not renderer:

4057

return

4058

menu_renderer = try_get(

4059

renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []

4060

for menu_item in menu_renderer:

4061

if not isinstance(menu_item, dict):

4062

continue

4063

nav_item_renderer = menu_item.get('menuNavigationItemRenderer')

4064

text = try_get(

4065

nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)

4066

if not text or text.lower() != 'show unavailable videos':

4067

continue

4068

browse_endpoint = try_get(

4069

nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}

4070

browse_id = browse_endpoint.get('browseId')

4071

params = browse_endpoint.get('params')

4072

break

4073

4074

headers = self.generate_api_headers(

4075

ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),

4076

visitor_data=self._extract_visitor_data(data, ytcfg))

4077

query = {

4078

'params': params or 'wgYCCAA=',

4079

'browseId': browse_id or 'VL%s' % item_id

4080

}

4081

return self._extract_response(

4082

item_id=item_id, headers=headers, query=query,

4083

check_get_keys='contents', fatal=False, ytcfg=ytcfg,

4084

note='Downloading API JSON with unavailable videos')

4085

4086

def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying on bad results.

    A new attempt is made (up to the 'extractor_retries' param, default 3)
    after a network error other than HTTP 403/429, or when the initial data
    has neither 'contents' nor 'currentVideoEndpoint'.

    @param url: page URL
    @param item_id: id used in download notes
    @param fatal: when true, errors are raised; otherwise they are reported
                  as warnings and whatever was obtained is returned
    @returns: (webpage, data) tuple
    @raises ExtractorError: on failure, if fatal
    """
    max_retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % retry_suffix)
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except HTTP 403 and 429
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, compat_HTTPError) or cause.code not in (403, 429))
            if retryable:
                last_error = error_to_compat_str(cause or e.msg)
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        # Only reached when the download and parsing above succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data

def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):

4133

data = None

4134

if 'webpage' not in self._configuration_arg('skip'):

4135

webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)

4136

ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

4137

if not data:

4138

if not ytcfg and self.is_authenticated:

4139

msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'

4140

if 'authcheck' not in self._configuration_arg('skip') and fatal:

4141

raise ExtractorError(

4142

msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'

4143

' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',

4144

expected=True)

4145

self.report_warning(msg, only_once=True)

4146

data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)

4147

return data, ytcfg

4148

4149

def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve *url* through the API and fetch the page data it points to.

    The URL is first resolved with the 'navigation/resolve_url' endpoint.
    A 'browseEndpoint' in the result is then requested from 'browse', a
    'watchEndpoint' from 'next' (checked in that order).

    @returns: the API response for the resolved endpoint; None when the URL
              cannot be resolved and not fatal
    @raises ExtractorError: when the URL cannot be resolved and fatal
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    message = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(message, expected=True)
    self.report_warning(message, item_id)

4166

4167

@staticmethod

4168

def _smuggle_data(entries, data):

4169

for entry in entries:

4170

if data:

4171

entry['url'] = smuggle_url(entry['url'], data)

4172

yield entry

4173

4174

# Default 'params' value for search requests; _search_results uses it when
# the caller does not pass params. A falsy value means no 'params' is sent.
_SEARCH_PARAMS = None

4175

4176

def _search_results(self, query, params=NO_DEFAULT):
    """Yield search results for *query*, page by page.

    Every page is requested from the 'search' endpoint and parsed with
    _extract_entries; the continuation it finds is merged into the next
    request. Stops after the first page that provides no continuation.

    @param query: search string
    @param params: value for the request's 'params' field; defaults to
                   _SEARCH_PARAMS. Left out of the request when falsy.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    # Filled in by _extract_entries with the continuation of each page
    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        request.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # First page vs. continuation pages keep their results in different places
        slr_contents = try_get(
            search,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            return

class YoutubeTabIE(YoutubeTabBaseInfoExtractor):

4199

IE_DESC = 'YouTube Tabs'

4200

_VALID_URL = r'''(?x:

https?://

(?:\w+\.)?

(?:

youtube(?:kids)?\.com|

%(invidious)s

)/

(?:

(?P<channel_type>channel|c|user|browse)/|

4209

(?P<not_channel>

4210

feed/|hashtag/|

4211

(?:playlist|watch)\?.*?\blist=

4212

)|

4213

(?!(?:%(reserved_names)s)\b) # Direct URLs

)

(?P<id>[^/?\#&]+)

)''' % {

'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,

4218

'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),

4219

}

4220

IE_NAME = 'youtube:tab'

4221

4222

_TESTS = [{

4223

'note': 'playlists, multipage',

4224

'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',

4225

'playlist_mincount': 94,

4226

'info_dict': {

4227

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4228

'title': 'Igor Kleiner - Playlists',

4229

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4230

'uploader': 'Igor Kleiner',

4231

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4232

'channel': 'Igor Kleiner',

4233

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4234

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4235

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4236

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4237

},

4238

}, {

4239

'note': 'playlists, multipage, different order',

4240

'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',

4241

'playlist_mincount': 94,

4242

'info_dict': {

4243

'id': 'UCqj7Cz7revf5maW9g5pgNcg',

4244

'title': 'Igor Kleiner - Playlists',

4245

'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',

4246

'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4247

'uploader': 'Igor Kleiner',

4248

'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4249

'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],

4250

'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',

4251

'channel': 'Igor Kleiner',

4252

'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',

4253

},

4254

}, {

4255

'note': 'playlists, series',

4256

'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',

4257

'playlist_mincount': 5,

4258

'info_dict': {

4259

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4260

'title': '3Blue1Brown - Playlists',

4261

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4262

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4263

'uploader': '3Blue1Brown',

4264

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4265

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4266

'channel': '3Blue1Brown',

4267

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4268

'tags': ['Mathematics'],

4269

},

4270

}, {

4271

'note': 'playlists, singlepage',

4272

'url': 'https://www.youtube.com/user/ThirstForScience/playlists',

4273

'playlist_mincount': 4,

4274

'info_dict': {

4275

'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4276

'title': 'ThirstForScience - Playlists',

4277

'description': 'md5:609399d937ea957b0f53cbffb747a14c',

4278

'uploader': 'ThirstForScience',

4279

'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4280

'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4281

'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',

4282

'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',

4283

'tags': 'count:13',

4284

'channel': 'ThirstForScience',

4285

}

4286

}, {

4287

'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',

4288

'only_matching': True,

4289

}, {

4290

'note': 'basic, single video playlist',

4291

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4292

'info_dict': {

4293

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4294

'uploader': 'Sergey M.',

4295

'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4296

'title': 'youtube-dl public playlist',

'description': '',

'tags': [],

'view_count': int,

'modified_date': '20201130',

4301

'channel': 'Sergey M.',

4302

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4303

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4304

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 1,

}, {

'note': 'empty playlist',

4309

'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4310

'info_dict': {

4311

'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4312

'uploader': 'Sergey M.',

4313

'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',

4314

'title': 'youtube-dl empty playlist',

4315

'tags': [],

4316

'channel': 'Sergey M.',

4317

'description': '',

4318

'modified_date': '20160902',

4319

'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',

4320

'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4321

'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

},

'playlist_count': 0,

}, {

'note': 'Home tab',

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',

4327

'info_dict': {

4328

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4329

'title': 'lex will - Home',

4330

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4331

'uploader': 'lex will',

4332

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4333

'channel': 'lex will',

4334

'tags': ['bible', 'history', 'prophesy'],

4335

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4336

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4337

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4338

},

4339

'playlist_mincount': 2,

4340

}, {

4341

'note': 'Videos tab',

4342

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',

4343

'info_dict': {

4344

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4345

'title': 'lex will - Videos',

4346

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4347

'uploader': 'lex will',

4348

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4349

'tags': ['bible', 'history', 'prophesy'],

4350

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4351

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4352

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4353

'channel': 'lex will',

4354

},

4355

'playlist_mincount': 975,

4356

}, {

4357

'note': 'Videos tab, sorted by popular',

4358

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',

4359

'info_dict': {

4360

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4361

'title': 'lex will - Videos',

4362

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4363

'uploader': 'lex will',

4364

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4365

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4366

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4367

'channel': 'lex will',

4368

'tags': ['bible', 'history', 'prophesy'],

4369

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4370

},

4371

'playlist_mincount': 199,

4372

}, {

4373

'note': 'Playlists tab',

4374

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',

4375

'info_dict': {

4376

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4377

'title': 'lex will - Playlists',

4378

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4379

'uploader': 'lex will',

4380

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4381

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4382

'channel': 'lex will',

4383

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4384

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4385

'tags': ['bible', 'history', 'prophesy'],

4386

},

4387

'playlist_mincount': 17,

4388

}, {

4389

'note': 'Community tab',

4390

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',

4391

'info_dict': {

4392

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4393

'title': 'lex will - Community',

4394

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4395

'uploader': 'lex will',

4396

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4397

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4398

'channel': 'lex will',

4399

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4400

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4401

'tags': ['bible', 'history', 'prophesy'],

4402

},

4403

'playlist_mincount': 18,

4404

}, {

4405

'note': 'Channels tab',

4406

'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',

4407

'info_dict': {

4408

'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4409

'title': 'lex will - Channels',

4410

'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',

4411

'uploader': 'lex will',

4412

'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4413

'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4414

'channel': 'lex will',

4415

'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',

4416

'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',

4417

'tags': ['bible', 'history', 'prophesy'],

4418

},

4419

'playlist_mincount': 12,

4420

}, {

4421

'note': 'Search tab',

4422

'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',

4423

'playlist_mincount': 40,

4424

'info_dict': {

4425

'id': 'UCYO_jab_esuFRV4b17AJtAw',

4426

'title': '3Blue1Brown - Search - linear algebra',

4427

'description': 'md5:e1384e8a133307dd10edee76e875d62f',

4428

'uploader': '3Blue1Brown',

4429

'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',

4430

'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4431

'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',

4432

'tags': ['Mathematics'],

4433

'channel': '3Blue1Brown',

4434

'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',

4435

},

4436

}, {

4437

'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4438

'only_matching': True,

4439

}, {

4440

'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4441

'only_matching': True,

4442

}, {

4443

'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',

4444

'only_matching': True,

4445

}, {

4446

'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',

4447

'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4448

'info_dict': {

4449

'title': '29C3: Not my department',

4450

'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',

4451

'uploader': 'Christiaan008',

4452

'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4453

'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',

4454

'tags': [],

4455

'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4456

'view_count': int,

4457

'modified_date': '20150605',

4458

'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',

4459

'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',

4460

'channel': 'Christiaan008',

4461

},

4462

'playlist_count': 96,

4463

}, {

4464

'note': 'Large playlist',

4465

'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',

4466

'info_dict': {

4467

'title': 'Uploads from Cauchemar',

4468

'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',

4469

'uploader': 'Cauchemar',

4470

'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4471

'channel_url': 'https://www.youtube.com/c/Cauchemar89',

4472

'tags': [],

4473

'modified_date': r're:\d{8}',

4474

'channel': 'Cauchemar',

4475

'uploader_url': 'https://www.youtube.com/c/Cauchemar89',

4476

'view_count': int,

4477

'description': '',

4478

'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',

4479

},

4480

'playlist_mincount': 1123,

4481

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4482

}, {

4483

'note': 'even larger playlist, 8832 videos',

4484

'url': 'http://www.youtube.com/user/NASAgovVideo/videos',

4485

'only_matching': True,

4486

}, {

4487

'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',

4488

'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',

4489

'info_dict': {

4490

'title': 'Uploads from Interstellar Movie',

4491

'id': 'UUXw-G3eDE9trcvY2sBMM_aA',

4492

'uploader': 'Interstellar Movie',

4493

'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4494

'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',

4495

'tags': [],

4496

'view_count': int,

4497

'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',

4498

'channel_url': 'https://www.youtube.com/c/InterstellarMovie',

4499

'channel': 'Interstellar Movie',

4500

'description': '',

4501

'modified_date': r're:\d{8}',

4502

},

4503

'playlist_mincount': 21,

4504

}, {

4505

'note': 'Playlist with "show unavailable videos" button',

4506

'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',

4507

'info_dict': {

4508

'title': 'Uploads from Phim Siêu Nhân Nhật Bản',

4509

'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',

4510

'uploader': 'Phim Siêu Nhân Nhật Bản',

4511

'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4512

'view_count': int,

4513

'channel': 'Phim Siêu Nhân Nhật Bản',

4514

'tags': [],

4515

'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4516

'description': '',

4517

'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',

4518

'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',

4519

'modified_date': r're:\d{8}',

4520

},

4521

'playlist_mincount': 200,

4522

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4523

}, {

4524

'note': 'Playlist with unavailable videos in page 7',

4525

'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',

4526

'info_dict': {

4527

'title': 'Uploads from BlankTV',

4528

'id': 'UU8l9frL61Yl5KFOl87nIm2w',

4529

'uploader': 'BlankTV',

4530

'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4531

'channel': 'BlankTV',

4532

'channel_url': 'https://www.youtube.com/c/blanktv',

4533

'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',

4534

'view_count': int,

4535

'tags': [],

4536

'uploader_url': 'https://www.youtube.com/c/blanktv',

4537

'modified_date': r're:\d{8}',

4538

'description': '',

4539

},

4540

'playlist_mincount': 1000,

4541

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4542

}, {

4543

'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',

4544

'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4545

'info_dict': {

4546

'title': 'Data Analysis with Dr Mike Pound',

4547

'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',

4548

'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4549

'uploader': 'Computerphile',

4550

'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',

4551

'uploader_url': 'https://www.youtube.com/user/Computerphile',

4552

'tags': [],

4553

'view_count': int,

4554

'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',

4555

'channel_url': 'https://www.youtube.com/user/Computerphile',

4556

'channel': 'Computerphile',

4557

},

4558

'playlist_mincount': 11,

4559

}, {

4560

'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',

4561

'only_matching': True,

4562

}, {

4563

'note': 'Playlist URL that does not actually serve a playlist',

4564

'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',

'info_dict': {

'id': 'FqZTN594JQw',

'ext': 'webm',

'title': "Smiley's People 01 detective, Adventure Series, Action",

4569

'uploader': 'STREEM',

4570

'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',

4571

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',

4572

'upload_date': '20150526',

4573

'license': 'Standard YouTube License',

4574

'description': 'md5:507cdcb5a49ac0da37a920ece610be80',

4575

'categories': ['People & Blogs'],

'tags': list,

'view_count': int,

'like_count': int,

},

'params': {

'skip_download': True,

4582

},

4583

'skip': 'This video is not available.',

4584

'add_ie': [YoutubeIE.ie_key()],

4585

}, {

4586

'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',

4587

'only_matching': True,

4588

}, {

4589

'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',

4590

'only_matching': True,

4591

}, {

4592

'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',

4593

'info_dict': {

4594

'id': 'zpsbVPFwsqk', # This will keep changing

4595

'ext': 'mp4',

4596

'title': str,

4597

'uploader': 'Sky News',

4598

'uploader_id': 'skynews',

4599

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',

4600

'upload_date': r're:\d{8}',

4601

'description': str,

4602

'categories': ['News & Politics'],

4603

'tags': list,

4604

'like_count': int,

4605

'release_timestamp': 1640164857,

4606

'channel': 'Sky News',

4607

'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',

4608

'age_limit': 0,

4609

'view_count': int,

4610

'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg',

4611

'playable_in_embed': True,

4612

'release_date': '20211222',

4613

'availability': 'public',

4614

'live_status': 'is_live',

4615

'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',

4616

},

4617

'params': {

4618

'skip_download': True,

4619

},

4620

'expected_warnings': ['Ignoring subtitle tracks found in '],

4621

}, {

4622

'url': 'https://www.youtube.com/user/TheYoungTurks/live',

'info_dict': {

'id': 'a48o2S1cPoo',

'ext': 'mp4',

'title': 'The Young Turks - Live Main Show',

4627

'uploader': 'The Young Turks',

4628

'uploader_id': 'TheYoungTurks',

4629

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',

4630

'upload_date': '20150715',

4631

'license': 'Standard YouTube License',

4632

'description': 'md5:438179573adcdff3c97ebb1ee632b891',

4633

'categories': ['News & Politics'],

4634

'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],

'like_count': int,

},

'params': {

'skip_download': True,

4639

},

4640

'only_matching': True,

4641

}, {

4642

'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',

4643

'only_matching': True,

4644

}, {

4645

'url': 'https://www.youtube.com/c/CommanderVideoHq/live',

4646

'only_matching': True,

4647

}, {

4648

'note': 'A channel that is not live. Should raise error',

4649

'url': 'https://www.youtube.com/user/numberphile/live',

4650

'only_matching': True,

4651

}, {

4652

'url': 'https://www.youtube.com/feed/trending',

4653

'only_matching': True,

4654

}, {

4655

'url': 'https://www.youtube.com/feed/library',

4656

'only_matching': True,

4657

}, {

4658

'url': 'https://www.youtube.com/feed/history',

4659

'only_matching': True,

4660

}, {

4661

'url': 'https://www.youtube.com/feed/subscriptions',

4662

'only_matching': True,

4663

}, {

4664

'url': 'https://www.youtube.com/feed/watch_later',

4665

'only_matching': True,

4666

}, {

4667

'note': 'Recommended - redirects to home page.',

4668

'url': 'https://www.youtube.com/feed/recommended',

4669

'only_matching': True,

4670

}, {

4671

'note': 'inline playlist with not always working continuations',

4672

'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',

4673

'only_matching': True,

4674

}, {

4675

'url': 'https://www.youtube.com/course',

4676

'only_matching': True,

4677

}, {

4678

'url': 'https://www.youtube.com/zsecurity',

4679

'only_matching': True,

4680

}, {

4681

'url': 'http://www.youtube.com/NASAgovVideo/videos',

4682

'only_matching': True,

4683

}, {

4684

'url': 'https://www.youtube.com/TheYoungTurks/live',

4685

'only_matching': True,

4686

}, {

4687

'url': 'https://www.youtube.com/hashtag/cctv9',

'info_dict': {

'id': 'cctv9',

'title': '#cctv9',

'tags': [],

},

'playlist_mincount': 350,

4694

}, {

4695

'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',

4696

'only_matching': True,

4697

}, {

4698

'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',

4699

'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4700

'only_matching': True

4701

}, {

4702

'note': '/browse/ should redirect to /channel/',

4703

'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',

4704

'only_matching': True

4705

}, {

4706

'note': 'VLPL, should redirect to playlist?list=PL...',

4707

'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4708

'info_dict': {

4709

'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',

4710

'uploader': 'NoCopyrightSounds',

4711

'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',

4712

'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4713

'title': 'NCS Releases',

4714

'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4715

'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',

4716

'modified_date': r're:\d{8}',

4717

'view_count': int,

4718

'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',

4719

'tags': [],

4720

'channel': 'NoCopyrightSounds',

4721

},

4722

'playlist_mincount': 166,

4723

'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],

4724

}, {

4725

'note': 'Topic, should redirect to playlist?list=UU...',

4726

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4727

'info_dict': {

4728

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4729

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4730

'title': 'Uploads from Royalty Free Music - Topic',

4731

'uploader': 'Royalty Free Music - Topic',

4732

'tags': [],

4733

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4734

'channel': 'Royalty Free Music - Topic',

4735

'view_count': int,

4736

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4737

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4738

'modified_date': r're:\d{8}',

4739

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4740

'description': '',

4741

},

4742

'expected_warnings': [

4743

'The URL does not have a videos tab',

4744

r'[Uu]navailable videos (are|will be) hidden',

4745

],

4746

'playlist_mincount': 101,

4747

}, {

4748

'note': 'Topic without a UU playlist',

4749

'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',

4750

'info_dict': {

4751

'id': 'UCtFRv9O2AHqOZjjynzrv-xg',

4752

'title': 'UCtFRv9O2AHqOZjjynzrv-xg',

4753

'tags': [],

4754

},

4755

'expected_warnings': [

4756

'the playlist redirect gave error',

4757

],

4758

'playlist_mincount': 9,

4759

}, {

4760

'note': 'Youtube music Album',

4761

'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',

4762

'info_dict': {

4763

'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',

4764

'title': 'Album - Royalty Free Music Library V2 (50 Songs)',

'tags': [],

'view_count': int,

'description': '',

'availability': 'unlisted',

4769

'modified_date': r're:\d{8}',

4770

},

4771

'playlist_count': 50,

4772

}, {

4773

'note': 'unlisted single video playlist',

4774

'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4775

'info_dict': {

4776

'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4777

'uploader': 'colethedj',

4778

'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',

4779

'title': 'yt-dlp unlisted playlist test',

4780

'availability': 'unlisted',

4781

'tags': [],

4782

'modified_date': '20211208',

4783

'channel': 'colethedj',

4784

'view_count': int,

4785

'description': '',

4786

'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

4787

'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',

4788

'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',

},

'playlist_count': 1,

}, {

'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',

4793

'url': 'https://www.youtube.com/feed/recommended',

4794

'info_dict': {

4795

'id': 'recommended',

4796

'title': 'recommended',

4797

},

4798

'playlist_mincount': 50,

4799

'params': {

4800

'skip_download': True,

4801

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4802

},

4803

}, {

4804

'note': 'API Fallback: /videos tab, sorted by oldest first',

4805

'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',

4806

'info_dict': {

4807

'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4808

'title': 'Cody\'sLab - Videos',

4809

'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',

4810

'uploader': 'Cody\'sLab',

4811

'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4812

'channel': 'Cody\'sLab',

4813

'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',

4814

'tags': [],

4815

'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4816

'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',

4817

},

4818

'playlist_mincount': 650,

4819

'params': {

4820

'skip_download': True,

4821

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

4822

},

4823

}, {

4824

'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',

4825

'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',

4826

'info_dict': {

4827

'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',

4828

'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4829

'title': 'Uploads from Royalty Free Music - Topic',

4830

'uploader': 'Royalty Free Music - Topic',

4831

'modified_date': r're:\d{8}',

4832

'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',

4833

'description': '',

4834

'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4835

'tags': [],

4836

'channel': 'Royalty Free Music - Topic',

4837

'view_count': int,

4838

'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',

4839

},

4840

'expected_warnings': [

4841

'does not have a videos tab',

4842

r'[Uu]navailable videos (are|will be) hidden',

4843

],

4844

'playlist_mincount': 101,

4845

'params': {

4846

'skip_download': True,

4847

'extractor_args': {'youtubetab': {'skip': ['webpage']}}

},

}]

@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE already accepts are always rejected here; every
    other URL is judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)

4855

4856

def _real_extract(self, url):
    """Extract *url*, carrying any smuggled data through to the result.

    The URL is unsmuggled first; when ``is_music_url`` reports a music
    URL, that fact is recorded in the smuggled data. The actual work is
    done by ``__real_extract``. If the result has a non-empty ``entries``
    value, it is replaced by the output of ``_smuggle_data``.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    if result.get('entries'):
        result['entries'] = self._smuggle_data(result['entries'], smuggled_data)
    return result

4864

4865

# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/word" path segment, only attempted when the
#          "channel_type" group took part in the match
#          (presumably that group is defined inside _VALID_URL - not visible here)
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')

4866

4867

def __real_extract(self, url, smuggled_data):
    """Resolve *url* to its final tab/playlist/video target and extract it.

    Steps, in order:
      1. Force the host to www.youtube.com and split the URL with _URL_RE.
      2. For music URLs, rewrite VL ids to playlists, resolve MP ids (albums)
         through the music endpoint, and turn /browse/ into /channel/.
      3. Redirect bare channel URLs to the /videos tab (unless disabled via
         the 'no-youtube-channel-redirect' compat option).
      4. Handle watch URLs that carry a playlist id and honour 'noplaylist'.
      5. Download the page data and dispatch to tab, playlist or
         single-video handling.

    Raises ExtractorError when the page cannot be recognised, when an album
    cannot be resolved, or when a /live tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def get_mobj(url):
        # Named groups of _URL_RE, with groups that did not match mapped to ''
        groups = self._URL_RE.match(url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def single_video_result(video_id):
        # Hand a plain watch URL over to the regular video extractor
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    mobj = get_mobj(url)
    redirect_warning = None
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return single_video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            wanted_tab = mobj['tab'][1:]
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != wanted_tab:
                redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id = pl_id
                        url = pl_url
                        tab_name = wanted_tab
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if tab_name.lower() != wanted_tab:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # The data may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return single_video_result(video_id)

4978

4979

4980

class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids as well as URLs carrying a "list=" query
    # parameter, and forwards them to YoutubeTabIE as /playlist URLs.
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a ``v`` video id;
        otherwise fall back to the inherited matching."""
        if YoutubeTabIE.suitable(url):
            return False
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query string is kept when there is one; otherwise a
        ``list`` parameter is built from the matched playlist id. Music URLs
        are marked by smuggling ``is_music_url`` into the new URL.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5089

5090

5091

class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a "list=" parameter by
    # expanding them to a full watch URL for YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a watch URL from the matched video and playlist ids and
        hand it to YoutubeTabIE."""
        match = self._match_valid_url(url)
        video_id = match.group('id')
        playlist_id = match.group('playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

5138

5139

5140

class YoutubeYtUserIE(InfoExtractor):
    # Maps the "ytuser:<name>" shorthand onto the user's /videos page.
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the /user/<id>/videos page, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)

5153

5154

5155

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword extractor: ":ytfav" and its spelling variants resolve to the
    # "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the fixed ``list=LL`` playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())

5172

5173

5174

class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Identification
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'

    # Search configuration: keyword prefix and the encoded filter parameter
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only

    # No test cases are declared for this extractor
    _TESTS = []

class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the 'ytsearchdate' key.

    Its name is derived from YoutubeSearchIE.IE_NAME with a ':date' suffix;
    the search logic comes from the base classes.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date

5187

5188

5189

class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from a youtube.com/results?... search URL.

    The search terms are read from the 'search_query' (or 'q') parameter and
    the optional 'sp' parameter is forwarded to _search_results.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist whose id and title are both the search terms."""
        params = parse_qs(url)
        # 'search_query' takes precedence; 'q' is the fallback spelling
        search_terms = (params.get('search_query') or params.get('q'))[0]
        # 'sp' is optional: default to a one-element tuple so [0] yields None
        sp_value = params.get('sp', (None,))[0]
        entries = self._search_results(search_terms, sp_value)
        return self.playlist_result(entries, search_terms, search_terms)

5217

5218

5219

class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    A subclass has to provide _FEED_NAME; it is used both for the extractor
    name ('youtube:<feed>') and for the https://www.youtube.com/feed/<feed>
    URL that is handed to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name built from the subclass' _FEED_NAME."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Return a url_result for the feed page; *url* itself is unused."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())

5235

5236

5237

class YoutubeWatchLaterIE(InfoExtractor):
    """Handle the ":ytwatchlater" keyword.

    Extraction is delegated to YoutubeTabIE with the fixed
    https://www.youtube.com/playlist?list=WL URL.
    """
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the "WL" playlist; *url* itself is unused."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())

5249

5250

5251

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed.

    Matches the ":ytrec" / ":ytrecommended" keywords as well as the bare
    youtube.com home page URL. Unlike its base class it sets
    _LOGIN_REQUIRED to False.
    """
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed (":ytsubs" keyword family)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed (":ythis" / ":ythistory" keywords)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]

class YoutubeTruncatedURLIE(InfoExtractor):
    """Match YouTube watch/attribution URLs that carry no video id.

    _VALID_URL accepts a youtube.com "watch?" URL whose query is empty or
    consists of a single non-id parameter (feature, annotation_id, x-yt-cl,
    hl or t), or an "attribution_link?a=..." URL with nothing after it.
    Extraction always fails with an explanatory, expected ExtractorError.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode regex: whitespace inside the pattern is not significant
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matched by this extractor has no video id.

        Raises:
            ExtractorError: with expected=True, suggesting that the URL was
                cut at an unquoted '&' by the shell.
        """
        # The example commands name this program (yt-dlp) so that the advice
        # can be copied verbatim by the user.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)

class YoutubeClipIE(InfoExtractor):
    """Match youtube.com/clip/ URLs, warn, and pass them on to 'Generic'."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the "clips not supported" warning and return a url_result for *url*."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')

5348

5349

5350

class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose 'v' value is only 1 to 10 characters long.

    Extraction always fails with an expected ExtractorError naming the
    incomplete id and the URL.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError for the incomplete id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)