]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[youtube] Fix authentication when using multiple accounts
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6import calendar
7import copy
8import hashlib
9import itertools
10import json
11import os.path
12import random
13import re
14import time
15import traceback
16
17from .common import InfoExtractor, SearchInfoExtractor
18from ..compat import (
19 compat_chr,
20 compat_HTTPError,
21 compat_parse_qs,
22 compat_str,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27)
28from ..jsinterp import JSInterpreter
29from ..utils import (
30 bool_or_none,
31 bytes_to_intlist,
32 clean_html,
33 dict_get,
34 datetime_from_str,
35 error_to_compat_str,
36 ExtractorError,
37 format_field,
38 float_or_none,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 qualities,
46 remove_start,
47 smuggle_url,
48 str_or_none,
49 str_to_int,
50 try_get,
51 unescapeHTML,
52 unified_strdate,
53 unsmuggle_url,
54 update_url_query,
55 url_or_none,
56 urlencode_postdata,
57 urljoin
58)
59
60
def parse_qs(url):
    """Return the query-string parameters of *url* as a dict of value lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
63
64
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints of the legacy username/password login flow (flow itself is
    # broken -- see _login below; these are retained for reference)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # URL path components reserved by YouTube itself, i.e. never a channel or
    # user name (presumably consumed by URL-matching regexes elsewhere in the
    # file -- TODO confirm against the tab/channel extractors)
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches prefixed playlist IDs as well as the special list names
    # (RDMM = mix, WL = watch later, LL = liked, LM = liked music)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
84
85 def _login(self):
86 """
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
90
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 """
93
94 def warn(message):
95 self.report_warning(message)
96
97 # username+password login is broken
98 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
99 self.raise_login_required(
100 'Login details are needed to download this content', method='cookies')
101 username, password = self._get_login_info()
102 if username:
103 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
104 return
105
106 # Everything below this is broken!
107 r'''
108 # No authentication to be performed
109 if username is None:
110 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
111 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
112 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
113 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
114 return True
115
116 login_page = self._download_webpage(
117 self._LOGIN_URL, None,
118 note='Downloading login page',
119 errnote='unable to fetch login page', fatal=False)
120 if login_page is False:
121 return
122
123 login_form = self._hidden_inputs(login_page)
124
125 def req(url, f_req, note, errnote):
126 data = login_form.copy()
127 data.update({
128 'pstMsg': 1,
129 'checkConnection': 'youtube',
130 'checkedDomains': 'youtube',
131 'hl': 'en',
132 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
133 'f.req': json.dumps(f_req),
134 'flowName': 'GlifWebSignIn',
135 'flowEntry': 'ServiceLogin',
136 # TODO: reverse actual botguard identifier generation algo
137 'bgRequest': '["identifier",""]',
138 })
139 return self._download_json(
140 url, None, note=note, errnote=errnote,
141 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
142 fatal=False,
143 data=urlencode_postdata(data), headers={
144 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
145 'Google-Accounts-XSRF': 1,
146 })
147
148 lookup_req = [
149 username,
150 None, [], None, 'US', None, None, 2, False, True,
151 [
152 None, None,
153 [2, 1, None, 1,
154 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
155 None, [], 4],
156 1, [None, None, []], None, None, None, True
157 ],
158 username,
159 ]
160
161 lookup_results = req(
162 self._LOOKUP_URL, lookup_req,
163 'Looking up account info', 'Unable to look up account info')
164
165 if lookup_results is False:
166 return False
167
168 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
169 if not user_hash:
170 warn('Unable to extract user hash')
171 return False
172
173 challenge_req = [
174 user_hash,
175 None, 1, None, [1, None, None, None, [password, None, True]],
176 [
177 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
178 1, [None, None, []], None, None, None, True
179 ]]
180
181 challenge_results = req(
182 self._CHALLENGE_URL, challenge_req,
183 'Logging in', 'Unable to log in')
184
185 if challenge_results is False:
186 return
187
188 login_res = try_get(challenge_results, lambda x: x[0][5], list)
189 if login_res:
190 login_msg = try_get(login_res, lambda x: x[5], compat_str)
191 warn(
192 'Unable to login: %s' % 'Invalid password'
193 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
194 return False
195
196 res = try_get(challenge_results, lambda x: x[0][-1], list)
197 if not res:
198 warn('Unable to extract result entry')
199 return False
200
201 login_challenge = try_get(res, lambda x: x[0][0], list)
202 if login_challenge:
203 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
204 if challenge_str == 'TWO_STEP_VERIFICATION':
205 # SEND_SUCCESS - TFA code has been successfully sent to phone
206 # QUOTA_EXCEEDED - reached the limit of TFA codes
207 status = try_get(login_challenge, lambda x: x[5], compat_str)
208 if status == 'QUOTA_EXCEEDED':
209 warn('Exceeded the limit of TFA codes, try later')
210 return False
211
212 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
213 if not tl:
214 warn('Unable to extract TL')
215 return False
216
217 tfa_code = self._get_tfa_info('2-step verification code')
218
219 if not tfa_code:
220 warn(
221 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
222 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
223 return False
224
225 tfa_code = remove_start(tfa_code, 'G-')
226
227 tfa_req = [
228 user_hash, None, 2, None,
229 [
230 9, None, None, None, None, None, None, None,
231 [None, tfa_code, True, 2]
232 ]]
233
234 tfa_results = req(
235 self._TFA_URL.format(tl), tfa_req,
236 'Submitting TFA code', 'Unable to submit TFA code')
237
238 if tfa_results is False:
239 return False
240
241 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
242 if tfa_res:
243 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
244 warn(
245 'Unable to finish TFA: %s' % 'Invalid TFA code'
246 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
247 return False
248
249 check_cookie_url = try_get(
250 tfa_results, lambda x: x[0][-1][2], compat_str)
251 else:
252 CHALLENGES = {
253 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
254 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
255 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
256 }
257 challenge = CHALLENGES.get(
258 challenge_str,
259 '%s returned error %s.' % (self.IE_NAME, challenge_str))
260 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
261 return False
262 else:
263 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
264
265 if not check_cookie_url:
266 warn('Unable to extract CheckCookie URL')
267 return False
268
269 check_cookie_results = self._download_webpage(
270 check_cookie_url, None, 'Checking cookie', fatal=False)
271
272 if check_cookie_results is False:
273 return False
274
275 if 'https://myaccount.google.com/' not in check_cookie_results:
276 warn('Unable to log in')
277 return False
278
279 return True
280 '''
281
282 def _initialize_consent(self):
283 cookies = self._get_cookies('https://www.youtube.com/')
284 if cookies.get('__Secure-3PSID'):
285 return
286 consent_id = None
287 consent = cookies.get('CONSENT')
288 if consent:
289 if 'YES' in consent.value:
290 return
291 consent_id = self._search_regex(
292 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
293 if not consent_id:
294 consent_id = random.randint(100, 999)
295 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
296
297 def _real_initialize(self):
298 self._initialize_consent()
299 if self._downloader is None:
300 return
301 if not self._login():
302 return
303
    # Regexes locating the large JSON blobs YouTube embeds into watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in innertube client configurations, keyed by client name. Used as a
    # fallback when a page's own ytcfg is unavailable or incomplete
    # (see _get_default_ytcfg / _ytcfg_get_safe).
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            # NOTE: unlike the WEB clients above, the ANDROID clients carry a
            # string here rather than a numeric id
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # Innertube API hostname per client; clients not listed fall back to the
    # WEB host (see _get_innertube_host)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
401
402 def _get_default_ytcfg(self, client='WEB'):
403 if client in self._YT_DEFAULT_YTCFGS:
404 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
405 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
407
408 def _get_innertube_host(self, client='WEB'):
409 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
410
411 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
412 # try_get but with fallback to default ytcfg client values when present
413 _func = lambda y: try_get(y, getter, expected_type)
414 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
415
416 def _extract_client_name(self, ytcfg, default_client='WEB'):
417 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
418
419 def _extract_client_version(self, ytcfg, default_client='WEB'):
420 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
421
422 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
423 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
424
425 def _extract_context(self, ytcfg=None, default_client='WEB'):
426 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
427 context = _get_context(ytcfg)
428 if context:
429 return context
430
431 context = _get_context(self._get_default_ytcfg(default_client))
432 if not ytcfg:
433 return context
434
435 # Recreate the client context (required)
436 context['client'].update({
437 'clientVersion': self._extract_client_version(ytcfg, default_client),
438 'clientName': self._extract_client_name(ytcfg, default_client),
439 })
440 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
441 if visitor_data:
442 context['client']['visitorData'] = visitor_data
443 return context
444
445 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
446 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
447 # See: https://github.com/yt-dlp/yt-dlp/issues/393
448 yt_cookies = self._get_cookies('https://www.youtube.com')
449 sapisid_cookie = dict_get(
450 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
451 if sapisid_cookie is None:
452 return
453 time_now = round(time.time())
454 # SAPISID cookie is required if not already present
455 if not yt_cookies.get('SAPISID'):
456 self._set_cookie(
457 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
458 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
459 sapisidhash = hashlib.sha1(
460 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
461 return f'SAPISIDHASH {time_now}_{sapisidhash}'
462
463 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
464 note='Downloading API JSON', errnote='Unable to download API page',
465 context=None, api_key=None, api_hostname=None, default_client='WEB'):
466
467 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
468 data.update(query)
469 real_headers = self._generate_api_headers(client=default_client)
470 real_headers.update({'content-type': 'application/json'})
471 if headers:
472 real_headers.update(headers)
473 return self._download_json(
474 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
475 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
476 data=json.dumps(data).encode('utf8'), headers=real_headers,
477 query={'key': api_key or self._extract_api_key()})
478
479 def _extract_yt_initial_data(self, video_id, webpage):
480 return self._parse_json(
481 self._search_regex(
482 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
483 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
484 video_id)
485
486 def _extract_identity_token(self, webpage, item_id):
487 ytcfg = self._extract_ytcfg(item_id, webpage)
488 if ytcfg:
489 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
490 if token:
491 return token
492 return self._search_regex(
493 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
494 'identity token', default=None)
495
496 @staticmethod
497 def _extract_account_syncid(data):
498 """
499 Extract syncId required to download private playlists of secondary channels
500 @param data Either response or ytcfg
501 """
502 sync_ids = (try_get(
503 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
504 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
505 if len(sync_ids) >= 2 and sync_ids[1]:
506 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
507 # and just "user_syncid||" for primary channel. We only want the channel_syncid
508 return sync_ids[0]
509 # ytcfg includes channel_syncid if on secondary channel
510 return data.get('DELEGATED_SESSION_ID')
511
512 def _extract_ytcfg(self, video_id, webpage):
513 if not webpage:
514 return {}
515 return self._parse_json(
516 self._search_regex(
517 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
518 default='{}'), video_id, fatal=False) or {}
519
520 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
521 visitor_data=None, api_hostname=None, client='WEB'):
522 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
523 headers = {
524 'X-YouTube-Client-Name': compat_str(
525 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
526 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
527 'Origin': origin
528 }
529 if not visitor_data and ytcfg:
530 visitor_data = try_get(
531 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
532 if identity_token:
533 headers['X-Youtube-Identity-Token'] = identity_token
534 if account_syncid:
535 headers['X-Goog-PageId'] = account_syncid
536 session_index = try_get(ytcfg, lambda x: x['SESSION_INDEX'], compat_str)
537 if account_syncid or session_index:
538 headers['X-Goog-AuthUser'] = session_index or 0
539 if visitor_data:
540 headers['X-Goog-Visitor-Id'] = visitor_data
541 auth = self._generate_sapisidhash_header(origin)
542 if auth is not None:
543 headers['Authorization'] = auth
544 headers['X-Origin'] = origin
545 return headers
546
547 @staticmethod
548 def _build_api_continuation_query(continuation, ctp=None):
549 query = {
550 'continuation': continuation
551 }
552 # TODO: Inconsistency with clickTrackingParams.
553 # Currently we have a fixed ctp contained within context (from ytcfg)
554 # and a ctp in root query for continuation.
555 if ctp:
556 query['clickTracking'] = {'clickTrackingParams': ctp}
557 return query
558
559 @classmethod
560 def _continuation_query_ajax_to_api(cls, continuation_query):
561 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
562 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
563
564 @staticmethod
565 def _build_continuation_query(continuation, ctp=None):
566 query = {
567 'ctoken': continuation,
568 'continuation': continuation,
569 }
570 if ctp:
571 query['itct'] = ctp
572 return query
573
574 @classmethod
575 def _extract_next_continuation_data(cls, renderer):
576 next_continuation = try_get(
577 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
578 lambda x: x['continuation']['reloadContinuationData']), dict)
579 if not next_continuation:
580 return
581 continuation = next_continuation.get('continuation')
582 if not continuation:
583 return
584 ctp = next_continuation.get('clickTrackingParams')
585 return cls._build_continuation_query(continuation, ctp)
586
587 @classmethod
588 def _extract_continuation_ep_data(cls, continuation_ep: dict):
589 if isinstance(continuation_ep, dict):
590 continuation = try_get(
591 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
592 if not continuation:
593 return
594 ctp = continuation_ep.get('clickTrackingParams')
595 return cls._build_continuation_query(continuation, ctp)
596
597 @classmethod
598 def _extract_continuation(cls, renderer):
599 next_continuation = cls._extract_next_continuation_data(renderer)
600 if next_continuation:
601 return next_continuation
602 contents = []
603 for key in ('contents', 'items'):
604 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
605 for content in contents:
606 if not isinstance(content, dict):
607 continue
608 continuation_ep = try_get(
609 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
610 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
611 dict)
612 continuation = cls._extract_continuation_ep_data(continuation_ep)
613 if continuation:
614 return continuation
615
616 @staticmethod
617 def _extract_alerts(data):
618 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
619 if not isinstance(alert_dict, dict):
620 continue
621 for alert in alert_dict.values():
622 alert_type = alert.get('type')
623 if not alert_type:
624 continue
625 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
626 if message:
627 yield alert_type, message
628 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
629 message += try_get(run, lambda x: x['text'], compat_str)
630 if message:
631 yield alert_type, message
632
633 def _report_alerts(self, alerts, expected=True):
634 errors = []
635 warnings = []
636 for alert_type, alert_message in alerts:
637 if alert_type.lower() == 'error':
638 errors.append([alert_type, alert_message])
639 else:
640 warnings.append([alert_type, alert_message])
641
642 for alert_type, alert_message in (warnings + errors[:-1]):
643 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
644 if errors:
645 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
646
647 def _extract_and_report_alerts(self, data, *args, **kwargs):
648 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
649
650 def _extract_badges(self, renderer: dict):
651 badges = set()
652 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
653 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
654 if label:
655 badges.add(label.lower())
656 return badges
657
658 @staticmethod
659 def _join_text_entries(runs):
660 text = None
661 for run in runs:
662 if not isinstance(run, dict):
663 continue
664 sub_text = try_get(run, lambda x: x['text'], compat_str)
665 if sub_text:
666 if not text:
667 text = sub_text
668 continue
669 text += sub_text
670 return text
671
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the innertube API endpoint *ep* with retries.

        Retries on intermittent HTTP 500/503/404 errors and on responses that
        are missing all of *check_get_keys* ("incomplete data"). Alerts in the
        response are reported (and raised) via _extract_and_report_alerts.
        Returns the parsed JSON response, or None when fatal=False and the
        request ultimately failed.
        """
        response = None
        last_error = None
        count = -1
        # Number of retry attempts is user-configurable
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # fatal=True here so HTTP errors surface as ExtractorError and
                # can be classified below; the caller's `fatal` only governs
                # final failure handling
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return
            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Accept the response when no sanity keys were requested, or
                # at least one of them is present
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
727
728 @staticmethod
729 def is_music_url(url):
730 return re.match(r'https?://music\.youtube\.com/', url) is not None
731
732 def _extract_video(self, renderer):
733 video_id = renderer.get('videoId')
734 title = try_get(
735 renderer,
736 (lambda x: x['title']['runs'][0]['text'],
737 lambda x: x['title']['simpleText']), compat_str)
738 description = try_get(
739 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
740 compat_str)
741 duration = parse_duration(try_get(
742 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
743 view_count_text = try_get(
744 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
745 view_count = str_to_int(self._search_regex(
746 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
747 'view count', default=None))
748 uploader = try_get(
749 renderer,
750 (lambda x: x['ownerText']['runs'][0]['text'],
751 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
752 return {
753 '_type': 'url',
754 'ie_key': YoutubeIE.ie_key(),
755 'id': video_id,
756 'url': video_id,
757 'title': title,
758 'description': description,
759 'duration': duration,
760 'view_count': view_count,
761 'uploader': uploader,
762 }
763
764
765class YoutubeIE(YoutubeBaseInfoExtractor):
766 IE_DESC = 'YouTube.com'
767 _INVIDIOUS_SITES = (
768 # invidious-redirect websites
769 r'(?:www\.)?redirect\.invidious\.io',
770 r'(?:(?:www|dev)\.)?invidio\.us',
771 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
772 r'(?:www\.)?invidious\.pussthecat\.org',
773 r'(?:www\.)?invidious\.zee\.li',
774 r'(?:www\.)?invidious\.ethibox\.fr',
775 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
776 # youtube-dl invidious instances list
777 r'(?:(?:www|no)\.)?invidiou\.sh',
778 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
779 r'(?:www\.)?invidious\.kabi\.tk',
780 r'(?:www\.)?invidious\.mastodon\.host',
781 r'(?:www\.)?invidious\.zapashcanon\.fr',
782 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
783 r'(?:www\.)?invidious\.tinfoil-hat\.net',
784 r'(?:www\.)?invidious\.himiko\.cloud',
785 r'(?:www\.)?invidious\.reallyancient\.tech',
786 r'(?:www\.)?invidious\.tube',
787 r'(?:www\.)?invidiou\.site',
788 r'(?:www\.)?invidious\.site',
789 r'(?:www\.)?invidious\.xyz',
790 r'(?:www\.)?invidious\.nixnet\.xyz',
791 r'(?:www\.)?invidious\.048596\.xyz',
792 r'(?:www\.)?invidious\.drycat\.fr',
793 r'(?:www\.)?inv\.skyn3t\.in',
794 r'(?:www\.)?tube\.poal\.co',
795 r'(?:www\.)?tube\.connect\.cafe',
796 r'(?:www\.)?vid\.wxzm\.sx',
797 r'(?:www\.)?vid\.mint\.lgbt',
798 r'(?:www\.)?vid\.puffyan\.us',
799 r'(?:www\.)?yewtu\.be',
800 r'(?:www\.)?yt\.elukerio\.org',
801 r'(?:www\.)?yt\.lelux\.fi',
802 r'(?:www\.)?invidious\.ggc-project\.de',
803 r'(?:www\.)?yt\.maisputain\.ovh',
804 r'(?:www\.)?ytprivate\.com',
805 r'(?:www\.)?invidious\.13ad\.de',
806 r'(?:www\.)?invidious\.toot\.koeln',
807 r'(?:www\.)?invidious\.fdn\.fr',
808 r'(?:www\.)?watch\.nettohikari\.com',
809 r'(?:www\.)?invidious\.namazso\.eu',
810 r'(?:www\.)?invidious\.silkky\.cloud',
811 r'(?:www\.)?invidious\.exonip\.de',
812 r'(?:www\.)?invidious\.riverside\.rocks',
813 r'(?:www\.)?invidious\.blamefran\.net',
814 r'(?:www\.)?invidious\.moomoo\.de',
815 r'(?:www\.)?ytb\.trom\.tf',
816 r'(?:www\.)?yt\.cyberhost\.uk',
817 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
818 r'(?:www\.)?qklhadlycap4cnod\.onion',
819 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
820 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
821 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
822 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
823 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
824 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
825 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
826 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
827 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
828 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
829 )
830 _VALID_URL = r"""(?x)^
831 (
832 (?:https?://|//) # http(s):// or protocol-independent URL
833 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
834 (?:www\.)?deturl\.com/www\.youtube\.com|
835 (?:www\.)?pwnyoutube\.com|
836 (?:www\.)?hooktube\.com|
837 (?:www\.)?yourepeat\.com|
838 tube\.majestyc\.net|
839 %(invidious)s|
840 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
841 (?:.*?\#/)? # handle anchor (#/) redirect urls
842 (?: # the various things that can precede the ID:
843 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
844 |(?: # or the v= param in all its forms
845 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
846 (?:\?|\#!?) # the params delimiter ? or # or #!
847 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
848 v=
849 )
850 ))
851 |(?:
852 youtu\.be| # just youtu.be/xxxx
853 vid\.plus| # or vid.plus/xxxx
854 zwearz\.com/watch| # or zwearz.com/watch/xxxx
855 %(invidious)s
856 )/
857 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
858 )
859 )? # all until now is optional -> you can pass the naked ID
860 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
861 (?(1).+)? # if we found the ID, everything can follow
862 (?:\#|$)""" % {
863 'invidious': '|'.join(_INVIDIOUS_SITES),
864 }
865 _PLAYER_INFO_RE = (
866 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
867 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
868 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
869 )
    # Static metadata for known YouTube itags.  Keys are itag identifiers as
    # strings; values supply fields (ext, width/height, codecs, abr, fps, ...)
    # that may be missing or unreliable in the streaming data returned by the
    # API.  Entries with negative 'preference' are deprioritized during
    # format selection.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle serialization formats that can be requested from the timedtext API
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Player-response reason strings that identify an age-gated video
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Disable the base class's generic geo-bypass mechanism for this extractor
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
985 _TESTS = [
986 {
987 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
988 'info_dict': {
989 'id': 'BaW_jenozKc',
990 'ext': 'mp4',
991 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
992 'uploader': 'Philipp Hagemeister',
993 'uploader_id': 'phihag',
994 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
995 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
996 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
997 'upload_date': '20121002',
998 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
999 'categories': ['Science & Technology'],
1000 'tags': ['youtube-dl'],
1001 'duration': 10,
1002 'view_count': int,
1003 'like_count': int,
1004 'dislike_count': int,
1005 'start_time': 1,
1006 'end_time': 9,
1007 }
1008 },
1009 {
1010 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1011 'note': 'Embed-only video (#1746)',
1012 'info_dict': {
1013 'id': 'yZIXLfi8CZQ',
1014 'ext': 'mp4',
1015 'upload_date': '20120608',
1016 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1017 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1018 'uploader': 'SET India',
1019 'uploader_id': 'setindia',
1020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1021 'age_limit': 18,
1022 },
1023 'skip': 'Private video',
1024 },
1025 {
1026 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1027 'note': 'Use the first video ID in the URL',
1028 'info_dict': {
1029 'id': 'BaW_jenozKc',
1030 'ext': 'mp4',
1031 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1032 'uploader': 'Philipp Hagemeister',
1033 'uploader_id': 'phihag',
1034 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1035 'upload_date': '20121002',
1036 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1037 'categories': ['Science & Technology'],
1038 'tags': ['youtube-dl'],
1039 'duration': 10,
1040 'view_count': int,
1041 'like_count': int,
1042 'dislike_count': int,
1043 },
1044 'params': {
1045 'skip_download': True,
1046 },
1047 },
1048 {
1049 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1050 'note': '256k DASH audio (format 141) via DASH manifest',
1051 'info_dict': {
1052 'id': 'a9LDPn-MO4I',
1053 'ext': 'm4a',
1054 'upload_date': '20121002',
1055 'uploader_id': '8KVIDEO',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1057 'description': '',
1058 'uploader': '8KVIDEO',
1059 'title': 'UHDTV TEST 8K VIDEO.mp4'
1060 },
1061 'params': {
1062 'youtube_include_dash_manifest': True,
1063 'format': '141',
1064 },
1065 'skip': 'format 141 not served anymore',
1066 },
1067 # DASH manifest with encrypted signature
1068 {
1069 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1070 'info_dict': {
1071 'id': 'IB3lcPjvWLA',
1072 'ext': 'm4a',
1073 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1074 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1075 'duration': 244,
1076 'uploader': 'AfrojackVEVO',
1077 'uploader_id': 'AfrojackVEVO',
1078 'upload_date': '20131011',
1079 'abr': 129.495,
1080 },
1081 'params': {
1082 'youtube_include_dash_manifest': True,
1083 'format': '141/bestaudio[ext=m4a]',
1084 },
1085 },
1086 # Controversy video
1087 {
1088 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1089 'info_dict': {
1090 'id': 'T4XJQO3qol8',
1091 'ext': 'mp4',
1092 'duration': 219,
1093 'upload_date': '20100909',
1094 'uploader': 'Amazing Atheist',
1095 'uploader_id': 'TheAmazingAtheist',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1097 'title': 'Burning Everyone\'s Koran',
1098 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1099 }
1100 },
1101 # Normal age-gate video (embed allowed)
1102 {
1103 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1104 'info_dict': {
1105 'id': 'HtVdAasjOgU',
1106 'ext': 'mp4',
1107 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1108 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1109 'duration': 142,
1110 'uploader': 'The Witcher',
1111 'uploader_id': 'WitcherGame',
1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1113 'upload_date': '20140605',
1114 'age_limit': 18,
1115 },
1116 },
1117 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1118 # YouTube Red ad is not captured for creator
1119 {
1120 'url': '__2ABJjxzNo',
1121 'info_dict': {
1122 'id': '__2ABJjxzNo',
1123 'ext': 'mp4',
1124 'duration': 266,
1125 'upload_date': '20100430',
1126 'uploader_id': 'deadmau5',
1127 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1128 'creator': 'deadmau5',
1129 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1130 'uploader': 'deadmau5',
1131 'title': 'Deadmau5 - Some Chords (HD)',
1132 'alt_title': 'Some Chords',
1133 },
1134 'expected_warnings': [
1135 'DASH manifest missing',
1136 ]
1137 },
1138 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1139 {
1140 'url': 'lqQg6PlCWgI',
1141 'info_dict': {
1142 'id': 'lqQg6PlCWgI',
1143 'ext': 'mp4',
1144 'duration': 6085,
1145 'upload_date': '20150827',
1146 'uploader_id': 'olympic',
1147 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1148 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1149 'uploader': 'Olympic',
1150 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1151 },
1152 'params': {
1153 'skip_download': 'requires avconv',
1154 }
1155 },
1156 # Non-square pixels
1157 {
1158 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1159 'info_dict': {
1160 'id': '_b-2C3KPAM0',
1161 'ext': 'mp4',
1162 'stretched_ratio': 16 / 9.,
1163 'duration': 85,
1164 'upload_date': '20110310',
1165 'uploader_id': 'AllenMeow',
1166 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1167 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1168 'uploader': '孫ᄋᄅ',
1169 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1170 },
1171 },
1172 # url_encoded_fmt_stream_map is empty string
1173 {
1174 'url': 'qEJwOuvDf7I',
1175 'info_dict': {
1176 'id': 'qEJwOuvDf7I',
1177 'ext': 'webm',
1178 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1179 'description': '',
1180 'upload_date': '20150404',
1181 'uploader_id': 'spbelect',
1182 'uploader': 'Наблюдатели Петербурга',
1183 },
1184 'params': {
1185 'skip_download': 'requires avconv',
1186 },
1187 'skip': 'This live event has ended.',
1188 },
1189 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1190 {
1191 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1192 'info_dict': {
1193 'id': 'FIl7x6_3R5Y',
1194 'ext': 'webm',
1195 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1196 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1197 'duration': 220,
1198 'upload_date': '20150625',
1199 'uploader_id': 'dorappi2000',
1200 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1201 'uploader': 'dorappi2000',
1202 'formats': 'mincount:31',
1203 },
1204 'skip': 'not actual anymore',
1205 },
1206 # DASH manifest with segment_list
1207 {
1208 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1209 'md5': '8ce563a1d667b599d21064e982ab9e31',
1210 'info_dict': {
1211 'id': 'CsmdDsKjzN8',
1212 'ext': 'mp4',
1213 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1214 'uploader': 'Airtek',
1215 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1216 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1217 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1218 },
1219 'params': {
1220 'youtube_include_dash_manifest': True,
1221 'format': '135', # bestvideo
1222 },
1223 'skip': 'This live event has ended.',
1224 },
1225 {
1226 # Multifeed videos (multiple cameras), URL is for Main Camera
1227 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1228 'info_dict': {
1229 'id': 'jvGDaLqkpTg',
1230 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1231 'description': 'md5:e03b909557865076822aa169218d6a5d',
1232 },
1233 'playlist': [{
1234 'info_dict': {
1235 'id': 'jvGDaLqkpTg',
1236 'ext': 'mp4',
1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10643,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1244 },
1245 }, {
1246 'info_dict': {
1247 'id': '3AKt1R1aDnw',
1248 'ext': 'mp4',
1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10991,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1256 },
1257 }, {
1258 'info_dict': {
1259 'id': 'RtAMM00gpVc',
1260 'ext': 'mp4',
1261 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1262 'description': 'md5:e03b909557865076822aa169218d6a5d',
1263 'duration': 10995,
1264 'upload_date': '20161111',
1265 'uploader': 'Team PGP',
1266 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1267 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1268 },
1269 }, {
1270 'info_dict': {
1271 'id': '6N2fdlP3C5U',
1272 'ext': 'mp4',
1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1274 'description': 'md5:e03b909557865076822aa169218d6a5d',
1275 'duration': 10990,
1276 'upload_date': '20161111',
1277 'uploader': 'Team PGP',
1278 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1280 },
1281 }],
1282 'params': {
1283 'skip_download': True,
1284 },
1285 },
1286 {
1287 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1288 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1289 'info_dict': {
1290 'id': 'gVfLd0zydlo',
1291 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1292 },
1293 'playlist_count': 2,
1294 'skip': 'Not multifeed anymore',
1295 },
1296 {
1297 'url': 'https://vid.plus/FlRa-iH7PGw',
1298 'only_matching': True,
1299 },
1300 {
1301 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1302 'only_matching': True,
1303 },
1304 {
1305 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1306 # Also tests cut-off URL expansion in video description (see
1307 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1308 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1309 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1310 'info_dict': {
1311 'id': 'lsguqyKfVQg',
1312 'ext': 'mp4',
1313 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1314 'alt_title': 'Dark Walk - Position Music',
1315 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1316 'duration': 133,
1317 'upload_date': '20151119',
1318 'uploader_id': 'IronSoulElf',
1319 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1320 'uploader': 'IronSoulElf',
1321 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1322 'track': 'Dark Walk - Position Music',
1323 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1324 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1325 },
1326 'params': {
1327 'skip_download': True,
1328 },
1329 },
1330 {
1331 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1332 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1333 'only_matching': True,
1334 },
1335 {
1336 # Video with yt:stretch=17:0
1337 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1338 'info_dict': {
1339 'id': 'Q39EVAstoRM',
1340 'ext': 'mp4',
1341 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1342 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1343 'upload_date': '20151107',
1344 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1345 'uploader': 'CH GAMER DROID',
1346 },
1347 'params': {
1348 'skip_download': True,
1349 },
1350 'skip': 'This video does not exist.',
1351 },
1352 {
1353 # Video with incomplete 'yt:stretch=16:'
1354 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1355 'only_matching': True,
1356 },
1357 {
1358 # Video licensed under Creative Commons
1359 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1360 'info_dict': {
1361 'id': 'M4gD1WSo5mA',
1362 'ext': 'mp4',
1363 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1364 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1365 'duration': 721,
1366 'upload_date': '20150127',
1367 'uploader_id': 'BerkmanCenter',
1368 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1369 'uploader': 'The Berkman Klein Center for Internet & Society',
1370 'license': 'Creative Commons Attribution license (reuse allowed)',
1371 },
1372 'params': {
1373 'skip_download': True,
1374 },
1375 },
1376 {
1377 # Channel-like uploader_url
1378 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1379 'info_dict': {
1380 'id': 'eQcmzGIKrzg',
1381 'ext': 'mp4',
1382 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1383 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1384 'duration': 4060,
1385 'upload_date': '20151119',
1386 'uploader': 'Bernie Sanders',
1387 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1388 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1389 'license': 'Creative Commons Attribution license (reuse allowed)',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
1394 },
1395 {
1396 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1397 'only_matching': True,
1398 },
1399 {
1400 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1401 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1402 'only_matching': True,
1403 },
1404 {
1405 # Rental video preview
1406 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1407 'info_dict': {
1408 'id': 'uGpuVWrhIzE',
1409 'ext': 'mp4',
1410 'title': 'Piku - Trailer',
1411 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1412 'upload_date': '20150811',
1413 'uploader': 'FlixMatrix',
1414 'uploader_id': 'FlixMatrixKaravan',
1415 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1416 'license': 'Standard YouTube License',
1417 },
1418 'params': {
1419 'skip_download': True,
1420 },
1421 'skip': 'This video is not available.',
1422 },
1423 {
1424 # YouTube Red video with episode data
1425 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1426 'info_dict': {
1427 'id': 'iqKdEhx-dD4',
1428 'ext': 'mp4',
1429 'title': 'Isolation - Mind Field (Ep 1)',
1430 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1431 'duration': 2085,
1432 'upload_date': '20170118',
1433 'uploader': 'Vsauce',
1434 'uploader_id': 'Vsauce',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1436 'series': 'Mind Field',
1437 'season_number': 1,
1438 'episode_number': 1,
1439 },
1440 'params': {
1441 'skip_download': True,
1442 },
1443 'expected_warnings': [
1444 'Skipping DASH manifest',
1445 ],
1446 },
1447 {
1448 # The following content has been identified by the YouTube community
1449 # as inappropriate or offensive to some audiences.
1450 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1451 'info_dict': {
1452 'id': '6SJNVb0GnPI',
1453 'ext': 'mp4',
1454 'title': 'Race Differences in Intelligence',
1455 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1456 'duration': 965,
1457 'upload_date': '20140124',
1458 'uploader': 'New Century Foundation',
1459 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1460 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
1465 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1466 },
1467 {
1468 # itag 212
1469 'url': '1t24XAntNCY',
1470 'only_matching': True,
1471 },
1472 {
1473 # geo restricted to JP
1474 'url': 'sJL6WA-aGkQ',
1475 'only_matching': True,
1476 },
1477 {
1478 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1479 'only_matching': True,
1480 },
1481 {
1482 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1483 'only_matching': True,
1484 },
1485 {
1486 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1487 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1488 'only_matching': True,
1489 },
1490 {
1491 # DRM protected
1492 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1493 'only_matching': True,
1494 },
1495 {
1496 # Video with unsupported adaptive stream type formats
1497 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1498 'info_dict': {
1499 'id': 'Z4Vy8R84T1U',
1500 'ext': 'mp4',
1501 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1502 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1503 'duration': 433,
1504 'upload_date': '20130923',
1505 'uploader': 'Amelia Putri Harwita',
1506 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1507 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1508 'formats': 'maxcount:10',
1509 },
1510 'params': {
1511 'skip_download': True,
1512 'youtube_include_dash_manifest': False,
1513 },
1514 'skip': 'not actual anymore',
1515 },
1516 {
1517 # Youtube Music Auto-generated description
1518 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1519 'info_dict': {
1520 'id': 'MgNrAu2pzNs',
1521 'ext': 'mp4',
1522 'title': 'Voyeur Girl',
1523 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1524 'upload_date': '20190312',
1525 'uploader': 'Stephen - Topic',
1526 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1527 'artist': 'Stephen',
1528 'track': 'Voyeur Girl',
1529 'album': 'it\'s too much love to know my dear',
1530 'release_date': '20190313',
1531 'release_year': 2019,
1532 },
1533 'params': {
1534 'skip_download': True,
1535 },
1536 },
1537 {
1538 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1539 'only_matching': True,
1540 },
1541 {
1542 # invalid -> valid video id redirection
1543 'url': 'DJztXj2GPfl',
1544 'info_dict': {
1545 'id': 'DJztXj2GPfk',
1546 'ext': 'mp4',
1547 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1548 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1549 'upload_date': '20090125',
1550 'uploader': 'Prochorowka',
1551 'uploader_id': 'Prochorowka',
1552 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1553 'artist': 'Panjabi MC',
1554 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1555 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1556 },
1557 'params': {
1558 'skip_download': True,
1559 },
1560 'skip': 'Video unavailable',
1561 },
1562 {
1563 # empty description results in an empty string
1564 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1565 'info_dict': {
1566 'id': 'x41yOUIvK2k',
1567 'ext': 'mp4',
1568 'title': 'IMG 3456',
1569 'description': '',
1570 'upload_date': '20170613',
1571 'uploader_id': 'ElevageOrVert',
1572 'uploader': 'ElevageOrVert',
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
1577 },
1578 {
1579 # with '};' inside yt initial data (see [1])
1580 # see [2] for an example with '};' inside ytInitialPlayerResponse
1581 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1582 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1583 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1584 'info_dict': {
1585 'id': 'CHqg6qOn4no',
1586 'ext': 'mp4',
1587 'title': 'Part 77 Sort a list of simple types in c#',
1588 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1589 'upload_date': '20130831',
1590 'uploader_id': 'kudvenkat',
1591 'uploader': 'kudvenkat',
1592 },
1593 'params': {
1594 'skip_download': True,
1595 },
1596 },
1597 {
1598 # another example of '};' in ytInitialData
1599 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1600 'only_matching': True,
1601 },
1602 {
1603 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1604 'only_matching': True,
1605 },
1606 {
1607 # https://github.com/ytdl-org/youtube-dl/pull/28094
1608 'url': 'OtqTfy26tG0',
1609 'info_dict': {
1610 'id': 'OtqTfy26tG0',
1611 'ext': 'mp4',
1612 'title': 'Burn Out',
1613 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1614 'upload_date': '20141120',
1615 'uploader': 'The Cinematic Orchestra - Topic',
1616 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1618 'artist': 'The Cinematic Orchestra',
1619 'track': 'Burn Out',
1620 'album': 'Every Day',
1621 'release_data': None,
1622 'release_year': None,
1623 },
1624 'params': {
1625 'skip_download': True,
1626 },
1627 },
1628 {
1629 # controversial video, only works with bpctr when authenticated with cookies
1630 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1631 'only_matching': True,
1632 },
1633 {
1634 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1635 'url': 'cBvYw8_A0vQ',
1636 'info_dict': {
1637 'id': 'cBvYw8_A0vQ',
1638 'ext': 'mp4',
1639 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1640 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1641 'upload_date': '20201120',
1642 'uploader': 'Walk around Japan',
1643 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1644 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1645 },
1646 'params': {
1647 'skip_download': True,
1648 },
1649 }, {
1650 # Has multiple audio streams
1651 'url': 'WaOKSUlf4TM',
1652 'only_matching': True
1653 }, {
1654 # Requires Premium: has format 141 when requested using YTM url
1655 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1656 'only_matching': True
1657 }, {
1658 # multiple subtitles with same lang_code
1659 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1660 'only_matching': True,
1661 }, {
1662 # Force use android client fallback
1663 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1664 'info_dict': {
1665 'id': 'YOelRv7fMxY',
1666 'title': 'Digging a Secret Tunnel from my Workshop',
1667 'ext': '3gp',
1668 'upload_date': '20210624',
1669 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1670 'uploader': 'colinfurze',
1671 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1672 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1673 },
1674 'params': {
1675 'format': '17', # 3gp format available on android
1676 'extractor_args': {'youtube': {'player_client': ['android']}},
1677 },
1678 },
1679 {
1680 # Skip download of additional client configs (remix client config in this case)
1681 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1682 'only_matching': True,
1683 'params': {
1684 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1685 },
1686 }
1687 ]
1688
1689 @classmethod
1690 def suitable(cls, url):
1691 # Hack for lazy extractors until more generic solution is implemented
1692 # (see #28780)
1693 from .youtube import parse_qs
1694 qs = parse_qs(url)
1695 if qs.get('list', [None])[0]:
1696 return False
1697 return super(YoutubeIE, cls).suitable(url)
1698
1699 def __init__(self, *args, **kwargs):
1700 super(YoutubeIE, self).__init__(*args, **kwargs)
1701 self._code_cache = {}
1702 self._player_cache = {}
1703
1704 def _extract_player_url(self, ytcfg=None, webpage=None):
1705 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1706 if not player_url:
1707 player_url = self._search_regex(
1708 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1709 webpage, 'player URL', fatal=False)
1710 if player_url.startswith('//'):
1711 player_url = 'https:' + player_url
1712 elif not re.match(r'https?://', player_url):
1713 player_url = compat_urlparse.urljoin(
1714 'https://www.youtube.com', player_url)
1715 return player_url
1716
1717 def _signature_cache_id(self, example_sig):
1718 """ Return a string representation of a signature """
1719 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1720
1721 @classmethod
1722 def _extract_player_info(cls, player_url):
1723 for player_re in cls._PLAYER_INFO_RE:
1724 id_m = re.search(player_re, player_url)
1725 if id_m:
1726 break
1727 else:
1728 raise ExtractorError('Cannot identify player %r' % player_url)
1729 return id_m.group('id')
1730
1731 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1732 player_id = self._extract_player_info(player_url)
1733 if player_id not in self._code_cache:
1734 self._code_cache[player_id] = self._download_webpage(
1735 player_url, video_id, fatal=fatal,
1736 note='Downloading player ' + player_id,
1737 errnote='Download of %s failed' % player_url)
1738 return player_id in self._code_cache
1739
1740 def _extract_signature_function(self, video_id, player_url, example_sig):
1741 player_id = self._extract_player_info(player_url)
1742
1743 # Read from filesystem cache
1744 func_id = 'js_%s_%s' % (
1745 player_id, self._signature_cache_id(example_sig))
1746 assert os.path.basename(func_id) == func_id
1747
1748 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1749 if cache_spec is not None:
1750 return lambda s: ''.join(s[i] for i in cache_spec)
1751
1752 if self._load_player(video_id, player_url):
1753 code = self._code_cache[player_id]
1754 res = self._parse_sig_js(code)
1755
1756 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1757 cache_res = res(test_string)
1758 cache_spec = [ord(c) for c in cache_res]
1759
1760 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1761 return res
1762
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Debug aid (youtube_print_sig_code): runs *func* on a probe string and
        prints a hand-writable implementation built from slice expressions.
        """
        def gen_sig_code(idxs):
            # Collapse runs of consecutive indices (step +1 or -1) into slice
            # expressions; emit single s[i] terms for isolated indices.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a run: extend it or flush the slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new +1/-1 run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the final open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string: character i is chr(i), so func's output reveals the
        # permutation it applies
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1801
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function in the player JS and
        return a Python callable wrapping it via JSInterpreter.
        """
        # Patterns ordered from current player builds to obsolete ones; each
        # captures the descrambler's function name as group 'sig'.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions take their arguments as a list
        return lambda s: initial_function([s])
1825
1826 def _decrypt_signature(self, s, video_id, player_url):
1827 """Turn the encrypted s field into a working signature"""
1828
1829 if player_url is None:
1830 raise ExtractorError('Cannot decrypt signature without player_url')
1831
1832 try:
1833 player_id = (player_url, self._signature_cache_id(s))
1834 if player_id not in self._player_cache:
1835 func = self._extract_signature_function(
1836 video_id, player_url, s
1837 )
1838 self._player_cache[player_id] = func
1839 func = self._player_cache[player_id]
1840 if self.get_param('youtube_print_sig_code'):
1841 self._print_sig_code(func, s)
1842 return func(s)
1843 except Exception as e:
1844 tb = traceback.format_exc()
1845 raise ExtractorError(
1846 'Signature extraction failed: ' + tb, cause=e)
1847
1848 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1849 """
1850 Extract signatureTimestamp (sts)
1851 Required to tell API what sig/player version is in use.
1852 """
1853 sts = None
1854 if isinstance(ytcfg, dict):
1855 sts = int_or_none(ytcfg.get('STS'))
1856
1857 if not sts:
1858 # Attempt to extract from player
1859 if player_url is None:
1860 error_msg = 'Cannot extract signature timestamp without player_url.'
1861 if fatal:
1862 raise ExtractorError(error_msg)
1863 self.report_warning(error_msg)
1864 return
1865 if self._load_player(video_id, player_url, fatal=fatal):
1866 player_id = self._extract_player_info(player_url)
1867 code = self._code_cache[player_id]
1868 sts = int_or_none(self._search_regex(
1869 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1870 'JS player signature timestamp', group='sts', fatal=fatal))
1871 return sts
1872
1873 def _mark_watched(self, video_id, player_response):
1874 playback_url = url_or_none(try_get(
1875 player_response,
1876 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1877 if not playback_url:
1878 return
1879 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1880 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1881
1882 # cpn generation algorithm is reverse engineered from base.js.
1883 # In fact it works even with dummy cpn.
1884 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1885 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1886
1887 qs.update({
1888 'ver': ['2'],
1889 'cpn': [cpn],
1890 })
1891 playback_url = compat_urlparse.urlunparse(
1892 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1893
1894 self._download_webpage(
1895 playback_url, video_id, 'Marking watched',
1896 'Unable to mark watched', fatal=False)
1897
1898 @staticmethod
1899 def _extract_urls(webpage):
1900 # Embedded YouTube player
1901 entries = [
1902 unescapeHTML(mobj.group('url'))
1903 for mobj in re.finditer(r'''(?x)
1904 (?:
1905 <iframe[^>]+?src=|
1906 data-video-url=|
1907 <embed[^>]+?src=|
1908 embedSWF\(?:\s*|
1909 <object[^>]+data=|
1910 new\s+SWFObject\(
1911 )
1912 (["\'])
1913 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1914 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1915 \1''', webpage)]
1916
1917 # lazyYT YouTube embed
1918 entries.extend(list(map(
1919 unescapeHTML,
1920 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1921
1922 # Wordpress "YouTube Video Importer" plugin
1923 matches = re.findall(r'''(?x)<div[^>]+
1924 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1925 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1926 entries.extend(m[-1] for m in matches)
1927
1928 return entries
1929
1930 @staticmethod
1931 def _extract_url(webpage):
1932 urls = YoutubeIE._extract_urls(webpage)
1933 return urls[0] if urls else None
1934
1935 @classmethod
1936 def extract_id(cls, url):
1937 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1938 if mobj is None:
1939 raise ExtractorError('Invalid URL: %s' % url)
1940 video_id = mobj.group(2)
1941 return video_id
1942
1943 def _extract_chapters_from_json(self, data, video_id, duration):
1944 chapters_list = try_get(
1945 data,
1946 lambda x: x['playerOverlays']
1947 ['playerOverlayRenderer']
1948 ['decoratedPlayerBarRenderer']
1949 ['decoratedPlayerBarRenderer']
1950 ['playerBar']
1951 ['chapteredPlayerBarRenderer']
1952 ['chapters'],
1953 list)
1954 if not chapters_list:
1955 return
1956
1957 def chapter_time(chapter):
1958 return float_or_none(
1959 try_get(
1960 chapter,
1961 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1962 int),
1963 scale=1000)
1964 chapters = []
1965 for next_num, chapter in enumerate(chapters_list, start=1):
1966 start_time = chapter_time(chapter)
1967 if start_time is None:
1968 continue
1969 end_time = (chapter_time(chapters_list[next_num])
1970 if next_num < len(chapters_list) else duration)
1971 if end_time is None:
1972 continue
1973 title = try_get(
1974 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1975 compat_str)
1976 chapters.append({
1977 'start_time': start_time,
1978 'end_time': end_time,
1979 'title': title,
1980 })
1981 return chapters
1982
1983 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1984 return self._parse_json(self._search_regex(
1985 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1986 regex), webpage, name, default='{}'), video_id, fatal=False)
1987
1988 @staticmethod
1989 def parse_time_text(time_text):
1990 """
1991 Parse the comment time text
1992 time_text is in the format 'X units ago (edited)'
1993 """
1994 time_text_split = time_text.split(' ')
1995 if len(time_text_split) >= 3:
1996 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1997
1998 def _extract_comment(self, comment_renderer, parent=None):
1999 comment_id = comment_renderer.get('commentId')
2000 if not comment_id:
2001 return
2002 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
2003 text = self._join_text_entries(comment_text_runs) or ''
2004 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
2005 time_text = self._join_text_entries(comment_time_text)
2006 # note: timestamp is an estimate calculated from the current time and time_text
2007 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
2008 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
2009 author_id = try_get(comment_renderer,
2010 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2011 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2012 lambda x: x['likeCount']), compat_str)) or 0
2013 author_thumbnail = try_get(comment_renderer,
2014 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2015
2016 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2017 is_favorited = 'creatorHeart' in (try_get(
2018 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2019 return {
2020 'id': comment_id,
2021 'text': text,
2022 'timestamp': timestamp,
2023 'time_text': time_text,
2024 'like_count': votes,
2025 'is_favorited': is_favorited,
2026 'author': author,
2027 'author_id': author_id,
2028 'author_thumbnail': author_thumbnail,
2029 'author_is_uploader': author_is_uploader,
2030 'parent': parent or 'root'
2031 }
2032
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator over one comment (or reply) continuation.

        When available, yields an int (estimated total comment count) before
        any comments, then comment dicts (see _extract_comment). Recurses
        once per reply thread; YouTube comment trees have a max depth of 2.

        comment_counts is a 3-element list shared across recursive calls:
        [comments downloaded so far, estimated total, current thread index].
        """

        def extract_header(contents):
            # Parse the comments header: pull out the estimated total and the
            # continuation matching the requested sort order ('top' or 'new').
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = try_get(comments_header_renderer,
                                                 (lambda x: x['countText']['runs'][0]['text'],
                                                  lambda x: x['commentsCount']['runs'][0]['text']),
                                                 compat_str)
                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread list, recursing into replies.
            if not parent:
                # Reset the per-page reply-thread counter at top level
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    # Recurse with comment_counts shared so progress is global
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['ctoken']) < 27:
            # Short ctokens belong to the old API; synthesize a new-API token
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through continuations until none remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=self._continuation_query_ajax_to_api(continuation),
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Keep the visitor id stable across requests once the API hands one out
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                # Current response structure
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2204
2205 @staticmethod
2206 def _generate_comment_continuation(video_id):
2207 """
2208 Generates initial comment section continuation token from given video id
2209 """
2210 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2211 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2212 new_continuation_intlist = list(itertools.chain.from_iterable(
2213 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2214 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2215
2216 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2217 """Entry for comment extraction"""
2218 def _real_comment_extract(contents):
2219 if isinstance(contents, list):
2220 for entry in contents:
2221 for key, renderer in entry.items():
2222 if key not in known_entry_comment_renderers:
2223 continue
2224 yield from self._comment_entries(
2225 renderer, video_id=video_id, ytcfg=ytcfg,
2226 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2227 account_syncid=self._extract_account_syncid(ytcfg))
2228 break
2229 comments = []
2230 known_entry_comment_renderers = ('itemSectionRenderer',)
2231 estimated_total = 0
2232 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2233
2234 try:
2235 for comment in _real_comment_extract(contents):
2236 if len(comments) >= max_comments:
2237 break
2238 if isinstance(comment, int):
2239 estimated_total = comment
2240 continue
2241 comments.append(comment)
2242 except KeyboardInterrupt:
2243 self.to_screen('Interrupted by user')
2244 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2245 return {
2246 'comments': comments,
2247 'comment_count': len(comments),
2248 }
2249
2250 @staticmethod
2251 def _generate_player_context(sts=None):
2252 context = {
2253 'html5Preference': 'HTML5_PREF_WANTS',
2254 }
2255 if sts is not None:
2256 context['signatureTimestamp'] = sts
2257 return {
2258 'playbackContext': {
2259 'contentPlaybackContext': context
2260 }
2261 }
2262
2263 @staticmethod
2264 def _get_video_info_params(video_id, client='TVHTML5'):
2265 GVI_CLIENTS = {
2266 'ANDROID': {
2267 'c': 'ANDROID',
2268 'cver': '16.20',
2269 },
2270 'TVHTML5': {
2271 'c': 'TVHTML5',
2272 'cver': '6.20180913',
2273 }
2274 }
2275 query = {
2276 'video_id': video_id,
2277 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2278 'html5': '1'
2279 }
2280 query.update(GVI_CLIENTS.get(client))
2281 return query
2282
2283 def _real_extract(self, url):
2284 url, smuggled_data = unsmuggle_url(url, {})
2285 video_id = self._match_id(url)
2286
2287 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2288
2289 base_url = self.http_scheme() + '//www.youtube.com/'
2290 webpage_url = base_url + 'watch?v=' + video_id
2291 webpage = self._download_webpage(
2292 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2293
2294 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2295 identity_token = self._extract_identity_token(webpage, video_id)
2296 syncid = self._extract_account_syncid(ytcfg)
2297 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2298
2299 player_url = self._extract_player_url(ytcfg, webpage)
2300
2301 player_client = self._configuration_arg('player_client', [''])[0]
2302 if player_client not in ('web', 'android', ''):
2303 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2304 force_mobile_client = player_client != 'web'
2305 player_skip = self._configuration_arg('player_skip')
2306
2307 def get_text(x):
2308 if not x:
2309 return
2310 text = x.get('simpleText')
2311 if text and isinstance(text, compat_str):
2312 return text
2313 runs = x.get('runs')
2314 if not isinstance(runs, list):
2315 return
2316 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2317
2318 ytm_streaming_data = {}
2319 if is_music_url:
2320 ytm_webpage = None
2321 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2322 if sts and not force_mobile_client and 'configs' not in player_skip:
2323 ytm_webpage = self._download_webpage(
2324 'https://music.youtube.com',
2325 video_id, fatal=False, note='Downloading remix client config')
2326
2327 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2328 ytm_client = 'WEB_REMIX'
2329 if not sts or force_mobile_client:
2330 # Android client already has signature descrambled
2331 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2332 if not sts:
2333 self.report_warning('Falling back to android remix client for player API.')
2334 ytm_client = 'ANDROID_MUSIC'
2335 ytm_cfg = {}
2336
2337 ytm_headers = self._generate_api_headers(
2338 ytm_cfg, identity_token, syncid,
2339 client=ytm_client)
2340 ytm_query = {'videoId': video_id}
2341 ytm_query.update(self._generate_player_context(sts))
2342
2343 ytm_player_response = self._extract_response(
2344 item_id=video_id, ep='player', query=ytm_query,
2345 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2346 default_client=ytm_client,
2347 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2348 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2349
2350 player_response = None
2351 if webpage:
2352 player_response = self._extract_yt_initial_variable(
2353 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2354 video_id, 'initial player response')
2355
2356 if not player_response or force_mobile_client:
2357 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2358 yt_client = 'WEB'
2359 ytpcfg = ytcfg
2360 ytp_headers = headers
2361 if not sts or force_mobile_client:
2362 # Android client already has signature descrambled
2363 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2364 if not sts:
2365 self.report_warning('Falling back to android client for player API.')
2366 yt_client = 'ANDROID'
2367 ytpcfg = {}
2368 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2369
2370 yt_query = {'videoId': video_id}
2371 yt_query.update(self._generate_player_context(sts))
2372 player_response = self._extract_response(
2373 item_id=video_id, ep='player', query=yt_query,
2374 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2375 default_client=yt_client,
2376 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2377 ) or player_response
2378
2379 # Age-gate workarounds
2380 playability_status = player_response.get('playabilityStatus') or {}
2381 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2382 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2383 for gvi_client in gvi_clients:
2384 pr = self._parse_json(try_get(compat_parse_qs(
2385 self._download_webpage(
2386 base_url + 'get_video_info', video_id,
2387 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2388 'unable to download video info webpage', fatal=False,
2389 query=self._get_video_info_params(video_id, client=gvi_client))),
2390 lambda x: x['player_response'][0],
2391 compat_str) or '{}', video_id)
2392 if pr:
2393 break
2394 if not pr:
2395 self.report_warning('Falling back to embedded-only age-gate workaround.')
2396 embed_webpage = None
2397 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2398 if sts and not force_mobile_client and 'configs' not in player_skip:
2399 embed_webpage = self._download_webpage(
2400 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2401 video_id=video_id, note='Downloading age-gated embed config')
2402
2403 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2404 # If we extracted the embed webpage, it'll tell us if we can view the video
2405 embedded_pr = self._parse_json(
2406 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2407 video_id=video_id)
2408 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2409 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2410 yt_client = 'WEB_EMBEDDED_PLAYER'
2411 if not sts or force_mobile_client:
2412 # Android client already has signature descrambled
2413 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2414 if not sts:
2415 self.report_warning(
2416 'Falling back to android embedded client for player API (note: some formats may be missing).')
2417 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2418 ytcfg_age = {}
2419
2420 ytage_headers = self._generate_api_headers(
2421 ytcfg_age, identity_token, syncid, client=yt_client)
2422 yt_age_query = {'videoId': video_id}
2423 yt_age_query.update(self._generate_player_context(sts))
2424 pr = self._extract_response(
2425 item_id=video_id, ep='player', query=yt_age_query,
2426 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2427 default_client=yt_client,
2428 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2429 ) or {}
2430
2431 if pr:
2432 player_response = pr
2433
2434 trailer_video_id = try_get(
2435 playability_status,
2436 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2437 compat_str)
2438 if trailer_video_id:
2439 return self.url_result(
2440 trailer_video_id, self.ie_key(), trailer_video_id)
2441
2442 search_meta = (
2443 lambda x: self._html_search_meta(x, webpage, default=None)) \
2444 if webpage else lambda x: None
2445
2446 video_details = player_response.get('videoDetails') or {}
2447 microformat = try_get(
2448 player_response,
2449 lambda x: x['microformat']['playerMicroformatRenderer'],
2450 dict) or {}
2451 video_title = video_details.get('title') \
2452 or get_text(microformat.get('title')) \
2453 or search_meta(['og:title', 'twitter:title', 'title'])
2454 video_description = video_details.get('shortDescription')
2455
2456 if not smuggled_data.get('force_singlefeed', False):
2457 if not self.get_param('noplaylist'):
2458 multifeed_metadata_list = try_get(
2459 player_response,
2460 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2461 compat_str)
2462 if multifeed_metadata_list:
2463 entries = []
2464 feed_ids = []
2465 for feed in multifeed_metadata_list.split(','):
2466 # Unquote should take place before split on comma (,) since textual
2467 # fields may contain comma as well (see
2468 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2469 feed_data = compat_parse_qs(
2470 compat_urllib_parse_unquote_plus(feed))
2471
2472 def feed_entry(name):
2473 return try_get(
2474 feed_data, lambda x: x[name][0], compat_str)
2475
2476 feed_id = feed_entry('id')
2477 if not feed_id:
2478 continue
2479 feed_title = feed_entry('title')
2480 title = video_title
2481 if feed_title:
2482 title += ' (%s)' % feed_title
2483 entries.append({
2484 '_type': 'url_transparent',
2485 'ie_key': 'Youtube',
2486 'url': smuggle_url(
2487 base_url + 'watch?v=' + feed_data['id'][0],
2488 {'force_singlefeed': True}),
2489 'title': title,
2490 })
2491 feed_ids.append(feed_id)
2492 self.to_screen(
2493 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2494 % (', '.join(feed_ids), video_id))
2495 return self.playlist_result(
2496 entries, video_id, video_title, video_description)
2497 else:
2498 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2499
2500 formats, itags, stream_ids = [], [], []
2501 itag_qualities = {}
2502 q = qualities([
2503 # "tiny" is the smallest video-only format. But some audio-only formats
2504 # was also labeled "tiny". It is not clear if such formats still exist
2505 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2506 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2507 ])
2508
2509 streaming_data = player_response.get('streamingData') or {}
2510 streaming_formats = streaming_data.get('formats') or []
2511 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2512 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2513 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2514
2515 for fmt in streaming_formats:
2516 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2517 continue
2518
2519 itag = str_or_none(fmt.get('itag'))
2520 audio_track = fmt.get('audioTrack') or {}
2521 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2522 if stream_id in stream_ids:
2523 continue
2524
2525 quality = fmt.get('quality')
2526 if quality == 'tiny' or not quality:
2527 quality = fmt.get('audioQuality', '').lower() or quality
2528 if itag and quality:
2529 itag_qualities[itag] = quality
2530 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2531 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2532 # number of fragment that would subsequently requested with (`&sq=N`)
2533 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2534 continue
2535
2536 fmt_url = fmt.get('url')
2537 if not fmt_url:
2538 sc = compat_parse_qs(fmt.get('signatureCipher'))
2539 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2540 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2541 if not (sc and fmt_url and encrypted_sig):
2542 continue
2543 if not player_url:
2544 continue
2545 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2546 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2547 fmt_url += '&' + sp + '=' + signature
2548
2549 if itag:
2550 itags.append(itag)
2551 stream_ids.append(stream_id)
2552
2553 tbr = float_or_none(
2554 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2555 dct = {
2556 'asr': int_or_none(fmt.get('audioSampleRate')),
2557 'filesize': int_or_none(fmt.get('contentLength')),
2558 'format_id': itag,
2559 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2560 'fps': int_or_none(fmt.get('fps')),
2561 'height': int_or_none(fmt.get('height')),
2562 'quality': q(quality),
2563 'tbr': tbr,
2564 'url': fmt_url,
2565 'width': fmt.get('width'),
2566 'language': audio_track.get('id', '').split('.')[0],
2567 }
2568 mime_mobj = re.match(
2569 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2570 if mime_mobj:
2571 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2572 dct.update(parse_codecs(mime_mobj.group(2)))
2573 # The 3gp format in android client has a quality of "small",
2574 # but is actually worse than all other formats
2575 if dct['ext'] == '3gp':
2576 dct['quality'] = q('tiny')
2577 no_audio = dct.get('acodec') == 'none'
2578 no_video = dct.get('vcodec') == 'none'
2579 if no_audio:
2580 dct['vbr'] = tbr
2581 if no_video:
2582 dct['abr'] = tbr
2583 if no_audio or no_video:
2584 dct['downloader_options'] = {
2585 # Youtube throttles chunks >~10M
2586 'http_chunk_size': 10485760,
2587 }
2588 if dct.get('ext'):
2589 dct['container'] = dct['ext'] + '_dash'
2590 formats.append(dct)
2591
2592 skip_manifests = self._configuration_arg('skip')
2593 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2594 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2595
2596 for sd in (streaming_data, ytm_streaming_data):
2597 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2598 if hls_manifest_url:
2599 for f in self._extract_m3u8_formats(
2600 hls_manifest_url, video_id, 'mp4', fatal=False):
2601 itag = self._search_regex(
2602 r'/itag/(\d+)', f['url'], 'itag', default=None)
2603 if itag:
2604 f['format_id'] = itag
2605 formats.append(f)
2606
2607 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2608 if dash_manifest_url:
2609 for f in self._extract_mpd_formats(
2610 dash_manifest_url, video_id, fatal=False):
2611 itag = f['format_id']
2612 if itag in itags:
2613 continue
2614 if itag in itag_qualities:
2615 f['quality'] = q(itag_qualities[itag])
2616 filesize = int_or_none(self._search_regex(
2617 r'/clen/(\d+)', f.get('fragment_base_url')
2618 or f['url'], 'file size', default=None))
2619 if filesize:
2620 f['filesize'] = filesize
2621 formats.append(f)
2622
2623 if not formats:
2624 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2625 self.raise_no_formats(
2626 'This video is DRM protected.', expected=True)
2627 pemr = try_get(
2628 playability_status,
2629 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2630 dict) or {}
2631 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2632 subreason = pemr.get('subreason')
2633 if subreason:
2634 subreason = clean_html(get_text(subreason))
2635 if subreason == 'The uploader has not made this video available in your country.':
2636 countries = microformat.get('availableCountries')
2637 if not countries:
2638 regions_allowed = search_meta('regionsAllowed')
2639 countries = regions_allowed.split(',') if regions_allowed else None
2640 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2641 reason += '\n' + subreason
2642 if reason:
2643 self.raise_no_formats(reason, expected=True)
2644
2645 self._sort_formats(formats)
2646
2647 keywords = video_details.get('keywords') or []
2648 if not keywords and webpage:
2649 keywords = [
2650 unescapeHTML(m.group('content'))
2651 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2652 for keyword in keywords:
2653 if keyword.startswith('yt:stretch='):
2654 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2655 if mobj:
2656 # NB: float is intentional for forcing float division
2657 w, h = (float(v) for v in mobj.groups())
2658 if w > 0 and h > 0:
2659 ratio = w / h
2660 for f in formats:
2661 if f.get('vcodec') != 'none':
2662 f['stretched_ratio'] = ratio
2663 break
2664
2665 thumbnails = []
2666 for container in (video_details, microformat):
2667 for thumbnail in (try_get(
2668 container,
2669 lambda x: x['thumbnail']['thumbnails'], list) or []):
2670 thumbnail_url = thumbnail.get('url')
2671 if not thumbnail_url:
2672 continue
2673 # Sometimes youtube gives a wrong thumbnail URL. See:
2674 # https://github.com/yt-dlp/yt-dlp/issues/233
2675 # https://github.com/ytdl-org/youtube-dl/issues/28023
2676 if 'maxresdefault' in thumbnail_url:
2677 thumbnail_url = thumbnail_url.split('?')[0]
2678 thumbnails.append({
2679 'url': thumbnail_url,
2680 'height': int_or_none(thumbnail.get('height')),
2681 'width': int_or_none(thumbnail.get('width')),
2682 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2683 })
2684 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2685 if thumbnail_url:
2686 thumbnails.append({
2687 'url': thumbnail_url,
2688 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2689 })
2690 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2691 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2692 thumbnails.append({
2693 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2694 'preference': 1,
2695 })
2696 self._remove_duplicate_formats(thumbnails)
2697
2698 category = microformat.get('category') or search_meta('genre')
2699 channel_id = video_details.get('channelId') \
2700 or microformat.get('externalChannelId') \
2701 or search_meta('channelId')
2702 duration = int_or_none(
2703 video_details.get('lengthSeconds')
2704 or microformat.get('lengthSeconds')) \
2705 or parse_duration(search_meta('duration'))
2706 is_live = video_details.get('isLive')
2707 is_upcoming = video_details.get('isUpcoming')
2708 owner_profile_url = microformat.get('ownerProfileUrl')
2709
2710 info = {
2711 'id': video_id,
2712 'title': self._live_title(video_title) if is_live else video_title,
2713 'formats': formats,
2714 'thumbnails': thumbnails,
2715 'description': video_description,
2716 'upload_date': unified_strdate(
2717 microformat.get('uploadDate')
2718 or search_meta('uploadDate')),
2719 'uploader': video_details['author'],
2720 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2721 'uploader_url': owner_profile_url,
2722 'channel_id': channel_id,
2723 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2724 'duration': duration,
2725 'view_count': int_or_none(
2726 video_details.get('viewCount')
2727 or microformat.get('viewCount')
2728 or search_meta('interactionCount')),
2729 'average_rating': float_or_none(video_details.get('averageRating')),
2730 'age_limit': 18 if (
2731 microformat.get('isFamilySafe') is False
2732 or search_meta('isFamilyFriendly') == 'false'
2733 or search_meta('og:restrictions:age') == '18+') else 0,
2734 'webpage_url': webpage_url,
2735 'categories': [category] if category else None,
2736 'tags': keywords,
2737 'is_live': is_live,
2738 'playable_in_embed': playability_status.get('playableInEmbed'),
2739 'was_live': video_details.get('isLiveContent'),
2740 }
2741
2742 pctr = try_get(
2743 player_response,
2744 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2745 subtitles = {}
2746 if pctr:
2747 def process_language(container, base_url, lang_code, sub_name, query):
2748 lang_subs = container.setdefault(lang_code, [])
2749 for fmt in self._SUBTITLE_FORMATS:
2750 query.update({
2751 'fmt': fmt,
2752 })
2753 lang_subs.append({
2754 'ext': fmt,
2755 'url': update_url_query(base_url, query),
2756 'name': sub_name,
2757 })
2758
2759 for caption_track in (pctr.get('captionTracks') or []):
2760 base_url = caption_track.get('baseUrl')
2761 if not base_url:
2762 continue
2763 if caption_track.get('kind') != 'asr':
2764 lang_code = (
2765 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2766 or caption_track.get('languageCode'))
2767 if not lang_code:
2768 continue
2769 process_language(
2770 subtitles, base_url, lang_code,
2771 try_get(caption_track, lambda x: x['name']['simpleText']),
2772 {})
2773 continue
2774 automatic_captions = {}
2775 for translation_language in (pctr.get('translationLanguages') or []):
2776 translation_language_code = translation_language.get('languageCode')
2777 if not translation_language_code:
2778 continue
2779 process_language(
2780 automatic_captions, base_url, translation_language_code,
2781 try_get(translation_language, (
2782 lambda x: x['languageName']['simpleText'],
2783 lambda x: x['languageName']['runs'][0]['text'])),
2784 {'tlang': translation_language_code})
2785 info['automatic_captions'] = automatic_captions
2786 info['subtitles'] = subtitles
2787
2788 parsed_url = compat_urllib_parse_urlparse(url)
2789 for component in [parsed_url.fragment, parsed_url.query]:
2790 query = compat_parse_qs(component)
2791 for k, v in query.items():
2792 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2793 d_k += '_time'
2794 if d_k not in info and k in s_ks:
2795 info[d_k] = parse_duration(query[k][0])
2796
2797 # Youtube Music Auto-generated description
2798 if video_description:
2799 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2800 if mobj:
2801 release_year = mobj.group('release_year')
2802 release_date = mobj.group('release_date')
2803 if release_date:
2804 release_date = release_date.replace('-', '')
2805 if not release_year:
2806 release_year = release_date[:4]
2807 info.update({
2808 'album': mobj.group('album'.strip()),
2809 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2810 'track': mobj.group('track').strip(),
2811 'release_date': release_date,
2812 'release_year': int_or_none(release_year),
2813 })
2814
2815 initial_data = None
2816 if webpage:
2817 initial_data = self._extract_yt_initial_variable(
2818 webpage, self._YT_INITIAL_DATA_RE, video_id,
2819 'yt initial data')
2820 if not initial_data:
2821 initial_data = self._extract_response(
2822 item_id=video_id, ep='next', fatal=False,
2823 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2824 note='Downloading initial data API JSON')
2825
2826 try:
2827 # This will error if there is no livechat
2828 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2829 info['subtitles']['live_chat'] = [{
2830 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2831 'video_id': video_id,
2832 'ext': 'json',
2833 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2834 }]
2835 except (KeyError, IndexError, TypeError):
2836 pass
2837
2838 if initial_data:
2839 chapters = self._extract_chapters_from_json(
2840 initial_data, video_id, duration)
2841 if not chapters:
2842 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2843 contents = try_get(
2844 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2845 list)
2846 if not contents:
2847 continue
2848
2849 def chapter_time(mmlir):
2850 return parse_duration(
2851 get_text(mmlir.get('timeDescription')))
2852
2853 chapters = []
2854 for next_num, content in enumerate(contents, start=1):
2855 mmlir = content.get('macroMarkersListItemRenderer') or {}
2856 start_time = chapter_time(mmlir)
2857 end_time = chapter_time(try_get(
2858 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2859 if next_num < len(contents) else duration
2860 if start_time is None or end_time is None:
2861 continue
2862 chapters.append({
2863 'start_time': start_time,
2864 'end_time': end_time,
2865 'title': get_text(mmlir.get('title')),
2866 })
2867 if chapters:
2868 break
2869 if chapters:
2870 info['chapters'] = chapters
2871
2872 contents = try_get(
2873 initial_data,
2874 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2875 list) or []
2876 for content in contents:
2877 vpir = content.get('videoPrimaryInfoRenderer')
2878 if vpir:
2879 stl = vpir.get('superTitleLink')
2880 if stl:
2881 stl = get_text(stl)
2882 if try_get(
2883 vpir,
2884 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2885 info['location'] = stl
2886 else:
2887 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2888 if mobj:
2889 info.update({
2890 'series': mobj.group(1),
2891 'season_number': int(mobj.group(2)),
2892 'episode_number': int(mobj.group(3)),
2893 })
2894 for tlb in (try_get(
2895 vpir,
2896 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2897 list) or []):
2898 tbr = tlb.get('toggleButtonRenderer') or {}
2899 for getter, regex in [(
2900 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2901 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2902 lambda x: x['accessibility'],
2903 lambda x: x['accessibilityData']['accessibilityData'],
2904 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2905 label = (try_get(tbr, getter, dict) or {}).get('label')
2906 if label:
2907 mobj = re.match(regex, label)
2908 if mobj:
2909 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2910 break
2911 sbr_tooltip = try_get(
2912 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2913 if sbr_tooltip:
2914 like_count, dislike_count = sbr_tooltip.split(' / ')
2915 info.update({
2916 'like_count': str_to_int(like_count),
2917 'dislike_count': str_to_int(dislike_count),
2918 })
2919 vsir = content.get('videoSecondaryInfoRenderer')
2920 if vsir:
2921 info['channel'] = get_text(try_get(
2922 vsir,
2923 lambda x: x['owner']['videoOwnerRenderer']['title'],
2924 dict))
2925 rows = try_get(
2926 vsir,
2927 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2928 list) or []
2929 multiple_songs = False
2930 for row in rows:
2931 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2932 multiple_songs = True
2933 break
2934 for row in rows:
2935 mrr = row.get('metadataRowRenderer') or {}
2936 mrr_title = mrr.get('title')
2937 if not mrr_title:
2938 continue
2939 mrr_title = get_text(mrr['title'])
2940 mrr_contents_text = get_text(mrr['contents'][0])
2941 if mrr_title == 'License':
2942 info['license'] = mrr_contents_text
2943 elif not multiple_songs:
2944 if mrr_title == 'Album':
2945 info['album'] = mrr_contents_text
2946 elif mrr_title == 'Artist':
2947 info['artist'] = mrr_contents_text
2948 elif mrr_title == 'Song':
2949 info['track'] = mrr_contents_text
2950
2951 fallbacks = {
2952 'channel': 'uploader',
2953 'channel_id': 'uploader_id',
2954 'channel_url': 'uploader_url',
2955 }
2956 for to, frm in fallbacks.items():
2957 if not info.get(to):
2958 info[to] = info.get(frm)
2959
2960 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2961 v = info.get(s_k)
2962 if v:
2963 info[d_k] = v
2964
2965 is_private = bool_or_none(video_details.get('isPrivate'))
2966 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2967 is_membersonly = None
2968 is_premium = None
2969 if initial_data and is_private is not None:
2970 is_membersonly = False
2971 is_premium = False
2972 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2973 badge_labels = set()
2974 for content in contents:
2975 if not isinstance(content, dict):
2976 continue
2977 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2978 for badge_label in badge_labels:
2979 if badge_label.lower() == 'members only':
2980 is_membersonly = True
2981 elif badge_label.lower() == 'premium':
2982 is_premium = True
2983 elif badge_label.lower() == 'unlisted':
2984 is_unlisted = True
2985
2986 info['availability'] = self._availability(
2987 is_private=is_private,
2988 needs_premium=is_premium,
2989 needs_subscription=is_membersonly,
2990 needs_auth=info['age_limit'] >= 18,
2991 is_unlisted=None if is_private is None else is_unlisted)
2992
2993 # get xsrf for annotations or comments
2994 get_annotations = self.get_param('writeannotations', False)
2995 get_comments = self.get_param('getcomments', False)
2996 if get_annotations or get_comments:
2997 xsrf_token = None
2998 ytcfg = self._extract_ytcfg(video_id, webpage)
2999 if ytcfg:
3000 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3001 if not xsrf_token:
3002 xsrf_token = self._search_regex(
3003 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3004 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3005
3006 # annotations
3007 if get_annotations:
3008 invideo_url = try_get(
3009 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3010 if xsrf_token and invideo_url:
3011 xsrf_field_name = None
3012 if ytcfg:
3013 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3014 if not xsrf_field_name:
3015 xsrf_field_name = self._search_regex(
3016 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3017 webpage, 'xsrf field name',
3018 group='xsrf_field_name', default='session_token')
3019 info['annotations'] = self._download_webpage(
3020 self._proto_relative_url(invideo_url),
3021 video_id, note='Downloading annotations',
3022 errnote='Unable to download video annotations', fatal=False,
3023 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3024
3025 if get_comments:
3026 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3027
3028 self.mark_watched(video_id, player_response)
3029
3030 return info
3031
3032
3033class YoutubeTabIE(YoutubeBaseInfoExtractor):
3034 IE_DESC = 'YouTube.com tab'
3035 _VALID_URL = r'''(?x)
3036 https?://
3037 (?:\w+\.)?
3038 (?:
3039 youtube(?:kids)?\.com|
3040 invidio\.us
3041 )/
3042 (?:
3043 (?P<channel_type>channel|c|user|browse)/|
3044 (?P<not_channel>
3045 feed/|hashtag/|
3046 (?:playlist|watch)\?.*?\blist=
3047 )|
3048 (?!(?:%s)\b) # Direct URLs
3049 )
3050 (?P<id>[^/?\#&]+)
3051 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3052 IE_NAME = 'youtube:tab'
3053
3054 _TESTS = [{
3055 'note': 'playlists, multipage',
3056 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3057 'playlist_mincount': 94,
3058 'info_dict': {
3059 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3060 'title': 'Игорь Клейнер - Playlists',
3061 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3062 'uploader': 'Игорь Клейнер',
3063 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3064 },
3065 }, {
3066 'note': 'playlists, multipage, different order',
3067 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3068 'playlist_mincount': 94,
3069 'info_dict': {
3070 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3071 'title': 'Игорь Клейнер - Playlists',
3072 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3073 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3074 'uploader': 'Игорь Клейнер',
3075 },
3076 }, {
3077 'note': 'playlists, series',
3078 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3079 'playlist_mincount': 5,
3080 'info_dict': {
3081 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3082 'title': '3Blue1Brown - Playlists',
3083 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3084 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3085 'uploader': '3Blue1Brown',
3086 },
3087 }, {
3088 'note': 'playlists, singlepage',
3089 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3090 'playlist_mincount': 4,
3091 'info_dict': {
3092 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3093 'title': 'ThirstForScience - Playlists',
3094 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3095 'uploader': 'ThirstForScience',
3096 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3097 }
3098 }, {
3099 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3100 'only_matching': True,
3101 }, {
3102 'note': 'basic, single video playlist',
3103 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3104 'info_dict': {
3105 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3106 'uploader': 'Sergey M.',
3107 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3108 'title': 'youtube-dl public playlist',
3109 },
3110 'playlist_count': 1,
3111 }, {
3112 'note': 'empty playlist',
3113 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3114 'info_dict': {
3115 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3116 'uploader': 'Sergey M.',
3117 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3118 'title': 'youtube-dl empty playlist',
3119 },
3120 'playlist_count': 0,
3121 }, {
3122 'note': 'Home tab',
3123 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3124 'info_dict': {
3125 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3126 'title': 'lex will - Home',
3127 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3128 'uploader': 'lex will',
3129 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3130 },
3131 'playlist_mincount': 2,
3132 }, {
3133 'note': 'Videos tab',
3134 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3135 'info_dict': {
3136 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3137 'title': 'lex will - Videos',
3138 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3139 'uploader': 'lex will',
3140 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3141 },
3142 'playlist_mincount': 975,
3143 }, {
3144 'note': 'Videos tab, sorted by popular',
3145 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3146 'info_dict': {
3147 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3148 'title': 'lex will - Videos',
3149 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3150 'uploader': 'lex will',
3151 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3152 },
3153 'playlist_mincount': 199,
3154 }, {
3155 'note': 'Playlists tab',
3156 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3157 'info_dict': {
3158 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3159 'title': 'lex will - Playlists',
3160 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3161 'uploader': 'lex will',
3162 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3163 },
3164 'playlist_mincount': 17,
3165 }, {
3166 'note': 'Community tab',
3167 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3168 'info_dict': {
3169 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3170 'title': 'lex will - Community',
3171 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3172 'uploader': 'lex will',
3173 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3174 },
3175 'playlist_mincount': 18,
3176 }, {
3177 'note': 'Channels tab',
3178 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3179 'info_dict': {
3180 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3181 'title': 'lex will - Channels',
3182 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3183 'uploader': 'lex will',
3184 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3185 },
3186 'playlist_mincount': 12,
3187 }, {
3188 'note': 'Search tab',
3189 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3190 'playlist_mincount': 40,
3191 'info_dict': {
3192 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3193 'title': '3Blue1Brown - Search - linear algebra',
3194 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3195 'uploader': '3Blue1Brown',
3196 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3197 },
3198 }, {
3199 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3200 'only_matching': True,
3201 }, {
3202 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3203 'only_matching': True,
3204 }, {
3205 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3206 'only_matching': True,
3207 }, {
3208 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3209 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3210 'info_dict': {
3211 'title': '29C3: Not my department',
3212 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3213 'uploader': 'Christiaan008',
3214 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3215 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3216 },
3217 'playlist_count': 96,
3218 }, {
3219 'note': 'Large playlist',
3220 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3221 'info_dict': {
3222 'title': 'Uploads from Cauchemar',
3223 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3224 'uploader': 'Cauchemar',
3225 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3226 },
3227 'playlist_mincount': 1123,
3228 }, {
3229 'note': 'even larger playlist, 8832 videos',
3230 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3231 'only_matching': True,
3232 }, {
3233 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3234 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3235 'info_dict': {
3236 'title': 'Uploads from Interstellar Movie',
3237 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3238 'uploader': 'Interstellar Movie',
3239 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3240 },
3241 'playlist_mincount': 21,
3242 }, {
3243 'note': 'Playlist with "show unavailable videos" button',
3244 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3245 'info_dict': {
3246 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3247 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3248 'uploader': 'Phim Siêu Nhân Nhật Bản',
3249 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3250 },
3251 'playlist_mincount': 200,
3252 }, {
3253 'note': 'Playlist with unavailable videos in page 7',
3254 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3255 'info_dict': {
3256 'title': 'Uploads from BlankTV',
3257 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3258 'uploader': 'BlankTV',
3259 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3260 },
3261 'playlist_mincount': 1000,
3262 }, {
3263 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3264 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3265 'info_dict': {
3266 'title': 'Data Analysis with Dr Mike Pound',
3267 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3268 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3269 'uploader': 'Computerphile',
3270 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3271 },
3272 'playlist_mincount': 11,
3273 }, {
3274 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3275 'only_matching': True,
3276 }, {
3277 'note': 'Playlist URL that does not actually serve a playlist',
3278 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3279 'info_dict': {
3280 'id': 'FqZTN594JQw',
3281 'ext': 'webm',
3282 'title': "Smiley's People 01 detective, Adventure Series, Action",
3283 'uploader': 'STREEM',
3284 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3286 'upload_date': '20150526',
3287 'license': 'Standard YouTube License',
3288 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3289 'categories': ['People & Blogs'],
3290 'tags': list,
3291 'view_count': int,
3292 'like_count': int,
3293 'dislike_count': int,
3294 },
3295 'params': {
3296 'skip_download': True,
3297 },
3298 'skip': 'This video is not available.',
3299 'add_ie': [YoutubeIE.ie_key()],
3300 }, {
3301 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3302 'only_matching': True,
3303 }, {
3304 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3305 'only_matching': True,
3306 }, {
3307 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3308 'info_dict': {
3309 'id': 'X1whbWASnNQ', # This will keep changing
3310 'ext': 'mp4',
3311 'title': compat_str,
3312 'uploader': 'Sky News',
3313 'uploader_id': 'skynews',
3314 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3315 'upload_date': r're:\d{8}',
3316 'description': compat_str,
3317 'categories': ['News & Politics'],
3318 'tags': list,
3319 'like_count': int,
3320 'dislike_count': int,
3321 },
3322 'params': {
3323 'skip_download': True,
3324 },
3325 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3326 }, {
3327 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3328 'info_dict': {
3329 'id': 'a48o2S1cPoo',
3330 'ext': 'mp4',
3331 'title': 'The Young Turks - Live Main Show',
3332 'uploader': 'The Young Turks',
3333 'uploader_id': 'TheYoungTurks',
3334 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3335 'upload_date': '20150715',
3336 'license': 'Standard YouTube License',
3337 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3338 'categories': ['News & Politics'],
3339 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3340 'like_count': int,
3341 'dislike_count': int,
3342 },
3343 'params': {
3344 'skip_download': True,
3345 },
3346 'only_matching': True,
3347 }, {
3348 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3349 'only_matching': True,
3350 }, {
3351 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3352 'only_matching': True,
3353 }, {
3354 'note': 'A channel that is not live. Should raise error',
3355 'url': 'https://www.youtube.com/user/numberphile/live',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://www.youtube.com/feed/trending',
3359 'only_matching': True,
3360 }, {
3361 'url': 'https://www.youtube.com/feed/library',
3362 'only_matching': True,
3363 }, {
3364 'url': 'https://www.youtube.com/feed/history',
3365 'only_matching': True,
3366 }, {
3367 'url': 'https://www.youtube.com/feed/subscriptions',
3368 'only_matching': True,
3369 }, {
3370 'url': 'https://www.youtube.com/feed/watch_later',
3371 'only_matching': True,
3372 }, {
3373 'note': 'Recommended - redirects to home page',
3374 'url': 'https://www.youtube.com/feed/recommended',
3375 'only_matching': True,
3376 }, {
3377 'note': 'inline playlist with not always working continuations',
3378 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3379 'only_matching': True,
3380 }, {
3381 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://www.youtube.com/course',
3385 'only_matching': True,
3386 }, {
3387 'url': 'https://www.youtube.com/zsecurity',
3388 'only_matching': True,
3389 }, {
3390 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3391 'only_matching': True,
3392 }, {
3393 'url': 'https://www.youtube.com/TheYoungTurks/live',
3394 'only_matching': True,
3395 }, {
3396 'url': 'https://www.youtube.com/hashtag/cctv9',
3397 'info_dict': {
3398 'id': 'cctv9',
3399 'title': '#cctv9',
3400 },
3401 'playlist_mincount': 350,
3402 }, {
3403 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3404 'only_matching': True,
3405 }, {
3406 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3407 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3408 'only_matching': True
3409 }, {
3410 'note': '/browse/ should redirect to /channel/',
3411 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3412 'only_matching': True
3413 }, {
3414 'note': 'VLPL, should redirect to playlist?list=PL...',
3415 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3416 'info_dict': {
3417 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3418 'uploader': 'NoCopyrightSounds',
3419 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3420 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3421 'title': 'NCS Releases',
3422 },
3423 'playlist_mincount': 166,
3424 }, {
3425 'note': 'Topic, should redirect to playlist?list=UU...',
3426 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3427 'info_dict': {
3428 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3429 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3430 'title': 'Uploads from Royalty Free Music - Topic',
3431 'uploader': 'Royalty Free Music - Topic',
3432 },
3433 'expected_warnings': [
3434 'A channel/user page was given',
3435 'The URL does not have a videos tab',
3436 ],
3437 'playlist_mincount': 101,
3438 }, {
3439 'note': 'Topic without a UU playlist',
3440 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3441 'info_dict': {
3442 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3443 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3444 },
3445 'expected_warnings': [
3446 'A channel/user page was given',
3447 'The URL does not have a videos tab',
3448 'Falling back to channel URL',
3449 ],
3450 'playlist_mincount': 9,
3451 }, {
3452 'note': 'Youtube music Album',
3453 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3454 'info_dict': {
3455 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3456 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3457 },
3458 'playlist_count': 50,
3459 }, {
3460 'note': 'unlisted single video playlist',
3461 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3462 'info_dict': {
3463 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3464 'uploader': 'colethedj',
3465 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3466 'title': 'yt-dlp unlisted playlist test',
3467 'availability': 'unlisted'
3468 },
3469 'playlist_count': 1,
3470 }]
3471
3472 @classmethod
3473 def suitable(cls, url):
3474 return False if YoutubeIE.suitable(url) else super(
3475 YoutubeTabIE, cls).suitable(url)
3476
3477 def _extract_channel_id(self, webpage):
3478 channel_id = self._html_search_meta(
3479 'channelId', webpage, 'channel id', default=None)
3480 if channel_id:
3481 return channel_id
3482 channel_url = self._html_search_meta(
3483 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3484 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3485 'twitter:app:url:googleplay'), webpage, 'channel url')
3486 return self._search_regex(
3487 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3488 channel_url, 'channel id')
3489
3490 @staticmethod
3491 def _extract_basic_item_renderer(item):
3492 # Modified from _extract_grid_item_renderer
3493 known_basic_renderers = (
3494 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3495 )
3496 for key, renderer in item.items():
3497 if not isinstance(renderer, dict):
3498 continue
3499 elif key in known_basic_renderers:
3500 return renderer
3501 elif key.startswith('grid') and key.endswith('Renderer'):
3502 return renderer
3503
    def _grid_entries(self, grid_renderer):
        """Yield entries (videos, playlists, channels or generic endpoint
        URLs) for every item of a gridRenderer; unrecognized items are
        skipped silently."""
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = try_get(
                renderer, (lambda x: x['title']['runs'][0]['text'],
                           lambda x: x['title']['simpleText']), compat_str)
            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                # channel renderers carry the title as simpleText only
                title = try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str)
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # hand the URL to the first extractor that claims it
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3546
3547 def _shelf_entries_from_content(self, shelf_renderer):
3548 content = shelf_renderer.get('content')
3549 if not isinstance(content, dict):
3550 return
3551 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3552 if renderer:
3553 # TODO: add support for nested playlists so each shelf is processed
3554 # as separate playlist
3555 # TODO: this includes only first N items
3556 for entry in self._grid_entries(renderer):
3557 yield entry
3558 renderer = content.get('horizontalListRenderer')
3559 if renderer:
3560 # TODO
3561 pass
3562
3563 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3564 ep = try_get(
3565 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3566 compat_str)
3567 shelf_url = urljoin('https://www.youtube.com', ep)
3568 if shelf_url:
3569 # Skipping links to another channels, note that checking for
3570 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3571 # will not work
3572 if skip_channels and '/channels?' in shelf_url:
3573 return
3574 title = try_get(
3575 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3576 yield self.url_result(shelf_url, video_title=title)
3577 # Shelf may not contain shelf URL, fallback to extraction from content
3578 for entry in self._shelf_entries_from_content(shelf_renderer):
3579 yield entry
3580
3581 def _playlist_entries(self, video_list_renderer):
3582 for content in video_list_renderer['contents']:
3583 if not isinstance(content, dict):
3584 continue
3585 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3586 if not isinstance(renderer, dict):
3587 continue
3588 video_id = renderer.get('videoId')
3589 if not video_id:
3590 continue
3591 yield self._extract_video(renderer)
3592
3593 def _rich_entries(self, rich_grid_renderer):
3594 renderer = try_get(
3595 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3596 video_id = renderer.get('videoId')
3597 if not video_id:
3598 return
3599 yield self._extract_video(renderer)
3600
3601 def _video_entry(self, video_renderer):
3602 video_id = video_renderer.get('videoId')
3603 if video_id:
3604 return self._extract_video(video_renderer)
3605
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries for a community (backstage) post: the attached video
        and/or playlist, plus any YouTube video links in the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # skip links that duplicate the attached video
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3641
3642 def _post_thread_continuation_entries(self, post_thread_continuation):
3643 contents = post_thread_continuation.get('contents')
3644 if not isinstance(contents, list):
3645 return
3646 for content in contents:
3647 renderer = content.get('backstagePostThreadRenderer')
3648 if not isinstance(renderer, dict):
3649 continue
3650 for entry in self._post_thread_entries(renderer):
3651 yield entry
3652
3653 r''' # unused
3654 def _rich_grid_entries(self, contents):
3655 for content in contents:
3656 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3657 if video_renderer:
3658 entry = self._video_entry(video_renderer)
3659 if entry:
3660 yield entry
3661 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following continuations across
        innertube API pages until no further continuation token is found."""

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch on the first recognized renderer key
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # fall back to the section renderer's own continuation
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # last resort: continuation of the whole parent renderer
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # single-element list used as a mutable cell (Python 2 does not support nonlocal)
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry the server-assigned visitor data into subsequent requests
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # old-style continuation payload ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # new-style continuation payload ('onResponseReceived...'); each
            # entry maps renderer key -> (handler, wrapper key for its items)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # re-wrap the flat item list so the handler sees its usual shape
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3781
3782 @staticmethod
3783 def _extract_selected_tab(tabs):
3784 for tab in tabs:
3785 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3786 if renderer.get('selected') is True:
3787 return renderer
3788 else:
3789 raise ExtractorError('Unable to find selected tab')
3790
3791 @classmethod
3792 def _extract_uploader(cls, data):
3793 uploader = {}
3794 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3795 owner = try_get(
3796 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3797 if owner:
3798 uploader['uploader'] = owner.get('text')
3799 uploader['uploader_id'] = try_get(
3800 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3801 uploader['uploader_url'] = urljoin(
3802 'https://www.youtube.com/',
3803 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3804 return {k: v for k, v in uploader.items() if v is not None}
3805
3806 def _extract_from_tabs(self, item_id, webpage, data, tabs):
3807 playlist_id = title = description = channel_url = channel_name = channel_id = None
3808 thumbnails_list = tags = []
3809
3810 selected_tab = self._extract_selected_tab(tabs)
3811 renderer = try_get(
3812 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3813 if renderer:
3814 channel_name = renderer.get('title')
3815 channel_url = renderer.get('channelUrl')
3816 channel_id = renderer.get('externalId')
3817 else:
3818 renderer = try_get(
3819 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3820
3821 if renderer:
3822 title = renderer.get('title')
3823 description = renderer.get('description', '')
3824 playlist_id = channel_id
3825 tags = renderer.get('keywords', '').split()
3826 thumbnails_list = (
3827 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3828 or try_get(
3829 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3830 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3831 list)
3832 or [])
3833
3834 thumbnails = []
3835 for t in thumbnails_list:
3836 if not isinstance(t, dict):
3837 continue
3838 thumbnail_url = url_or_none(t.get('url'))
3839 if not thumbnail_url:
3840 continue
3841 thumbnails.append({
3842 'url': thumbnail_url,
3843 'width': int_or_none(t.get('width')),
3844 'height': int_or_none(t.get('height')),
3845 })
3846 if playlist_id is None:
3847 playlist_id = item_id
3848 if title is None:
3849 title = (
3850 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3851 or playlist_id)
3852 title += format_field(selected_tab, 'title', ' - %s')
3853 title += format_field(selected_tab, 'expandedText', ' - %s')
3854 metadata = {
3855 'playlist_id': playlist_id,
3856 'playlist_title': title,
3857 'playlist_description': description,
3858 'uploader': channel_name,
3859 'uploader_id': channel_id,
3860 'uploader_url': channel_url,
3861 'thumbnails': thumbnails,
3862 'tags': tags,
3863 }
3864 availability = self._extract_availability(data)
3865 if availability:
3866 metadata['availability'] = availability
3867 if not channel_id:
3868 metadata.update(self._extract_uploader(data))
3869 metadata.update({
3870 'channel': metadata['uploader'],
3871 'channel_id': metadata['uploader_id'],
3872 'channel_url': metadata['uploader_url']})
3873 return self.playlist_result(
3874 self._entries(
3875 selected_tab, playlist_id,
3876 self._extract_identity_token(webpage, item_id),
3877 self._extract_account_syncid(data),
3878 self._extract_ytcfg(item_id, webpage)),
3879 **metadata)
3880
3881 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
3882 first_id = last_id = None
3883 ytcfg = self._extract_ytcfg(playlist_id, webpage)
3884 headers = self._generate_api_headers(
3885 ytcfg, account_syncid=self._extract_account_syncid(data),
3886 identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
3887 visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3888 for page_num in itertools.count(1):
3889 videos = list(self._playlist_entries(playlist))
3890 if not videos:
3891 return
3892 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3893 if start >= len(videos):
3894 return
3895 for video in videos[start:]:
3896 if video['id'] == first_id:
3897 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3898 return
3899 yield video
3900 first_id = first_id or videos[0]['id']
3901 last_id = videos[-1]['id']
3902 watch_endpoint = try_get(
3903 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3904 query = {
3905 'playlistId': playlist_id,
3906 'videoId': watch_endpoint.get('videoId') or last_id,
3907 'index': watch_endpoint.get('index') or len(videos),
3908 'params': watch_endpoint.get('params') or 'OAE%3D'
3909 }
3910 response = self._extract_response(
3911 item_id='%s page %d' % (playlist_id, page_num),
3912 query=query,
3913 ep='next',
3914 headers=headers,
3915 check_get_keys='contents'
3916 )
3917 playlist = try_get(
3918 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3919
3920 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3921 title = playlist.get('title') or try_get(
3922 data, lambda x: x['titleText']['simpleText'], compat_str)
3923 playlist_id = playlist.get('playlistId') or item_id
3924
3925 # Delegating everything except mix playlists to regular tab-based playlist URL
3926 playlist_url = urljoin(url, try_get(
3927 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3928 compat_str))
3929 if playlist_url and playlist_url != url:
3930 return self.url_result(
3931 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3932 video_title=title)
3933
3934 return self.playlist_result(
3935 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3936 playlist_id=playlist_id, playlist_title=title)
3937
3938 def _extract_availability(self, data):
3939 """
3940 Gets the availability of a given playlist/tab.
3941 Note: Unless YouTube tells us explicitly, we do not assume it is public
3942 @param data: response
3943 """
3944 is_private = is_unlisted = None
3945 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3946 badge_labels = self._extract_badges(renderer)
3947
3948 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3949 privacy_dropdown_entries = try_get(
3950 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3951 for renderer_dict in privacy_dropdown_entries:
3952 is_selected = try_get(
3953 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3954 if not is_selected:
3955 continue
3956 label = self._join_text_entries(
3957 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label']['runs'], list) or [])
3958 if label:
3959 badge_labels.add(label.lower())
3960 break
3961
3962 for badge_label in badge_labels:
3963 if badge_label == 'unlisted':
3964 is_unlisted = True
3965 elif badge_label == 'private':
3966 is_private = True
3967 elif badge_label == 'public':
3968 is_unlisted = is_private = False
3969 return self._availability(is_private, False, False, False, is_unlisted)
3970
3971 @staticmethod
3972 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3973 sidebar_renderer = try_get(
3974 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3975 for item in sidebar_renderer:
3976 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3977 if renderer:
3978 return renderer
3979
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        Returns the new API response, or None when the button is absent or the
        sidebar renderer is missing.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        # look for the menu entry labelled 'show unavailable videos'
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self._extract_ytcfg(item_id, webpage)
        # NOTE(review): account syncid is extracted from ytcfg here, while
        # other call sites pass the ytInitialData ('data') -- confirm this
        # difference is intentional
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # 'wgYCCAA=' is presumably the protobuf param requesting unavailable
        # videos -- TODO confirm
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False,
            note='Downloading API JSON with unavailable videos')
4018
4019 def _extract_webpage(self, url, item_id):
4020 retries = self.get_param('extractor_retries', 3)
4021 count = -1
4022 last_error = 'Incomplete yt initial data recieved'
4023 while count < retries:
4024 count += 1
4025 # Sometimes youtube returns a webpage with incomplete ytInitialData
4026 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4027 if count:
4028 self.report_warning('%s. Retrying ...' % last_error)
4029 webpage = self._download_webpage(
4030 url, item_id,
4031 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4032 data = self._extract_yt_initial_data(item_id, webpage)
4033 if data.get('contents') or data.get('currentVideoEndpoint'):
4034 break
4035 # Extract alerts here only when there is error
4036 self._extract_and_report_alerts(data)
4037 if count >= retries:
4038 raise ExtractorError(last_error)
4039 return webpage, data
4040
4041 @staticmethod
4042 def _smuggle_data(entries, data):
4043 for entry in entries:
4044 if data:
4045 entry['url'] = smuggle_url(entry['url'], data)
4046 yield entry
4047
4048 def _real_extract(self, url):
4049 url, smuggled_data = unsmuggle_url(url, {})
4050 if self.is_music_url(url):
4051 smuggled_data['is_music_url'] = True
4052 info_dict = self.__real_extract(url, smuggled_data)
4053 if info_dict.get('entries'):
4054 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4055 return info_dict
4056
    # Splits a URL into 'pre' (everything _VALID_URL matches), an optional
    # '/tab' segment (only after channel-type URLs) and the trailing 'post'
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4058
    def __real_extract(self, url, smuggled_data):
        """Resolve any accepted URL into a playlist result, a delegated
        single-video result, or an error, rewriting the URL as needed
        (music redirects, /videos tab default, watch?list= fixups)."""
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # normalize non-participating groups from None to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4173
4174
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs handled by YoutubeTabIE or carrying a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        # A ?v= parameter means this is really a watch URL, not a bare playlist
        has_video = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video else super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize the URL to a canonical playlist URL and hand off to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Preserve any original query parameters; fall back to just the list id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            # Remember music.youtube.com origin across the redirect
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4257
4258
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the short youtu.be link as a full watch URL and defer to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4297
4298
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the ytuser: shorthand into a channel-user URL handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4312
4313
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the special 'LL' (liked videos) playlist."""
        liked_playlist = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist, ie=YoutubeTabIE.ie_key())
4331
4332
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra 'params' value sent with the search request; subclasses override
    # this to change ordering/filtering (see YoutubeSearchDateIE).
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query, following continuation pages."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under twoColumnSearchResultsRenderer;
            # continuation pages deliver them via appendContinuationItemsAction.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Skip anything that is not a plain video result
                    # (channel/playlist renderers, promoted items, ...)
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token means this was the last page
            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4402
4403
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Opaque 'params' value sent with the search request; presumably the
    # encoded filter selecting upload-date ordering -- verify against the API
    _SEARCH_PARAMS = 'CAI%3D'
4409
4410
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # This extractor matches full result-page URLs, so the class-level
        # pattern is already complete.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Extract the search terms (and optional 'sp' filter) from a results URL."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees that either search_query or q is present
        search_terms = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms, self._MAX_RESULTS)
4436
4437
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed it serves
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Redirect to the corresponding youtube.com/feed/ page."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4454
4455
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the special 'WL' (watch later) playlist."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4468
4469
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches a bare youtube.com homepage URL
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Override the base class: recommendations are available without login too
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4485
4486
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Feed slug used by YoutubeFeedsInfoExtractor to build the feed URL
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4498
4499
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    # Feed slug used by YoutubeFeedsInfoExtractor to build the feed URL
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4508
4509
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail: a matching URL is a watch URL whose v= got cut off by the shell."""
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
4557
4558
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches watch URLs whose video id is shorter than the 11 chars required
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail: the video id in the URL is incomplete."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)