]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[youtube:comments] Improve comment vote count parsing (fixes #506) (#508)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6import calendar
7import copy
8import hashlib
9import itertools
10import json
11import os.path
12import random
13import re
14import time
15import traceback
16
17from .common import InfoExtractor, SearchInfoExtractor
18from ..compat import (
19 compat_chr,
20 compat_HTTPError,
21 compat_parse_qs,
22 compat_str,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27)
28from ..jsinterp import JSInterpreter
29from ..utils import (
30 bool_or_none,
31 bytes_to_intlist,
32 clean_html,
33 dict_get,
34 datetime_from_str,
35 error_to_compat_str,
36 ExtractorError,
37 format_field,
38 float_or_none,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 qualities,
46 remove_start,
47 smuggle_url,
48 str_or_none,
49 str_to_int,
50 try_get,
51 unescapeHTML,
52 unified_strdate,
53 unsmuggle_url,
54 update_url_query,
55 url_or_none,
56 urlencode_postdata,
57 urljoin
58)
59
60
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
63
64
65class YoutubeBaseInfoExtractor(InfoExtractor):
66 """Provide base functions for Youtube extractors"""
    # Google account endpoints used by the disabled username/password login flow
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # presumably URL path segments reserved by YouTube (never channel/user
    # names) — TODO confirm against the _VALID_URLs that reference this
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist ID formats: the usual prefixed IDs plus the special
    # RDMM/WL/LL/LM lists (presumably mix/watch-later/liked lists)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
84
85 def _login(self):
86 """
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
90
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 """
93
94 def warn(message):
95 self.report_warning(message)
96
97 # username+password login is broken
98 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
99 self.raise_login_required(
100 'Login details are needed to download this content', method='cookies')
101 username, password = self._get_login_info()
102 if username:
103 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
104 return
105
106 # Everything below this is broken!
107 r'''
108 # No authentication to be performed
109 if username is None:
110 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
111 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
112 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
113 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
114 return True
115
116 login_page = self._download_webpage(
117 self._LOGIN_URL, None,
118 note='Downloading login page',
119 errnote='unable to fetch login page', fatal=False)
120 if login_page is False:
121 return
122
123 login_form = self._hidden_inputs(login_page)
124
125 def req(url, f_req, note, errnote):
126 data = login_form.copy()
127 data.update({
128 'pstMsg': 1,
129 'checkConnection': 'youtube',
130 'checkedDomains': 'youtube',
131 'hl': 'en',
132 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
133 'f.req': json.dumps(f_req),
134 'flowName': 'GlifWebSignIn',
135 'flowEntry': 'ServiceLogin',
136 # TODO: reverse actual botguard identifier generation algo
137 'bgRequest': '["identifier",""]',
138 })
139 return self._download_json(
140 url, None, note=note, errnote=errnote,
141 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
142 fatal=False,
143 data=urlencode_postdata(data), headers={
144 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
145 'Google-Accounts-XSRF': 1,
146 })
147
148 lookup_req = [
149 username,
150 None, [], None, 'US', None, None, 2, False, True,
151 [
152 None, None,
153 [2, 1, None, 1,
154 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
155 None, [], 4],
156 1, [None, None, []], None, None, None, True
157 ],
158 username,
159 ]
160
161 lookup_results = req(
162 self._LOOKUP_URL, lookup_req,
163 'Looking up account info', 'Unable to look up account info')
164
165 if lookup_results is False:
166 return False
167
168 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
169 if not user_hash:
170 warn('Unable to extract user hash')
171 return False
172
173 challenge_req = [
174 user_hash,
175 None, 1, None, [1, None, None, None, [password, None, True]],
176 [
177 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
178 1, [None, None, []], None, None, None, True
179 ]]
180
181 challenge_results = req(
182 self._CHALLENGE_URL, challenge_req,
183 'Logging in', 'Unable to log in')
184
185 if challenge_results is False:
186 return
187
188 login_res = try_get(challenge_results, lambda x: x[0][5], list)
189 if login_res:
190 login_msg = try_get(login_res, lambda x: x[5], compat_str)
191 warn(
192 'Unable to login: %s' % 'Invalid password'
193 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
194 return False
195
196 res = try_get(challenge_results, lambda x: x[0][-1], list)
197 if not res:
198 warn('Unable to extract result entry')
199 return False
200
201 login_challenge = try_get(res, lambda x: x[0][0], list)
202 if login_challenge:
203 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
204 if challenge_str == 'TWO_STEP_VERIFICATION':
205 # SEND_SUCCESS - TFA code has been successfully sent to phone
206 # QUOTA_EXCEEDED - reached the limit of TFA codes
207 status = try_get(login_challenge, lambda x: x[5], compat_str)
208 if status == 'QUOTA_EXCEEDED':
209 warn('Exceeded the limit of TFA codes, try later')
210 return False
211
212 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
213 if not tl:
214 warn('Unable to extract TL')
215 return False
216
217 tfa_code = self._get_tfa_info('2-step verification code')
218
219 if not tfa_code:
220 warn(
221 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
222 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
223 return False
224
225 tfa_code = remove_start(tfa_code, 'G-')
226
227 tfa_req = [
228 user_hash, None, 2, None,
229 [
230 9, None, None, None, None, None, None, None,
231 [None, tfa_code, True, 2]
232 ]]
233
234 tfa_results = req(
235 self._TFA_URL.format(tl), tfa_req,
236 'Submitting TFA code', 'Unable to submit TFA code')
237
238 if tfa_results is False:
239 return False
240
241 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
242 if tfa_res:
243 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
244 warn(
245 'Unable to finish TFA: %s' % 'Invalid TFA code'
246 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
247 return False
248
249 check_cookie_url = try_get(
250 tfa_results, lambda x: x[0][-1][2], compat_str)
251 else:
252 CHALLENGES = {
253 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
254 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
255 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
256 }
257 challenge = CHALLENGES.get(
258 challenge_str,
259 '%s returned error %s.' % (self.IE_NAME, challenge_str))
260 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
261 return False
262 else:
263 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
264
265 if not check_cookie_url:
266 warn('Unable to extract CheckCookie URL')
267 return False
268
269 check_cookie_results = self._download_webpage(
270 check_cookie_url, None, 'Checking cookie', fatal=False)
271
272 if check_cookie_results is False:
273 return False
274
275 if 'https://myaccount.google.com/' not in check_cookie_results:
276 warn('Unable to log in')
277 return False
278
279 return True
280 '''
281
282 def _initialize_consent(self):
283 cookies = self._get_cookies('https://www.youtube.com/')
284 if cookies.get('__Secure-3PSID'):
285 return
286 consent_id = None
287 consent = cookies.get('CONSENT')
288 if consent:
289 if 'YES' in consent.value:
290 return
291 consent_id = self._search_regex(
292 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
293 if not consent_id:
294 consent_id = random.randint(100, 999)
295 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
296
297 def _real_initialize(self):
298 self._initialize_consent()
299 if self._downloader is None:
300 return
301 if not self._login():
302 return
303
    # Regexes for the JSON blobs YouTube embeds directly in its HTML pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that may follow such an embedded JSON object (used to anchor the
    # non-greedy JSON match above)
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in fallback ytcfg values keyed by Innertube client name; consulted
    # by _get_default_ytcfg/_ytcfg_get_safe when a page's own ytcfg is missing
    # or incomplete
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            # NOTE: unlike the WEB clients this is a string, not an int
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # API hostname per Innertube client (_get_innertube_host falls back to
    # the WEB entry for clients not listed here)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
401
402 def _get_default_ytcfg(self, client='WEB'):
403 if client in self._YT_DEFAULT_YTCFGS:
404 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
405 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
407
408 def _get_innertube_host(self, client='WEB'):
409 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
410
411 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
412 # try_get but with fallback to default ytcfg client values when present
413 _func = lambda y: try_get(y, getter, expected_type)
414 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
415
416 def _extract_client_name(self, ytcfg, default_client='WEB'):
417 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
418
419 def _extract_client_version(self, ytcfg, default_client='WEB'):
420 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
421
422 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
423 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
424
425 def _extract_context(self, ytcfg=None, default_client='WEB'):
426 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
427 context = _get_context(ytcfg)
428 if context:
429 return context
430
431 context = _get_context(self._get_default_ytcfg(default_client))
432 if not ytcfg:
433 return context
434
435 # Recreate the client context (required)
436 context['client'].update({
437 'clientVersion': self._extract_client_version(ytcfg, default_client),
438 'clientName': self._extract_client_name(ytcfg, default_client),
439 })
440 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
441 if visitor_data:
442 context['client']['visitorData'] = visitor_data
443 return context
444
445 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
446 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
447 # See: https://github.com/yt-dlp/yt-dlp/issues/393
448 yt_cookies = self._get_cookies('https://www.youtube.com')
449 sapisid_cookie = dict_get(
450 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
451 if sapisid_cookie is None:
452 return
453 time_now = round(time.time())
454 # SAPISID cookie is required if not already present
455 if not yt_cookies.get('SAPISID'):
456 self._set_cookie(
457 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
458 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
459 sapisidhash = hashlib.sha1(
460 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
461 return f'SAPISIDHASH {time_now}_{sapisidhash}'
462
463 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
464 note='Downloading API JSON', errnote='Unable to download API page',
465 context=None, api_key=None, api_hostname=None, default_client='WEB'):
466
467 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
468 data.update(query)
469 real_headers = self._generate_api_headers(client=default_client)
470 real_headers.update({'content-type': 'application/json'})
471 if headers:
472 real_headers.update(headers)
473 return self._download_json(
474 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
475 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
476 data=json.dumps(data).encode('utf8'), headers=real_headers,
477 query={'key': api_key or self._extract_api_key()})
478
479 def _extract_yt_initial_data(self, video_id, webpage):
480 return self._parse_json(
481 self._search_regex(
482 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
483 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
484 video_id)
485
486 def _extract_identity_token(self, webpage, item_id):
487 ytcfg = self._extract_ytcfg(item_id, webpage)
488 if ytcfg:
489 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
490 if token:
491 return token
492 return self._search_regex(
493 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
494 'identity token', default=None)
495
496 @staticmethod
497 def _extract_account_syncid(data):
498 """
499 Extract syncId required to download private playlists of secondary channels
500 @param data Either response or ytcfg
501 """
502 sync_ids = (try_get(
503 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
504 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
505 if len(sync_ids) >= 2 and sync_ids[1]:
506 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
507 # and just "user_syncid||" for primary channel. We only want the channel_syncid
508 return sync_ids[0]
509 # ytcfg includes channel_syncid if on secondary channel
510 return data.get('DELEGATED_SESSION_ID')
511
512 def _extract_ytcfg(self, video_id, webpage):
513 if not webpage:
514 return {}
515 return self._parse_json(
516 self._search_regex(
517 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
518 default='{}'), video_id, fatal=False) or {}
519
520 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
521 visitor_data=None, api_hostname=None, client='WEB'):
522 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
523 headers = {
524 'X-YouTube-Client-Name': compat_str(
525 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
526 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
527 'Origin': origin
528 }
529 if not visitor_data and ytcfg:
530 visitor_data = try_get(
531 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
532 if identity_token:
533 headers['X-Youtube-Identity-Token'] = identity_token
534 if account_syncid:
535 headers['X-Goog-PageId'] = account_syncid
536 headers['X-Goog-AuthUser'] = 0
537 if visitor_data:
538 headers['X-Goog-Visitor-Id'] = visitor_data
539 auth = self._generate_sapisidhash_header(origin)
540 if auth is not None:
541 headers['Authorization'] = auth
542 headers['X-Origin'] = origin
543 return headers
544
545 @staticmethod
546 def _build_api_continuation_query(continuation, ctp=None):
547 query = {
548 'continuation': continuation
549 }
550 # TODO: Inconsistency with clickTrackingParams.
551 # Currently we have a fixed ctp contained within context (from ytcfg)
552 # and a ctp in root query for continuation.
553 if ctp:
554 query['clickTracking'] = {'clickTrackingParams': ctp}
555 return query
556
557 @classmethod
558 def _continuation_query_ajax_to_api(cls, continuation_query):
559 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
560 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
561
562 @staticmethod
563 def _build_continuation_query(continuation, ctp=None):
564 query = {
565 'ctoken': continuation,
566 'continuation': continuation,
567 }
568 if ctp:
569 query['itct'] = ctp
570 return query
571
572 @classmethod
573 def _extract_next_continuation_data(cls, renderer):
574 next_continuation = try_get(
575 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
576 lambda x: x['continuation']['reloadContinuationData']), dict)
577 if not next_continuation:
578 return
579 continuation = next_continuation.get('continuation')
580 if not continuation:
581 return
582 ctp = next_continuation.get('clickTrackingParams')
583 return cls._build_continuation_query(continuation, ctp)
584
585 @classmethod
586 def _extract_continuation_ep_data(cls, continuation_ep: dict):
587 if isinstance(continuation_ep, dict):
588 continuation = try_get(
589 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
590 if not continuation:
591 return
592 ctp = continuation_ep.get('clickTrackingParams')
593 return cls._build_continuation_query(continuation, ctp)
594
595 @classmethod
596 def _extract_continuation(cls, renderer):
597 next_continuation = cls._extract_next_continuation_data(renderer)
598 if next_continuation:
599 return next_continuation
600 contents = []
601 for key in ('contents', 'items'):
602 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
603 for content in contents:
604 if not isinstance(content, dict):
605 continue
606 continuation_ep = try_get(
607 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
608 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
609 dict)
610 continuation = cls._extract_continuation_ep_data(continuation_ep)
611 if continuation:
612 return continuation
613
614 @staticmethod
615 def _extract_alerts(data):
616 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
617 if not isinstance(alert_dict, dict):
618 continue
619 for alert in alert_dict.values():
620 alert_type = alert.get('type')
621 if not alert_type:
622 continue
623 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
624 if message:
625 yield alert_type, message
626 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
627 message += try_get(run, lambda x: x['text'], compat_str)
628 if message:
629 yield alert_type, message
630
631 def _report_alerts(self, alerts, expected=True):
632 errors = []
633 warnings = []
634 for alert_type, alert_message in alerts:
635 if alert_type.lower() == 'error':
636 errors.append([alert_type, alert_message])
637 else:
638 warnings.append([alert_type, alert_message])
639
640 for alert_type, alert_message in (warnings + errors[:-1]):
641 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
642 if errors:
643 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
644
645 def _extract_and_report_alerts(self, data, *args, **kwargs):
646 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
647
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the Innertube API via _call_api with retries.

        Retries (up to the 'extractor_retries' param, default 3) on HTTP
        500/503/404 and, when check_get_keys is given, on responses missing
        all of those keys ("incomplete data"). Alerts in the response are
        reported; an error alert aborts. On final failure: raises when
        *fatal*, otherwise warns and returns None.
        """
        response = None
        last_error = None
        count = -1  # first attempt is count 0 (not labelled as a retry)
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # fatal=True here so failures surface as ExtractorError and
                # are handled by the retry logic below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Accept the response when no sanity keys were requested or
                # at least one of them is present
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
703
704 @staticmethod
705 def is_music_url(url):
706 return re.match(r'https?://music\.youtube\.com/', url) is not None
707
708 def _extract_video(self, renderer):
709 video_id = renderer.get('videoId')
710 title = try_get(
711 renderer,
712 (lambda x: x['title']['runs'][0]['text'],
713 lambda x: x['title']['simpleText']), compat_str)
714 description = try_get(
715 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
716 compat_str)
717 duration = parse_duration(try_get(
718 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
719 view_count_text = try_get(
720 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
721 view_count = str_to_int(self._search_regex(
722 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
723 'view count', default=None))
724 uploader = try_get(
725 renderer,
726 (lambda x: x['ownerText']['runs'][0]['text'],
727 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
728 return {
729 '_type': 'url',
730 'ie_key': YoutubeIE.ie_key(),
731 'id': video_id,
732 'url': video_id,
733 'title': title,
734 'description': description,
735 'duration': duration,
736 'view_count': view_count,
737 'uploader': uploader,
738 }
739
740
741class YoutubeIE(YoutubeBaseInfoExtractor):
742 IE_DESC = 'YouTube.com'
743 _INVIDIOUS_SITES = (
744 # invidious-redirect websites
745 r'(?:www\.)?redirect\.invidious\.io',
746 r'(?:(?:www|dev)\.)?invidio\.us',
747 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
748 r'(?:www\.)?invidious\.pussthecat\.org',
749 r'(?:www\.)?invidious\.zee\.li',
750 r'(?:www\.)?invidious\.ethibox\.fr',
751 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
752 # youtube-dl invidious instances list
753 r'(?:(?:www|no)\.)?invidiou\.sh',
754 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
755 r'(?:www\.)?invidious\.kabi\.tk',
756 r'(?:www\.)?invidious\.mastodon\.host',
757 r'(?:www\.)?invidious\.zapashcanon\.fr',
758 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
759 r'(?:www\.)?invidious\.tinfoil-hat\.net',
760 r'(?:www\.)?invidious\.himiko\.cloud',
761 r'(?:www\.)?invidious\.reallyancient\.tech',
762 r'(?:www\.)?invidious\.tube',
763 r'(?:www\.)?invidiou\.site',
764 r'(?:www\.)?invidious\.site',
765 r'(?:www\.)?invidious\.xyz',
766 r'(?:www\.)?invidious\.nixnet\.xyz',
767 r'(?:www\.)?invidious\.048596\.xyz',
768 r'(?:www\.)?invidious\.drycat\.fr',
769 r'(?:www\.)?inv\.skyn3t\.in',
770 r'(?:www\.)?tube\.poal\.co',
771 r'(?:www\.)?tube\.connect\.cafe',
772 r'(?:www\.)?vid\.wxzm\.sx',
773 r'(?:www\.)?vid\.mint\.lgbt',
774 r'(?:www\.)?vid\.puffyan\.us',
775 r'(?:www\.)?yewtu\.be',
776 r'(?:www\.)?yt\.elukerio\.org',
777 r'(?:www\.)?yt\.lelux\.fi',
778 r'(?:www\.)?invidious\.ggc-project\.de',
779 r'(?:www\.)?yt\.maisputain\.ovh',
780 r'(?:www\.)?ytprivate\.com',
781 r'(?:www\.)?invidious\.13ad\.de',
782 r'(?:www\.)?invidious\.toot\.koeln',
783 r'(?:www\.)?invidious\.fdn\.fr',
784 r'(?:www\.)?watch\.nettohikari\.com',
785 r'(?:www\.)?invidious\.namazso\.eu',
786 r'(?:www\.)?invidious\.silkky\.cloud',
787 r'(?:www\.)?invidious\.exonip\.de',
788 r'(?:www\.)?invidious\.riverside\.rocks',
789 r'(?:www\.)?invidious\.blamefran\.net',
790 r'(?:www\.)?invidious\.moomoo\.de',
791 r'(?:www\.)?ytb\.trom\.tf',
792 r'(?:www\.)?yt\.cyberhost\.uk',
793 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
794 r'(?:www\.)?qklhadlycap4cnod\.onion',
795 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
796 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
797 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
798 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
799 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
800 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
801 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
802 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
803 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
804 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
805 )
806 _VALID_URL = r"""(?x)^
807 (
808 (?:https?://|//) # http(s):// or protocol-independent URL
809 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
810 (?:www\.)?deturl\.com/www\.youtube\.com|
811 (?:www\.)?pwnyoutube\.com|
812 (?:www\.)?hooktube\.com|
813 (?:www\.)?yourepeat\.com|
814 tube\.majestyc\.net|
815 %(invidious)s|
816 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
817 (?:.*?\#/)? # handle anchor (#/) redirect urls
818 (?: # the various things that can precede the ID:
819 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
820 |(?: # or the v= param in all its forms
821 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
822 (?:\?|\#!?) # the params delimiter ? or # or #!
823 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
824 v=
825 )
826 ))
827 |(?:
828 youtu\.be| # just youtu.be/xxxx
829 vid\.plus| # or vid.plus/xxxx
830 zwearz\.com/watch| # or zwearz.com/watch/xxxx
831 %(invidious)s
832 )/
833 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
834 )
835 )? # all until now is optional -> you can pass the naked ID
836 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
837 (?(1).+)? # if we found the ID, everything can follow
838 (?:\#|$)""" % {
839 'invidious': '|'.join(_INVIDIOUS_SITES),
840 }
841 _PLAYER_INFO_RE = (
842 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
843 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
844 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
845 )
    # Static itag -> format metadata table, used to supplement/override what
    # the player response reports. Keys are itag strings; values are merged
    # into the format dicts (ext, width/height, codecs, abr, fps, ...).
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle serialization formats requested from the timedtext endpoint.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Playability-status reason strings that indicate an age-gated video
    # (matched against the player response elsewhere in this extractor).
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Geo restriction is handled by this extractor itself, so the generic
    # geo-bypass machinery of InfoExtractor is disabled.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
961 _TESTS = [
962 {
963 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
964 'info_dict': {
965 'id': 'BaW_jenozKc',
966 'ext': 'mp4',
967 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
968 'uploader': 'Philipp Hagemeister',
969 'uploader_id': 'phihag',
970 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
971 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
972 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
973 'upload_date': '20121002',
974 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
975 'categories': ['Science & Technology'],
976 'tags': ['youtube-dl'],
977 'duration': 10,
978 'view_count': int,
979 'like_count': int,
980 'dislike_count': int,
981 'start_time': 1,
982 'end_time': 9,
983 }
984 },
985 {
986 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
987 'note': 'Embed-only video (#1746)',
988 'info_dict': {
989 'id': 'yZIXLfi8CZQ',
990 'ext': 'mp4',
991 'upload_date': '20120608',
992 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
993 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
994 'uploader': 'SET India',
995 'uploader_id': 'setindia',
996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
997 'age_limit': 18,
998 },
999 'skip': 'Private video',
1000 },
1001 {
1002 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1003 'note': 'Use the first video ID in the URL',
1004 'info_dict': {
1005 'id': 'BaW_jenozKc',
1006 'ext': 'mp4',
1007 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1008 'uploader': 'Philipp Hagemeister',
1009 'uploader_id': 'phihag',
1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1011 'upload_date': '20121002',
1012 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1013 'categories': ['Science & Technology'],
1014 'tags': ['youtube-dl'],
1015 'duration': 10,
1016 'view_count': int,
1017 'like_count': int,
1018 'dislike_count': int,
1019 },
1020 'params': {
1021 'skip_download': True,
1022 },
1023 },
1024 {
1025 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1026 'note': '256k DASH audio (format 141) via DASH manifest',
1027 'info_dict': {
1028 'id': 'a9LDPn-MO4I',
1029 'ext': 'm4a',
1030 'upload_date': '20121002',
1031 'uploader_id': '8KVIDEO',
1032 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1033 'description': '',
1034 'uploader': '8KVIDEO',
1035 'title': 'UHDTV TEST 8K VIDEO.mp4'
1036 },
1037 'params': {
1038 'youtube_include_dash_manifest': True,
1039 'format': '141',
1040 },
1041 'skip': 'format 141 not served anymore',
1042 },
1043 # DASH manifest with encrypted signature
1044 {
1045 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1046 'info_dict': {
1047 'id': 'IB3lcPjvWLA',
1048 'ext': 'm4a',
1049 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1050 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1051 'duration': 244,
1052 'uploader': 'AfrojackVEVO',
1053 'uploader_id': 'AfrojackVEVO',
1054 'upload_date': '20131011',
1055 'abr': 129.495,
1056 },
1057 'params': {
1058 'youtube_include_dash_manifest': True,
1059 'format': '141/bestaudio[ext=m4a]',
1060 },
1061 },
1062 # Controversy video
1063 {
1064 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1065 'info_dict': {
1066 'id': 'T4XJQO3qol8',
1067 'ext': 'mp4',
1068 'duration': 219,
1069 'upload_date': '20100909',
1070 'uploader': 'Amazing Atheist',
1071 'uploader_id': 'TheAmazingAtheist',
1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1073 'title': 'Burning Everyone\'s Koran',
1074 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1075 }
1076 },
1077 # Normal age-gate video (embed allowed)
1078 {
1079 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1080 'info_dict': {
1081 'id': 'HtVdAasjOgU',
1082 'ext': 'mp4',
1083 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1084 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1085 'duration': 142,
1086 'uploader': 'The Witcher',
1087 'uploader_id': 'WitcherGame',
1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1089 'upload_date': '20140605',
1090 'age_limit': 18,
1091 },
1092 },
1093 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094 # YouTube Red ad is not captured for creator
1095 {
1096 'url': '__2ABJjxzNo',
1097 'info_dict': {
1098 'id': '__2ABJjxzNo',
1099 'ext': 'mp4',
1100 'duration': 266,
1101 'upload_date': '20100430',
1102 'uploader_id': 'deadmau5',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1104 'creator': 'deadmau5',
1105 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1106 'uploader': 'deadmau5',
1107 'title': 'Deadmau5 - Some Chords (HD)',
1108 'alt_title': 'Some Chords',
1109 },
1110 'expected_warnings': [
1111 'DASH manifest missing',
1112 ]
1113 },
1114 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1115 {
1116 'url': 'lqQg6PlCWgI',
1117 'info_dict': {
1118 'id': 'lqQg6PlCWgI',
1119 'ext': 'mp4',
1120 'duration': 6085,
1121 'upload_date': '20150827',
1122 'uploader_id': 'olympic',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1124 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1125 'uploader': 'Olympic',
1126 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1127 },
1128 'params': {
1129 'skip_download': 'requires avconv',
1130 }
1131 },
1132 # Non-square pixels
1133 {
1134 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135 'info_dict': {
1136 'id': '_b-2C3KPAM0',
1137 'ext': 'mp4',
1138 'stretched_ratio': 16 / 9.,
1139 'duration': 85,
1140 'upload_date': '20110310',
1141 'uploader_id': 'AllenMeow',
1142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1143 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1144 'uploader': '孫ᄋᄅ',
1145 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146 },
1147 },
1148 # url_encoded_fmt_stream_map is empty string
1149 {
1150 'url': 'qEJwOuvDf7I',
1151 'info_dict': {
1152 'id': 'qEJwOuvDf7I',
1153 'ext': 'webm',
1154 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155 'description': '',
1156 'upload_date': '20150404',
1157 'uploader_id': 'spbelect',
1158 'uploader': 'Наблюдатели Петербурга',
1159 },
1160 'params': {
1161 'skip_download': 'requires avconv',
1162 },
1163 'skip': 'This live event has ended.',
1164 },
1165 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1166 {
1167 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168 'info_dict': {
1169 'id': 'FIl7x6_3R5Y',
1170 'ext': 'webm',
1171 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1173 'duration': 220,
1174 'upload_date': '20150625',
1175 'uploader_id': 'dorappi2000',
1176 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1177 'uploader': 'dorappi2000',
1178 'formats': 'mincount:31',
1179 },
1180 'skip': 'not actual anymore',
1181 },
1182 # DASH manifest with segment_list
1183 {
1184 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185 'md5': '8ce563a1d667b599d21064e982ab9e31',
1186 'info_dict': {
1187 'id': 'CsmdDsKjzN8',
1188 'ext': 'mp4',
1189 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1190 'uploader': 'Airtek',
1191 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194 },
1195 'params': {
1196 'youtube_include_dash_manifest': True,
1197 'format': '135', # bestvideo
1198 },
1199 'skip': 'This live event has ended.',
1200 },
1201 {
1202 # Multifeed videos (multiple cameras), URL is for Main Camera
1203 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1204 'info_dict': {
1205 'id': 'jvGDaLqkpTg',
1206 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207 'description': 'md5:e03b909557865076822aa169218d6a5d',
1208 },
1209 'playlist': [{
1210 'info_dict': {
1211 'id': 'jvGDaLqkpTg',
1212 'ext': 'mp4',
1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10643,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1220 },
1221 }, {
1222 'info_dict': {
1223 'id': '3AKt1R1aDnw',
1224 'ext': 'mp4',
1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10991,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1232 },
1233 }, {
1234 'info_dict': {
1235 'id': 'RtAMM00gpVc',
1236 'ext': 'mp4',
1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10995,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1244 },
1245 }, {
1246 'info_dict': {
1247 'id': '6N2fdlP3C5U',
1248 'ext': 'mp4',
1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10990,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1256 },
1257 }],
1258 'params': {
1259 'skip_download': True,
1260 },
1261 },
1262 {
1263 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1264 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1265 'info_dict': {
1266 'id': 'gVfLd0zydlo',
1267 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1268 },
1269 'playlist_count': 2,
1270 'skip': 'Not multifeed anymore',
1271 },
1272 {
1273 'url': 'https://vid.plus/FlRa-iH7PGw',
1274 'only_matching': True,
1275 },
1276 {
1277 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1278 'only_matching': True,
1279 },
1280 {
1281 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1282 # Also tests cut-off URL expansion in video description (see
1283 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1284 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1285 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1286 'info_dict': {
1287 'id': 'lsguqyKfVQg',
1288 'ext': 'mp4',
1289 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1290 'alt_title': 'Dark Walk - Position Music',
1291 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1292 'duration': 133,
1293 'upload_date': '20151119',
1294 'uploader_id': 'IronSoulElf',
1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1296 'uploader': 'IronSoulElf',
1297 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1298 'track': 'Dark Walk - Position Music',
1299 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1300 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1301 },
1302 'params': {
1303 'skip_download': True,
1304 },
1305 },
1306 {
1307 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1308 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1309 'only_matching': True,
1310 },
1311 {
1312 # Video with yt:stretch=17:0
1313 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1314 'info_dict': {
1315 'id': 'Q39EVAstoRM',
1316 'ext': 'mp4',
1317 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1318 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1319 'upload_date': '20151107',
1320 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1321 'uploader': 'CH GAMER DROID',
1322 },
1323 'params': {
1324 'skip_download': True,
1325 },
1326 'skip': 'This video does not exist.',
1327 },
1328 {
1329 # Video with incomplete 'yt:stretch=16:'
1330 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1331 'only_matching': True,
1332 },
1333 {
1334 # Video licensed under Creative Commons
1335 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1336 'info_dict': {
1337 'id': 'M4gD1WSo5mA',
1338 'ext': 'mp4',
1339 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1340 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1341 'duration': 721,
1342 'upload_date': '20150127',
1343 'uploader_id': 'BerkmanCenter',
1344 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1345 'uploader': 'The Berkman Klein Center for Internet & Society',
1346 'license': 'Creative Commons Attribution license (reuse allowed)',
1347 },
1348 'params': {
1349 'skip_download': True,
1350 },
1351 },
1352 {
1353 # Channel-like uploader_url
1354 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1355 'info_dict': {
1356 'id': 'eQcmzGIKrzg',
1357 'ext': 'mp4',
1358 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1359 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1360 'duration': 4060,
1361 'upload_date': '20151119',
1362 'uploader': 'Bernie Sanders',
1363 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1364 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1365 'license': 'Creative Commons Attribution license (reuse allowed)',
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
1371 {
1372 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1373 'only_matching': True,
1374 },
1375 {
1376 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1377 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1378 'only_matching': True,
1379 },
1380 {
1381 # Rental video preview
1382 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1383 'info_dict': {
1384 'id': 'uGpuVWrhIzE',
1385 'ext': 'mp4',
1386 'title': 'Piku - Trailer',
1387 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1388 'upload_date': '20150811',
1389 'uploader': 'FlixMatrix',
1390 'uploader_id': 'FlixMatrixKaravan',
1391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1392 'license': 'Standard YouTube License',
1393 },
1394 'params': {
1395 'skip_download': True,
1396 },
1397 'skip': 'This video is not available.',
1398 },
1399 {
1400 # YouTube Red video with episode data
1401 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1402 'info_dict': {
1403 'id': 'iqKdEhx-dD4',
1404 'ext': 'mp4',
1405 'title': 'Isolation - Mind Field (Ep 1)',
1406 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1407 'duration': 2085,
1408 'upload_date': '20170118',
1409 'uploader': 'Vsauce',
1410 'uploader_id': 'Vsauce',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1412 'series': 'Mind Field',
1413 'season_number': 1,
1414 'episode_number': 1,
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 'expected_warnings': [
1420 'Skipping DASH manifest',
1421 ],
1422 },
1423 {
1424 # The following content has been identified by the YouTube community
1425 # as inappropriate or offensive to some audiences.
1426 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1427 'info_dict': {
1428 'id': '6SJNVb0GnPI',
1429 'ext': 'mp4',
1430 'title': 'Race Differences in Intelligence',
1431 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1432 'duration': 965,
1433 'upload_date': '20140124',
1434 'uploader': 'New Century Foundation',
1435 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1436 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1437 },
1438 'params': {
1439 'skip_download': True,
1440 },
1441 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1442 },
1443 {
1444 # itag 212
1445 'url': '1t24XAntNCY',
1446 'only_matching': True,
1447 },
1448 {
1449 # geo restricted to JP
1450 'url': 'sJL6WA-aGkQ',
1451 'only_matching': True,
1452 },
1453 {
1454 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1459 'only_matching': True,
1460 },
1461 {
1462 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1463 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1464 'only_matching': True,
1465 },
1466 {
1467 # DRM protected
1468 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1469 'only_matching': True,
1470 },
1471 {
1472 # Video with unsupported adaptive stream type formats
1473 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1474 'info_dict': {
1475 'id': 'Z4Vy8R84T1U',
1476 'ext': 'mp4',
1477 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1478 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1479 'duration': 433,
1480 'upload_date': '20130923',
1481 'uploader': 'Amelia Putri Harwita',
1482 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1483 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1484 'formats': 'maxcount:10',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 'youtube_include_dash_manifest': False,
1489 },
1490 'skip': 'not actual anymore',
1491 },
1492 {
1493 # Youtube Music Auto-generated description
1494 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1495 'info_dict': {
1496 'id': 'MgNrAu2pzNs',
1497 'ext': 'mp4',
1498 'title': 'Voyeur Girl',
1499 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1500 'upload_date': '20190312',
1501 'uploader': 'Stephen - Topic',
1502 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1503 'artist': 'Stephen',
1504 'track': 'Voyeur Girl',
1505 'album': 'it\'s too much love to know my dear',
1506 'release_date': '20190313',
1507 'release_year': 2019,
1508 },
1509 'params': {
1510 'skip_download': True,
1511 },
1512 },
1513 {
1514 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1515 'only_matching': True,
1516 },
1517 {
1518 # invalid -> valid video id redirection
1519 'url': 'DJztXj2GPfl',
1520 'info_dict': {
1521 'id': 'DJztXj2GPfk',
1522 'ext': 'mp4',
1523 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1524 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1525 'upload_date': '20090125',
1526 'uploader': 'Prochorowka',
1527 'uploader_id': 'Prochorowka',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1529 'artist': 'Panjabi MC',
1530 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1531 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1532 },
1533 'params': {
1534 'skip_download': True,
1535 },
1536 'skip': 'Video unavailable',
1537 },
1538 {
1539 # empty description results in an empty string
1540 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1541 'info_dict': {
1542 'id': 'x41yOUIvK2k',
1543 'ext': 'mp4',
1544 'title': 'IMG 3456',
1545 'description': '',
1546 'upload_date': '20170613',
1547 'uploader_id': 'ElevageOrVert',
1548 'uploader': 'ElevageOrVert',
1549 },
1550 'params': {
1551 'skip_download': True,
1552 },
1553 },
1554 {
1555 # with '};' inside yt initial data (see [1])
1556 # see [2] for an example with '};' inside ytInitialPlayerResponse
1557 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1558 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1559 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1560 'info_dict': {
1561 'id': 'CHqg6qOn4no',
1562 'ext': 'mp4',
1563 'title': 'Part 77 Sort a list of simple types in c#',
1564 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1565 'upload_date': '20130831',
1566 'uploader_id': 'kudvenkat',
1567 'uploader': 'kudvenkat',
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 },
1573 {
1574 # another example of '};' in ytInitialData
1575 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1576 'only_matching': True,
1577 },
1578 {
1579 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1580 'only_matching': True,
1581 },
1582 {
1583 # https://github.com/ytdl-org/youtube-dl/pull/28094
1584 'url': 'OtqTfy26tG0',
1585 'info_dict': {
1586 'id': 'OtqTfy26tG0',
1587 'ext': 'mp4',
1588 'title': 'Burn Out',
1589 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1590 'upload_date': '20141120',
1591 'uploader': 'The Cinematic Orchestra - Topic',
1592 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1594 'artist': 'The Cinematic Orchestra',
1595 'track': 'Burn Out',
1596 'album': 'Every Day',
1597 'release_data': None,
1598 'release_year': None,
1599 },
1600 'params': {
1601 'skip_download': True,
1602 },
1603 },
1604 {
1605 # controversial video, only works with bpctr when authenticated with cookies
1606 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1607 'only_matching': True,
1608 },
1609 {
1610 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1611 'url': 'cBvYw8_A0vQ',
1612 'info_dict': {
1613 'id': 'cBvYw8_A0vQ',
1614 'ext': 'mp4',
1615 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1616 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1617 'upload_date': '20201120',
1618 'uploader': 'Walk around Japan',
1619 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1621 },
1622 'params': {
1623 'skip_download': True,
1624 },
1625 }, {
1626 # Has multiple audio streams
1627 'url': 'WaOKSUlf4TM',
1628 'only_matching': True
1629 }, {
1630 # Requires Premium: has format 141 when requested using YTM url
1631 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1632 'only_matching': True
1633 }, {
1634 # multiple subtitles with same lang_code
1635 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1636 'only_matching': True,
1637 }, {
1638 # Force use android client fallback
1639 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1640 'info_dict': {
1641 'id': 'YOelRv7fMxY',
1642 'title': 'Digging a Secret Tunnel from my Workshop',
1643 'ext': '3gp',
1644 'upload_date': '20210624',
1645 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1646 'uploader': 'colinfurze',
1647 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1648 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1649 },
1650 'params': {
1651 'format': '17', # 3gp format available on android
1652 'extractor_args': {'youtube': {'player_client': ['android']}},
1653 },
1654 },
1655 {
1656 # Skip download of additional client configs (remix client config in this case)
1657 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658 'only_matching': True,
1659 'params': {
1660 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1661 },
1662 }
1663 ]
1664
    @classmethod
    def suitable(cls, url):
        """Claim only bare video URLs; URLs carrying a 'list' query
        parameter are left for the playlist/tab extractors to handle.
        """
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        # NOTE: the local import (rather than using the module-level parse_qs
        # directly) is deliberate so this also works when the class is loaded
        # as a lazy extractor stub.
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('list', [None])[0]:
            return False
        return super(YoutubeIE, cls).suitable(url)
1674
1675 def __init__(self, *args, **kwargs):
1676 super(YoutubeIE, self).__init__(*args, **kwargs)
1677 self._code_cache = {}
1678 self._player_cache = {}
1679
1680 def _extract_player_url(self, ytcfg=None, webpage=None):
1681 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1682 if not player_url:
1683 player_url = self._search_regex(
1684 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1685 webpage, 'player URL', fatal=False)
1686 if player_url.startswith('//'):
1687 player_url = 'https:' + player_url
1688 elif not re.match(r'https?://', player_url):
1689 player_url = compat_urlparse.urljoin(
1690 'https://www.youtube.com', player_url)
1691 return player_url
1692
1693 def _signature_cache_id(self, example_sig):
1694 """ Return a string representation of a signature """
1695 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1696
1697 @classmethod
1698 def _extract_player_info(cls, player_url):
1699 for player_re in cls._PLAYER_INFO_RE:
1700 id_m = re.search(player_re, player_url)
1701 if id_m:
1702 break
1703 else:
1704 raise ExtractorError('Cannot identify player %r' % player_url)
1705 return id_m.group('id')
1706
1707 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1708 player_id = self._extract_player_info(player_url)
1709 if player_id not in self._code_cache:
1710 self._code_cache[player_id] = self._download_webpage(
1711 player_url, video_id, fatal=fatal,
1712 note='Downloading player ' + player_id,
1713 errnote='Download of %s failed' % player_url)
1714 return player_id in self._code_cache
1715
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Returns a callable mapping an encrypted signature string to its
        # decrypted form. Cached on disk keyed by player id plus the
        # '.'-separated segment-length layout of example_sig.
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cache_spec is a permutation: output char i is input char cache_spec[i]
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function once over a probe string whose characters
            # are all distinct, recovering the permutation it applies so the
            # JS interpreter need not run again for this signature layout.
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
        # NOTE(review): falls through returning None if the player could not
        # be loaded - callers appear to rely on _load_player raising first.
1738
    def _print_sig_code(self, func, example_sig):
        # Debug helper: prints Python source equivalent to the extracted
        # signature permutation (used with --youtube-print-sig-code).
        def gen_sig_code(idxs):
            # Compress the permutation index list into slice expressions
            # wherever consecutive indices form an arithmetic run of step +-1.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it or flush the slice and reset
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new run begins at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or pending run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Recover the permutation by running func over a probe string of
        # unique characters, then render it as Python indexing code.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1777
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function inside the player JS
        and return a Python callable wrapping it via JSInterpreter."""
        # Patterns are tried in order; earlier entries match current player
        # builds, the tail keeps compatibility with historical ones.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its argument list as a single array
        return lambda s: initial_function([s])
1801
1802 def _decrypt_signature(self, s, video_id, player_url):
1803 """Turn the encrypted s field into a working signature"""
1804
1805 if player_url is None:
1806 raise ExtractorError('Cannot decrypt signature without player_url')
1807
1808 try:
1809 player_id = (player_url, self._signature_cache_id(s))
1810 if player_id not in self._player_cache:
1811 func = self._extract_signature_function(
1812 video_id, player_url, s
1813 )
1814 self._player_cache[player_id] = func
1815 func = self._player_cache[player_id]
1816 if self.get_param('youtube_print_sig_code'):
1817 self._print_sig_code(func, s)
1818 return func(s)
1819 except Exception as e:
1820 tb = traceback.format_exc()
1821 raise ExtractorError(
1822 'Signature extraction failed: ' + tb, cause=e)
1823
1824 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1825 """
1826 Extract signatureTimestamp (sts)
1827 Required to tell API what sig/player version is in use.
1828 """
1829 sts = None
1830 if isinstance(ytcfg, dict):
1831 sts = int_or_none(ytcfg.get('STS'))
1832
1833 if not sts:
1834 # Attempt to extract from player
1835 if player_url is None:
1836 error_msg = 'Cannot extract signature timestamp without player_url.'
1837 if fatal:
1838 raise ExtractorError(error_msg)
1839 self.report_warning(error_msg)
1840 return
1841 if self._load_player(video_id, player_url, fatal=fatal):
1842 player_id = self._extract_player_info(player_url)
1843 code = self._code_cache[player_id]
1844 sts = int_or_none(self._search_regex(
1845 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1846 'JS player signature timestamp', group='sts', fatal=fatal))
1847 return sts
1848
    def _mark_watched(self, video_id, player_response):
        """Fire the playback-tracking URL so the video is marked watched on the account."""
        playback_url = url_or_none(try_get(
            player_response,
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
        if not playback_url:
            return
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        # Attach protocol version and client playback nonce to the query
        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
        })
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        # Best-effort: failures are non-fatal by design
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
1873
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube URLs/video ids embedded in *webpage* (iframe,
        object/embed, SWF, lazyYT and the Wordpress YVII plugin markups)."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1905
1906 @staticmethod
1907 def _extract_url(webpage):
1908 urls = YoutubeIE._extract_urls(webpage)
1909 return urls[0] if urls else None
1910
1911 @classmethod
1912 def extract_id(cls, url):
1913 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1914 if mobj is None:
1915 raise ExtractorError('Invalid URL: %s' % url)
1916 video_id = mobj.group(2)
1917 return video_id
1918
    def _extract_chapters_from_json(self, data, video_id, duration):
        """Build a chapters list (start/end/title dicts) from the player
        overlay data; returns None when no chapter bar is present."""
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Start time is given in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # Each chapter ends where the next begins; the last one ends at
            # the video duration
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1958
1959 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1960 return self._parse_json(self._search_regex(
1961 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1962 regex), webpage, name, default='{}'), video_id, fatal=False)
1963
1964 @staticmethod
1965 def parse_time_text(time_text):
1966 """
1967 Parse the comment time text
1968 time_text is in the format 'X units ago (edited)'
1969 """
1970 time_text_split = time_text.split(' ')
1971 if len(time_text_split) >= 3:
1972 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1973
1974 @staticmethod
1975 def _join_text_entries(runs):
1976 text = None
1977 for run in runs:
1978 if not isinstance(run, dict):
1979 continue
1980 sub_text = try_get(run, lambda x: x['text'], compat_str)
1981 if sub_text:
1982 if not text:
1983 text = sub_text
1984 continue
1985 text += sub_text
1986 return text
1987
1988 def _extract_comment(self, comment_renderer, parent=None):
1989 comment_id = comment_renderer.get('commentId')
1990 if not comment_id:
1991 return
1992 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1993 text = self._join_text_entries(comment_text_runs) or ''
1994 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1995 time_text = self._join_text_entries(comment_time_text)
1996 # note: timestamp is an estimate calculated from the current time and time_text
1997 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
1998 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1999 author_id = try_get(comment_renderer,
2000 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2001 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2002 lambda x: x['likeCount']), compat_str)) or 0
2003 author_thumbnail = try_get(comment_renderer,
2004 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2005
2006 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2007 is_favorited = 'creatorHeart' in (try_get(
2008 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2009 return {
2010 'id': comment_id,
2011 'text': text,
2012 'timestamp': timestamp,
2013 'time_text': time_text,
2014 'like_count': votes,
2015 'is_favorited': is_favorited,
2016 'author': author,
2017 'author_id': author_id,
2018 'author_thumbnail': author_thumbnail,
2019 'author_is_uploader': author_is_uploader,
2020 'parent': parent or 'root'
2021 }
2022
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        # Generator over comments (and their replies, recursively). The first
        # item yielded for a top-level thread may be an int: the estimated
        # total comment count extracted from the section header.

        def extract_header(contents):
            # Find the comments header, report the expected total and pick the
            # continuation matching the configured sort order (top/new).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = try_get(comments_header_renderer,
                                                 (lambda x: x['countText']['runs'][0]['text'],
                                                  lambda x: x['commentsCount']['runs'][0]['text']),
                                                 compat_str)
                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in a page, recursing into reply threads.
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['ctoken']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=self._continuation_query_ajax_to_api(continuation),
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData forward so pagination stays consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                    break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2194
2195 @staticmethod
2196 def _generate_comment_continuation(video_id):
2197 """
2198 Generates initial comment section continuation token from given video id
2199 """
2200 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2201 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2202 new_continuation_intlist = list(itertools.chain.from_iterable(
2203 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2204 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2205
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Delegate to _comment_entries for each recognised section; that
            # generator may yield an int (estimated total) before comments.
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # int sentinel = estimated total count, not an actual comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort and keep whatever was downloaded so far
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2239
2240 @staticmethod
2241 def _generate_player_context(sts=None):
2242 context = {
2243 'html5Preference': 'HTML5_PREF_WANTS',
2244 }
2245 if sts is not None:
2246 context['signatureTimestamp'] = sts
2247 return {
2248 'playbackContext': {
2249 'contentPlaybackContext': context
2250 }
2251 }
2252
2253 @staticmethod
2254 def _get_video_info_params(video_id, client='TVHTML5'):
2255 GVI_CLIENTS = {
2256 'ANDROID': {
2257 'c': 'ANDROID',
2258 'cver': '16.20',
2259 },
2260 'TVHTML5': {
2261 'c': 'TVHTML5',
2262 'cver': '6.20180913',
2263 }
2264 }
2265 query = {
2266 'video_id': video_id,
2267 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2268 'html5': '1'
2269 }
2270 query.update(GVI_CLIENTS.get(client))
2271 return query
2272
2273 def _real_extract(self, url):
2274 url, smuggled_data = unsmuggle_url(url, {})
2275 video_id = self._match_id(url)
2276
2277 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2278
2279 base_url = self.http_scheme() + '//www.youtube.com/'
2280 webpage_url = base_url + 'watch?v=' + video_id
2281 webpage = self._download_webpage(
2282 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2283
2284 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2285 identity_token = self._extract_identity_token(webpage, video_id)
2286 syncid = self._extract_account_syncid(ytcfg)
2287 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2288
2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2291 player_client = self._configuration_arg('player_client', [''])[0]
2292 if player_client not in ('web', 'android', ''):
2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
2295 player_skip = self._configuration_arg('player_skip')
2296
2297 def get_text(x):
2298 if not x:
2299 return
2300 text = x.get('simpleText')
2301 if text and isinstance(text, compat_str):
2302 return text
2303 runs = x.get('runs')
2304 if not isinstance(runs, list):
2305 return
2306 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2307
2308 ytm_streaming_data = {}
2309 if is_music_url:
2310 ytm_webpage = None
2311 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2312 if sts and not force_mobile_client and 'configs' not in player_skip:
2313 ytm_webpage = self._download_webpage(
2314 'https://music.youtube.com',
2315 video_id, fatal=False, note='Downloading remix client config')
2316
2317 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2318 ytm_client = 'WEB_REMIX'
2319 if not sts or force_mobile_client:
2320 # Android client already has signature descrambled
2321 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2322 if not sts:
2323 self.report_warning('Falling back to android remix client for player API.')
2324 ytm_client = 'ANDROID_MUSIC'
2325 ytm_cfg = {}
2326
2327 ytm_headers = self._generate_api_headers(
2328 ytm_cfg, identity_token, syncid,
2329 client=ytm_client)
2330 ytm_query = {'videoId': video_id}
2331 ytm_query.update(self._generate_player_context(sts))
2332
2333 ytm_player_response = self._extract_response(
2334 item_id=video_id, ep='player', query=ytm_query,
2335 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2336 default_client=ytm_client,
2337 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2338 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2339
2340 player_response = None
2341 if webpage:
2342 player_response = self._extract_yt_initial_variable(
2343 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2344 video_id, 'initial player response')
2345
2346 if not player_response or force_mobile_client:
2347 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2348 yt_client = 'WEB'
2349 ytpcfg = ytcfg
2350 ytp_headers = headers
2351 if not sts or force_mobile_client:
2352 # Android client already has signature descrambled
2353 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2354 if not sts:
2355 self.report_warning('Falling back to android client for player API.')
2356 yt_client = 'ANDROID'
2357 ytpcfg = {}
2358 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2359
2360 yt_query = {'videoId': video_id}
2361 yt_query.update(self._generate_player_context(sts))
2362 player_response = self._extract_response(
2363 item_id=video_id, ep='player', query=yt_query,
2364 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2365 default_client=yt_client,
2366 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2367 ) or player_response
2368
2369 # Age-gate workarounds
2370 playability_status = player_response.get('playabilityStatus') or {}
2371 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2372 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2373 for gvi_client in gvi_clients:
2374 pr = self._parse_json(try_get(compat_parse_qs(
2375 self._download_webpage(
2376 base_url + 'get_video_info', video_id,
2377 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2378 'unable to download video info webpage', fatal=False,
2379 query=self._get_video_info_params(video_id, client=gvi_client))),
2380 lambda x: x['player_response'][0],
2381 compat_str) or '{}', video_id)
2382 if pr:
2383 break
2384 if not pr:
2385 self.report_warning('Falling back to embedded-only age-gate workaround.')
2386 embed_webpage = None
2387 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2388 if sts and not force_mobile_client and 'configs' not in player_skip:
2389 embed_webpage = self._download_webpage(
2390 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2391 video_id=video_id, note='Downloading age-gated embed config')
2392
2393 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2394 # If we extracted the embed webpage, it'll tell us if we can view the video
2395 embedded_pr = self._parse_json(
2396 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2397 video_id=video_id)
2398 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2399 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2400 yt_client = 'WEB_EMBEDDED_PLAYER'
2401 if not sts or force_mobile_client:
2402 # Android client already has signature descrambled
2403 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2404 if not sts:
2405 self.report_warning(
2406 'Falling back to android embedded client for player API (note: some formats may be missing).')
2407 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2408 ytcfg_age = {}
2409
2410 ytage_headers = self._generate_api_headers(
2411 ytcfg_age, identity_token, syncid, client=yt_client)
2412 yt_age_query = {'videoId': video_id}
2413 yt_age_query.update(self._generate_player_context(sts))
2414 pr = self._extract_response(
2415 item_id=video_id, ep='player', query=yt_age_query,
2416 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2417 default_client=yt_client,
2418 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2419 ) or {}
2420
2421 if pr:
2422 player_response = pr
2423
2424 trailer_video_id = try_get(
2425 playability_status,
2426 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2427 compat_str)
2428 if trailer_video_id:
2429 return self.url_result(
2430 trailer_video_id, self.ie_key(), trailer_video_id)
2431
2432 search_meta = (
2433 lambda x: self._html_search_meta(x, webpage, default=None)) \
2434 if webpage else lambda x: None
2435
2436 video_details = player_response.get('videoDetails') or {}
2437 microformat = try_get(
2438 player_response,
2439 lambda x: x['microformat']['playerMicroformatRenderer'],
2440 dict) or {}
2441 video_title = video_details.get('title') \
2442 or get_text(microformat.get('title')) \
2443 or search_meta(['og:title', 'twitter:title', 'title'])
2444 video_description = video_details.get('shortDescription')
2445
2446 if not smuggled_data.get('force_singlefeed', False):
2447 if not self.get_param('noplaylist'):
2448 multifeed_metadata_list = try_get(
2449 player_response,
2450 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2451 compat_str)
2452 if multifeed_metadata_list:
2453 entries = []
2454 feed_ids = []
2455 for feed in multifeed_metadata_list.split(','):
2456 # Unquote should take place before split on comma (,) since textual
2457 # fields may contain comma as well (see
2458 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2459 feed_data = compat_parse_qs(
2460 compat_urllib_parse_unquote_plus(feed))
2461
2462 def feed_entry(name):
2463 return try_get(
2464 feed_data, lambda x: x[name][0], compat_str)
2465
2466 feed_id = feed_entry('id')
2467 if not feed_id:
2468 continue
2469 feed_title = feed_entry('title')
2470 title = video_title
2471 if feed_title:
2472 title += ' (%s)' % feed_title
2473 entries.append({
2474 '_type': 'url_transparent',
2475 'ie_key': 'Youtube',
2476 'url': smuggle_url(
2477 base_url + 'watch?v=' + feed_data['id'][0],
2478 {'force_singlefeed': True}),
2479 'title': title,
2480 })
2481 feed_ids.append(feed_id)
2482 self.to_screen(
2483 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2484 % (', '.join(feed_ids), video_id))
2485 return self.playlist_result(
2486 entries, video_id, video_title, video_description)
2487 else:
2488 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2489
2490 formats, itags, stream_ids = [], [], []
2491 itag_qualities = {}
2492 q = qualities([
2493 # "tiny" is the smallest video-only format. But some audio-only formats
2494 # was also labeled "tiny". It is not clear if such formats still exist
2495 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2496 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2497 ])
2498
2499 streaming_data = player_response.get('streamingData') or {}
2500 streaming_formats = streaming_data.get('formats') or []
2501 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2502 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2503 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2504
2505 for fmt in streaming_formats:
2506 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2507 continue
2508
2509 itag = str_or_none(fmt.get('itag'))
2510 audio_track = fmt.get('audioTrack') or {}
2511 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2512 if stream_id in stream_ids:
2513 continue
2514
2515 quality = fmt.get('quality')
2516 if quality == 'tiny' or not quality:
2517 quality = fmt.get('audioQuality', '').lower() or quality
2518 if itag and quality:
2519 itag_qualities[itag] = quality
2520 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2521 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2522 # number of fragment that would subsequently requested with (`&sq=N`)
2523 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2524 continue
2525
2526 fmt_url = fmt.get('url')
2527 if not fmt_url:
2528 sc = compat_parse_qs(fmt.get('signatureCipher'))
2529 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2530 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2531 if not (sc and fmt_url and encrypted_sig):
2532 continue
2533 if not player_url:
2534 continue
2535 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2536 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2537 fmt_url += '&' + sp + '=' + signature
2538
2539 if itag:
2540 itags.append(itag)
2541 stream_ids.append(stream_id)
2542
2543 tbr = float_or_none(
2544 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2545 dct = {
2546 'asr': int_or_none(fmt.get('audioSampleRate')),
2547 'filesize': int_or_none(fmt.get('contentLength')),
2548 'format_id': itag,
2549 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2550 'fps': int_or_none(fmt.get('fps')),
2551 'height': int_or_none(fmt.get('height')),
2552 'quality': q(quality),
2553 'tbr': tbr,
2554 'url': fmt_url,
2555 'width': fmt.get('width'),
2556 'language': audio_track.get('id', '').split('.')[0],
2557 }
2558 mime_mobj = re.match(
2559 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2560 if mime_mobj:
2561 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2562 dct.update(parse_codecs(mime_mobj.group(2)))
2563 # The 3gp format in android client has a quality of "small",
2564 # but is actually worse than all other formats
2565 if dct['ext'] == '3gp':
2566 dct['quality'] = q('tiny')
2567 no_audio = dct.get('acodec') == 'none'
2568 no_video = dct.get('vcodec') == 'none'
2569 if no_audio:
2570 dct['vbr'] = tbr
2571 if no_video:
2572 dct['abr'] = tbr
2573 if no_audio or no_video:
2574 dct['downloader_options'] = {
2575 # Youtube throttles chunks >~10M
2576 'http_chunk_size': 10485760,
2577 }
2578 if dct.get('ext'):
2579 dct['container'] = dct['ext'] + '_dash'
2580 formats.append(dct)
2581
2582 skip_manifests = self._configuration_arg('skip')
2583 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2584 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2585
2586 for sd in (streaming_data, ytm_streaming_data):
2587 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2588 if hls_manifest_url:
2589 for f in self._extract_m3u8_formats(
2590 hls_manifest_url, video_id, 'mp4', fatal=False):
2591 itag = self._search_regex(
2592 r'/itag/(\d+)', f['url'], 'itag', default=None)
2593 if itag:
2594 f['format_id'] = itag
2595 formats.append(f)
2596
2597 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2598 if dash_manifest_url:
2599 for f in self._extract_mpd_formats(
2600 dash_manifest_url, video_id, fatal=False):
2601 itag = f['format_id']
2602 if itag in itags:
2603 continue
2604 if itag in itag_qualities:
2605 f['quality'] = q(itag_qualities[itag])
2606 filesize = int_or_none(self._search_regex(
2607 r'/clen/(\d+)', f.get('fragment_base_url')
2608 or f['url'], 'file size', default=None))
2609 if filesize:
2610 f['filesize'] = filesize
2611 formats.append(f)
2612
2613 if not formats:
2614 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2615 self.raise_no_formats(
2616 'This video is DRM protected.', expected=True)
2617 pemr = try_get(
2618 playability_status,
2619 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2620 dict) or {}
2621 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2622 subreason = pemr.get('subreason')
2623 if subreason:
2624 subreason = clean_html(get_text(subreason))
2625 if subreason == 'The uploader has not made this video available in your country.':
2626 countries = microformat.get('availableCountries')
2627 if not countries:
2628 regions_allowed = search_meta('regionsAllowed')
2629 countries = regions_allowed.split(',') if regions_allowed else None
2630 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2631 reason += '\n' + subreason
2632 if reason:
2633 self.raise_no_formats(reason, expected=True)
2634
2635 self._sort_formats(formats)
2636
2637 keywords = video_details.get('keywords') or []
2638 if not keywords and webpage:
2639 keywords = [
2640 unescapeHTML(m.group('content'))
2641 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2642 for keyword in keywords:
2643 if keyword.startswith('yt:stretch='):
2644 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2645 if mobj:
2646 # NB: float is intentional for forcing float division
2647 w, h = (float(v) for v in mobj.groups())
2648 if w > 0 and h > 0:
2649 ratio = w / h
2650 for f in formats:
2651 if f.get('vcodec') != 'none':
2652 f['stretched_ratio'] = ratio
2653 break
2654
2655 thumbnails = []
2656 for container in (video_details, microformat):
2657 for thumbnail in (try_get(
2658 container,
2659 lambda x: x['thumbnail']['thumbnails'], list) or []):
2660 thumbnail_url = thumbnail.get('url')
2661 if not thumbnail_url:
2662 continue
2663 # Sometimes youtube gives a wrong thumbnail URL. See:
2664 # https://github.com/yt-dlp/yt-dlp/issues/233
2665 # https://github.com/ytdl-org/youtube-dl/issues/28023
2666 if 'maxresdefault' in thumbnail_url:
2667 thumbnail_url = thumbnail_url.split('?')[0]
2668 thumbnails.append({
2669 'url': thumbnail_url,
2670 'height': int_or_none(thumbnail.get('height')),
2671 'width': int_or_none(thumbnail.get('width')),
2672 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2673 })
2674 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2675 if thumbnail_url:
2676 thumbnails.append({
2677 'url': thumbnail_url,
2678 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2679 })
2680 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2681 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2682 thumbnails.append({
2683 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2684 'preference': 1,
2685 })
2686 self._remove_duplicate_formats(thumbnails)
2687
2688 category = microformat.get('category') or search_meta('genre')
2689 channel_id = video_details.get('channelId') \
2690 or microformat.get('externalChannelId') \
2691 or search_meta('channelId')
2692 duration = int_or_none(
2693 video_details.get('lengthSeconds')
2694 or microformat.get('lengthSeconds')) \
2695 or parse_duration(search_meta('duration'))
2696 is_live = video_details.get('isLive')
2697 is_upcoming = video_details.get('isUpcoming')
2698 owner_profile_url = microformat.get('ownerProfileUrl')
2699
2700 info = {
2701 'id': video_id,
2702 'title': self._live_title(video_title) if is_live else video_title,
2703 'formats': formats,
2704 'thumbnails': thumbnails,
2705 'description': video_description,
2706 'upload_date': unified_strdate(
2707 microformat.get('uploadDate')
2708 or search_meta('uploadDate')),
2709 'uploader': video_details['author'],
2710 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2711 'uploader_url': owner_profile_url,
2712 'channel_id': channel_id,
2713 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2714 'duration': duration,
2715 'view_count': int_or_none(
2716 video_details.get('viewCount')
2717 or microformat.get('viewCount')
2718 or search_meta('interactionCount')),
2719 'average_rating': float_or_none(video_details.get('averageRating')),
2720 'age_limit': 18 if (
2721 microformat.get('isFamilySafe') is False
2722 or search_meta('isFamilyFriendly') == 'false'
2723 or search_meta('og:restrictions:age') == '18+') else 0,
2724 'webpage_url': webpage_url,
2725 'categories': [category] if category else None,
2726 'tags': keywords,
2727 'is_live': is_live,
2728 'playable_in_embed': playability_status.get('playableInEmbed'),
2729 'was_live': video_details.get('isLiveContent'),
2730 }
2731
2732 pctr = try_get(
2733 player_response,
2734 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2735 subtitles = {}
2736 if pctr:
2737 def process_language(container, base_url, lang_code, sub_name, query):
2738 lang_subs = container.setdefault(lang_code, [])
2739 for fmt in self._SUBTITLE_FORMATS:
2740 query.update({
2741 'fmt': fmt,
2742 })
2743 lang_subs.append({
2744 'ext': fmt,
2745 'url': update_url_query(base_url, query),
2746 'name': sub_name,
2747 })
2748
2749 for caption_track in (pctr.get('captionTracks') or []):
2750 base_url = caption_track.get('baseUrl')
2751 if not base_url:
2752 continue
2753 if caption_track.get('kind') != 'asr':
2754 lang_code = (
2755 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2756 or caption_track.get('languageCode'))
2757 if not lang_code:
2758 continue
2759 process_language(
2760 subtitles, base_url, lang_code,
2761 try_get(caption_track, lambda x: x['name']['simpleText']),
2762 {})
2763 continue
2764 automatic_captions = {}
2765 for translation_language in (pctr.get('translationLanguages') or []):
2766 translation_language_code = translation_language.get('languageCode')
2767 if not translation_language_code:
2768 continue
2769 process_language(
2770 automatic_captions, base_url, translation_language_code,
2771 try_get(translation_language, (
2772 lambda x: x['languageName']['simpleText'],
2773 lambda x: x['languageName']['runs'][0]['text'])),
2774 {'tlang': translation_language_code})
2775 info['automatic_captions'] = automatic_captions
2776 info['subtitles'] = subtitles
2777
2778 parsed_url = compat_urllib_parse_urlparse(url)
2779 for component in [parsed_url.fragment, parsed_url.query]:
2780 query = compat_parse_qs(component)
2781 for k, v in query.items():
2782 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2783 d_k += '_time'
2784 if d_k not in info and k in s_ks:
2785 info[d_k] = parse_duration(query[k][0])
2786
2787 # Youtube Music Auto-generated description
2788 if video_description:
2789 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2790 if mobj:
2791 release_year = mobj.group('release_year')
2792 release_date = mobj.group('release_date')
2793 if release_date:
2794 release_date = release_date.replace('-', '')
2795 if not release_year:
2796 release_year = release_date[:4]
2797 info.update({
2798 'album': mobj.group('album'.strip()),
2799 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2800 'track': mobj.group('track').strip(),
2801 'release_date': release_date,
2802 'release_year': int_or_none(release_year),
2803 })
2804
2805 initial_data = None
2806 if webpage:
2807 initial_data = self._extract_yt_initial_variable(
2808 webpage, self._YT_INITIAL_DATA_RE, video_id,
2809 'yt initial data')
2810 if not initial_data:
2811 initial_data = self._extract_response(
2812 item_id=video_id, ep='next', fatal=False,
2813 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2814 note='Downloading initial data API JSON')
2815
2816 try:
2817 # This will error if there is no livechat
2818 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2819 info['subtitles']['live_chat'] = [{
2820 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2821 'video_id': video_id,
2822 'ext': 'json',
2823 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2824 }]
2825 except (KeyError, IndexError, TypeError):
2826 pass
2827
2828 if initial_data:
2829 chapters = self._extract_chapters_from_json(
2830 initial_data, video_id, duration)
2831 if not chapters:
2832 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2833 contents = try_get(
2834 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2835 list)
2836 if not contents:
2837 continue
2838
2839 def chapter_time(mmlir):
2840 return parse_duration(
2841 get_text(mmlir.get('timeDescription')))
2842
2843 chapters = []
2844 for next_num, content in enumerate(contents, start=1):
2845 mmlir = content.get('macroMarkersListItemRenderer') or {}
2846 start_time = chapter_time(mmlir)
2847 end_time = chapter_time(try_get(
2848 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2849 if next_num < len(contents) else duration
2850 if start_time is None or end_time is None:
2851 continue
2852 chapters.append({
2853 'start_time': start_time,
2854 'end_time': end_time,
2855 'title': get_text(mmlir.get('title')),
2856 })
2857 if chapters:
2858 break
2859 if chapters:
2860 info['chapters'] = chapters
2861
2862 contents = try_get(
2863 initial_data,
2864 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2865 list) or []
2866 for content in contents:
2867 vpir = content.get('videoPrimaryInfoRenderer')
2868 if vpir:
2869 stl = vpir.get('superTitleLink')
2870 if stl:
2871 stl = get_text(stl)
2872 if try_get(
2873 vpir,
2874 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2875 info['location'] = stl
2876 else:
2877 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2878 if mobj:
2879 info.update({
2880 'series': mobj.group(1),
2881 'season_number': int(mobj.group(2)),
2882 'episode_number': int(mobj.group(3)),
2883 })
2884 for tlb in (try_get(
2885 vpir,
2886 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2887 list) or []):
2888 tbr = tlb.get('toggleButtonRenderer') or {}
2889 for getter, regex in [(
2890 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2891 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2892 lambda x: x['accessibility'],
2893 lambda x: x['accessibilityData']['accessibilityData'],
2894 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2895 label = (try_get(tbr, getter, dict) or {}).get('label')
2896 if label:
2897 mobj = re.match(regex, label)
2898 if mobj:
2899 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2900 break
2901 sbr_tooltip = try_get(
2902 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2903 if sbr_tooltip:
2904 like_count, dislike_count = sbr_tooltip.split(' / ')
2905 info.update({
2906 'like_count': str_to_int(like_count),
2907 'dislike_count': str_to_int(dislike_count),
2908 })
2909 vsir = content.get('videoSecondaryInfoRenderer')
2910 if vsir:
2911 info['channel'] = get_text(try_get(
2912 vsir,
2913 lambda x: x['owner']['videoOwnerRenderer']['title'],
2914 dict))
2915 rows = try_get(
2916 vsir,
2917 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2918 list) or []
2919 multiple_songs = False
2920 for row in rows:
2921 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2922 multiple_songs = True
2923 break
2924 for row in rows:
2925 mrr = row.get('metadataRowRenderer') or {}
2926 mrr_title = mrr.get('title')
2927 if not mrr_title:
2928 continue
2929 mrr_title = get_text(mrr['title'])
2930 mrr_contents_text = get_text(mrr['contents'][0])
2931 if mrr_title == 'License':
2932 info['license'] = mrr_contents_text
2933 elif not multiple_songs:
2934 if mrr_title == 'Album':
2935 info['album'] = mrr_contents_text
2936 elif mrr_title == 'Artist':
2937 info['artist'] = mrr_contents_text
2938 elif mrr_title == 'Song':
2939 info['track'] = mrr_contents_text
2940
2941 fallbacks = {
2942 'channel': 'uploader',
2943 'channel_id': 'uploader_id',
2944 'channel_url': 'uploader_url',
2945 }
2946 for to, frm in fallbacks.items():
2947 if not info.get(to):
2948 info[to] = info.get(frm)
2949
2950 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2951 v = info.get(s_k)
2952 if v:
2953 info[d_k] = v
2954
2955 is_private = bool_or_none(video_details.get('isPrivate'))
2956 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2957 is_membersonly = None
2958 is_premium = None
2959 if initial_data and is_private is not None:
2960 is_membersonly = False
2961 is_premium = False
2962 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2963 for content in contents or []:
2964 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2965 for badge in badges or []:
2966 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2967 if label.lower() == 'members only':
2968 is_membersonly = True
2969 break
2970 elif label.lower() == 'premium':
2971 is_premium = True
2972 break
2973 if is_membersonly or is_premium:
2974 break
2975
2976 # TODO: Add this for playlists
2977 info['availability'] = self._availability(
2978 is_private=is_private,
2979 needs_premium=is_premium,
2980 needs_subscription=is_membersonly,
2981 needs_auth=info['age_limit'] >= 18,
2982 is_unlisted=None if is_private is None else is_unlisted)
2983
2984 # get xsrf for annotations or comments
2985 get_annotations = self.get_param('writeannotations', False)
2986 get_comments = self.get_param('getcomments', False)
2987 if get_annotations or get_comments:
2988 xsrf_token = None
2989 ytcfg = self._extract_ytcfg(video_id, webpage)
2990 if ytcfg:
2991 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2992 if not xsrf_token:
2993 xsrf_token = self._search_regex(
2994 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2995 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2996
2997 # annotations
2998 if get_annotations:
2999 invideo_url = try_get(
3000 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3001 if xsrf_token and invideo_url:
3002 xsrf_field_name = None
3003 if ytcfg:
3004 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3005 if not xsrf_field_name:
3006 xsrf_field_name = self._search_regex(
3007 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3008 webpage, 'xsrf field name',
3009 group='xsrf_field_name', default='session_token')
3010 info['annotations'] = self._download_webpage(
3011 self._proto_relative_url(invideo_url),
3012 video_id, note='Downloading annotations',
3013 errnote='Unable to download video annotations', fatal=False,
3014 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3015
3016 if get_comments:
3017 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3018
3019 self.mark_watched(video_id, player_response)
3020
3021 return info
3022
3023
3024class YoutubeTabIE(YoutubeBaseInfoExtractor):
3025 IE_DESC = 'YouTube.com tab'
3026 _VALID_URL = r'''(?x)
3027 https?://
3028 (?:\w+\.)?
3029 (?:
3030 youtube(?:kids)?\.com|
3031 invidio\.us
3032 )/
3033 (?:
3034 (?P<channel_type>channel|c|user|browse)/|
3035 (?P<not_channel>
3036 feed/|hashtag/|
3037 (?:playlist|watch)\?.*?\blist=
3038 )|
3039 (?!(?:%s)\b) # Direct URLs
3040 )
3041 (?P<id>[^/?\#&]+)
3042 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3043 IE_NAME = 'youtube:tab'
3044
3045 _TESTS = [{
3046 'note': 'playlists, multipage',
3047 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3048 'playlist_mincount': 94,
3049 'info_dict': {
3050 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3051 'title': 'Игорь Клейнер - Playlists',
3052 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3053 'uploader': 'Игорь Клейнер',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 },
3056 }, {
3057 'note': 'playlists, multipage, different order',
3058 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3059 'playlist_mincount': 94,
3060 'info_dict': {
3061 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3062 'title': 'Игорь Клейнер - Playlists',
3063 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3064 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3065 'uploader': 'Игорь Клейнер',
3066 },
3067 }, {
3068 'note': 'playlists, series',
3069 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3070 'playlist_mincount': 5,
3071 'info_dict': {
3072 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3073 'title': '3Blue1Brown - Playlists',
3074 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3075 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3076 'uploader': '3Blue1Brown',
3077 },
3078 }, {
3079 'note': 'playlists, singlepage',
3080 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3081 'playlist_mincount': 4,
3082 'info_dict': {
3083 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3084 'title': 'ThirstForScience - Playlists',
3085 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3086 'uploader': 'ThirstForScience',
3087 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3088 }
3089 }, {
3090 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3091 'only_matching': True,
3092 }, {
3093 'note': 'basic, single video playlist',
3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3095 'info_dict': {
3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3099 'title': 'youtube-dl public playlist',
3100 },
3101 'playlist_count': 1,
3102 }, {
3103 'note': 'empty playlist',
3104 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3105 'info_dict': {
3106 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3107 'uploader': 'Sergey M.',
3108 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3109 'title': 'youtube-dl empty playlist',
3110 },
3111 'playlist_count': 0,
3112 }, {
3113 'note': 'Home tab',
3114 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3115 'info_dict': {
3116 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3117 'title': 'lex will - Home',
3118 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3119 'uploader': 'lex will',
3120 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3121 },
3122 'playlist_mincount': 2,
3123 }, {
3124 'note': 'Videos tab',
3125 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3126 'info_dict': {
3127 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3128 'title': 'lex will - Videos',
3129 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3130 'uploader': 'lex will',
3131 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3132 },
3133 'playlist_mincount': 975,
3134 }, {
3135 'note': 'Videos tab, sorted by popular',
3136 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3137 'info_dict': {
3138 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3139 'title': 'lex will - Videos',
3140 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3141 'uploader': 'lex will',
3142 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3143 },
3144 'playlist_mincount': 199,
3145 }, {
3146 'note': 'Playlists tab',
3147 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3148 'info_dict': {
3149 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3150 'title': 'lex will - Playlists',
3151 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3152 'uploader': 'lex will',
3153 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3154 },
3155 'playlist_mincount': 17,
3156 }, {
3157 'note': 'Community tab',
3158 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3159 'info_dict': {
3160 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3161 'title': 'lex will - Community',
3162 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3163 'uploader': 'lex will',
3164 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3165 },
3166 'playlist_mincount': 18,
3167 }, {
3168 'note': 'Channels tab',
3169 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3170 'info_dict': {
3171 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3172 'title': 'lex will - Channels',
3173 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3174 'uploader': 'lex will',
3175 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3176 },
3177 'playlist_mincount': 12,
3178 }, {
3179 'note': 'Search tab',
3180 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3181 'playlist_mincount': 40,
3182 'info_dict': {
3183 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3184 'title': '3Blue1Brown - Search - linear algebra',
3185 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3186 'uploader': '3Blue1Brown',
3187 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3188 },
3189 }, {
3190 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3191 'only_matching': True,
3192 }, {
3193 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3194 'only_matching': True,
3195 }, {
3196 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3197 'only_matching': True,
3198 }, {
3199 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3200 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3201 'info_dict': {
3202 'title': '29C3: Not my department',
3203 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3204 'uploader': 'Christiaan008',
3205 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3206 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3207 },
3208 'playlist_count': 96,
3209 }, {
3210 'note': 'Large playlist',
3211 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3212 'info_dict': {
3213 'title': 'Uploads from Cauchemar',
3214 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3215 'uploader': 'Cauchemar',
3216 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3217 },
3218 'playlist_mincount': 1123,
3219 }, {
3220 'note': 'even larger playlist, 8832 videos',
3221 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3222 'only_matching': True,
3223 }, {
3224 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3225 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3226 'info_dict': {
3227 'title': 'Uploads from Interstellar Movie',
3228 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3229 'uploader': 'Interstellar Movie',
3230 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3231 },
3232 'playlist_mincount': 21,
3233 }, {
3234 'note': 'Playlist with "show unavailable videos" button',
3235 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3236 'info_dict': {
3237 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3238 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3239 'uploader': 'Phim Siêu Nhân Nhật Bản',
3240 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3241 },
3242 'playlist_mincount': 200,
3243 }, {
3244 'note': 'Playlist with unavailable videos in page 7',
3245 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3246 'info_dict': {
3247 'title': 'Uploads from BlankTV',
3248 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3249 'uploader': 'BlankTV',
3250 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3251 },
3252 'playlist_mincount': 1000,
3253 }, {
3254 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3255 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3256 'info_dict': {
3257 'title': 'Data Analysis with Dr Mike Pound',
3258 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3259 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3260 'uploader': 'Computerphile',
3261 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3262 },
3263 'playlist_mincount': 11,
3264 }, {
3265 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3266 'only_matching': True,
3267 }, {
3268 'note': 'Playlist URL that does not actually serve a playlist',
3269 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3270 'info_dict': {
3271 'id': 'FqZTN594JQw',
3272 'ext': 'webm',
3273 'title': "Smiley's People 01 detective, Adventure Series, Action",
3274 'uploader': 'STREEM',
3275 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3277 'upload_date': '20150526',
3278 'license': 'Standard YouTube License',
3279 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3280 'categories': ['People & Blogs'],
3281 'tags': list,
3282 'view_count': int,
3283 'like_count': int,
3284 'dislike_count': int,
3285 },
3286 'params': {
3287 'skip_download': True,
3288 },
3289 'skip': 'This video is not available.',
3290 'add_ie': [YoutubeIE.ie_key()],
3291 }, {
3292 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3293 'only_matching': True,
3294 }, {
3295 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3296 'only_matching': True,
3297 }, {
3298 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3299 'info_dict': {
3300 'id': 'X1whbWASnNQ', # This will keep changing
3301 'ext': 'mp4',
3302 'title': compat_str,
3303 'uploader': 'Sky News',
3304 'uploader_id': 'skynews',
3305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3306 'upload_date': r're:\d{8}',
3307 'description': compat_str,
3308 'categories': ['News & Politics'],
3309 'tags': list,
3310 'like_count': int,
3311 'dislike_count': int,
3312 },
3313 'params': {
3314 'skip_download': True,
3315 },
3316 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3317 }, {
3318 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3319 'info_dict': {
3320 'id': 'a48o2S1cPoo',
3321 'ext': 'mp4',
3322 'title': 'The Young Turks - Live Main Show',
3323 'uploader': 'The Young Turks',
3324 'uploader_id': 'TheYoungTurks',
3325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3326 'upload_date': '20150715',
3327 'license': 'Standard YouTube License',
3328 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3329 'categories': ['News & Politics'],
3330 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3331 'like_count': int,
3332 'dislike_count': int,
3333 },
3334 'params': {
3335 'skip_download': True,
3336 },
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3343 'only_matching': True,
3344 }, {
3345 'note': 'A channel that is not live. Should raise error',
3346 'url': 'https://www.youtube.com/user/numberphile/live',
3347 'only_matching': True,
3348 }, {
3349 'url': 'https://www.youtube.com/feed/trending',
3350 'only_matching': True,
3351 }, {
3352 'url': 'https://www.youtube.com/feed/library',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtube.com/feed/history',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://www.youtube.com/feed/subscriptions',
3359 'only_matching': True,
3360 }, {
3361 'url': 'https://www.youtube.com/feed/watch_later',
3362 'only_matching': True,
3363 }, {
3364 'note': 'Recommended - redirects to home page',
3365 'url': 'https://www.youtube.com/feed/recommended',
3366 'only_matching': True,
3367 }, {
3368 'note': 'inline playlist with not always working continuations',
3369 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3370 'only_matching': True,
3371 }, {
3372 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3373 'only_matching': True,
3374 }, {
3375 'url': 'https://www.youtube.com/course',
3376 'only_matching': True,
3377 }, {
3378 'url': 'https://www.youtube.com/zsecurity',
3379 'only_matching': True,
3380 }, {
3381 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://www.youtube.com/TheYoungTurks/live',
3385 'only_matching': True,
3386 }, {
3387 'url': 'https://www.youtube.com/hashtag/cctv9',
3388 'info_dict': {
3389 'id': 'cctv9',
3390 'title': '#cctv9',
3391 },
3392 'playlist_mincount': 350,
3393 }, {
3394 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3395 'only_matching': True,
3396 }, {
3397 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3398 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'only_matching': True
3400 }, {
3401 'note': '/browse/ should redirect to /channel/',
3402 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3403 'only_matching': True
3404 }, {
3405 'note': 'VLPL, should redirect to playlist?list=PL...',
3406 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3407 'info_dict': {
3408 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3409 'uploader': 'NoCopyrightSounds',
3410 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3411 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3412 'title': 'NCS Releases',
3413 },
3414 'playlist_mincount': 166,
3415 }, {
3416 'note': 'Topic, should redirect to playlist?list=UU...',
3417 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3418 'info_dict': {
3419 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3420 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3421 'title': 'Uploads from Royalty Free Music - Topic',
3422 'uploader': 'Royalty Free Music - Topic',
3423 },
3424 'expected_warnings': [
3425 'A channel/user page was given',
3426 'The URL does not have a videos tab',
3427 ],
3428 'playlist_mincount': 101,
3429 }, {
3430 'note': 'Topic without a UU playlist',
3431 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3432 'info_dict': {
3433 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3434 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3435 },
3436 'expected_warnings': [
3437 'A channel/user page was given',
3438 'The URL does not have a videos tab',
3439 'Falling back to channel URL',
3440 ],
3441 'playlist_mincount': 9,
3442 }, {
3443 'note': 'Youtube music Album',
3444 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3445 'info_dict': {
3446 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3447 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3448 },
3449 'playlist_count': 50,
3450 }]
3451
3452 @classmethod
3453 def suitable(cls, url):
3454 return False if YoutubeIE.suitable(url) else super(
3455 YoutubeTabIE, cls).suitable(url)
3456
3457 def _extract_channel_id(self, webpage):
3458 channel_id = self._html_search_meta(
3459 'channelId', webpage, 'channel id', default=None)
3460 if channel_id:
3461 return channel_id
3462 channel_url = self._html_search_meta(
3463 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3464 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3465 'twitter:app:url:googleplay'), webpage, 'channel url')
3466 return self._search_regex(
3467 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3468 channel_url, 'channel id')
3469
3470 @staticmethod
3471 def _extract_basic_item_renderer(item):
3472 # Modified from _extract_grid_item_renderer
3473 known_basic_renderers = (
3474 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3475 )
3476 for key, renderer in item.items():
3477 if not isinstance(renderer, dict):
3478 continue
3479 elif key in known_basic_renderers:
3480 return renderer
3481 elif key.startswith('grid') and key.endswith('Renderer'):
3482 return renderer
3483
    def _grid_entries(self, grid_renderer):
        """Yield url_result/video entries for each item of a grid renderer.

        Each item may be a playlist, video, channel or a generic endpoint;
        the first matching kind wins per item.
        """
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = try_get(
                renderer, (lambda x: x['title']['runs'][0]['text'],
                           lambda x: x['title']['simpleText']), compat_str)
            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                # channel renderers only carry the simpleText title form
                title = try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str)
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support: hand the URL to the first IE
            # (tab, playlist, video — in that order) that claims it
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3526
3527 def _shelf_entries_from_content(self, shelf_renderer):
3528 content = shelf_renderer.get('content')
3529 if not isinstance(content, dict):
3530 return
3531 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3532 if renderer:
3533 # TODO: add support for nested playlists so each shelf is processed
3534 # as separate playlist
3535 # TODO: this includes only first N items
3536 for entry in self._grid_entries(renderer):
3537 yield entry
3538 renderer = content.get('horizontalListRenderer')
3539 if renderer:
3540 # TODO
3541 pass
3542
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield a url_result for a shelf's endpoint URL, plus its nested content.

        When skip_channels is True, shelves that link to other channels
        ('/channels?' URLs) are skipped entirely.
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to other channels; note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
3560
3561 def _playlist_entries(self, video_list_renderer):
3562 for content in video_list_renderer['contents']:
3563 if not isinstance(content, dict):
3564 continue
3565 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3566 if not isinstance(renderer, dict):
3567 continue
3568 video_id = renderer.get('videoId')
3569 if not video_id:
3570 continue
3571 yield self._extract_video(renderer)
3572
3573 def _rich_entries(self, rich_grid_renderer):
3574 renderer = try_get(
3575 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3576 video_id = renderer.get('videoId')
3577 if not video_id:
3578 return
3579 yield self._extract_video(renderer)
3580
3581 def _video_entry(self, video_renderer):
3582 video_id = video_renderer.get('videoId')
3583 if video_id:
3584 return self._extract_video(video_renderer)
3585
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries found in a community (backstage) post.

        A post may carry an attached video, an attached playlist, and/or
        inline video links inside the post text; all are yielded.
        """
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # skip links that duplicate the attached video
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3621
3622 def _post_thread_continuation_entries(self, post_thread_continuation):
3623 contents = post_thread_continuation.get('contents')
3624 if not isinstance(contents, list):
3625 return
3626 for content in contents:
3627 renderer = content.get('backstagePostThreadRenderer')
3628 if not isinstance(renderer, dict):
3629 continue
3630 for entry in self._post_thread_entries(renderer):
3631 yield entry
3632
3633 r''' # unused
3634 def _rich_grid_entries(self, contents):
3635 for content in contents:
3636 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3637 if video_renderer:
3638 entry = self._video_entry(video_renderer)
3639 if entry:
3640 yield entry
3641 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations page by page.

        First extracts the entries present in the initial tab data, then
        repeatedly calls the browse API with the last-seen continuation
        token until none remains. continuation_list is a 1-element list
        used as a mutable cell because Python 2 lacks `nonlocal`.
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section: may still be a rich grid item
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # dispatch on the first recognized renderer key
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry forward visitorData returned by the API, if any
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # older-style continuation responses ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # newer-style responses ('onResponseReceived...'): the renderer
            # key of the first item decides how all continuation items are
            # wrapped and dispatched
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3761
3762 @staticmethod
3763 def _extract_selected_tab(tabs):
3764 for tab in tabs:
3765 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3766 if renderer.get('selected') is True:
3767 return renderer
3768 else:
3769 raise ExtractorError('Unable to find selected tab')
3770
3771 @staticmethod
3772 def _extract_uploader(data):
3773 uploader = {}
3774 sidebar_renderer = try_get(
3775 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3776 if sidebar_renderer:
3777 for item in sidebar_renderer:
3778 if not isinstance(item, dict):
3779 continue
3780 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3781 if not isinstance(renderer, dict):
3782 continue
3783 owner = try_get(
3784 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3785 if owner:
3786 uploader['uploader'] = owner.get('text')
3787 uploader['uploader_id'] = try_get(
3788 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3789 uploader['uploader_url'] = urljoin(
3790 'https://www.youtube.com/',
3791 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3792 return {k: v for k, v in uploader.items() if v is not None}
3793
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects metadata from channelMetadataRenderer or
        playlistMetadataRenderer, then yields entries lazily via _entries().
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # playlist pages carry playlistMetadataRenderer instead
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    data,
                    lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages put the title in the header instead
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')

        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        if not channel_id:
            # fall back to sidebar owner info when channel metadata is absent
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(data),
                self._extract_ytcfg(item_id, webpage)),
            **metadata)
3866
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of an auto-generated Mix, paging via the 'next' API.

        Mixes have no fixed end: each 'next' response repeats earlier items,
        so videos are de-duplicated against last_id and extraction stops
        when the first video comes around again.
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume right after the last video seen on the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # NOTE(review): watch_endpoint may be None when the lookup
            # fails, which would make the .get() calls below raise — confirm
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query,
                ep='next',
                headers=headers,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3905
    def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
        """Extract a playlist found on a watch page.

        Regular playlists are delegated to the tab-based playlist URL;
        only Mix playlists (whose endpoint URL equals the current URL or
        is missing) are extracted inline via _extract_mix_playlist.
        """
        title = playlist.get('title') or try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
        playlist_id = playlist.get('playlistId') or item_id

        # Delegating everything except mix playlists to regular tab-based playlist URL
        playlist_url = urljoin(url, try_get(
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if playlist_url and playlist_url != url:
            return self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)

        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)
3923
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        Scans the sidebar menu for that button, then re-requests the browse
        API with its browseId/params. Returns the new API response, or None
        when there is no sidebar (the request itself is non-fatal).
        """
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if not sidebar_renderer:
            return
        browse_id = params = None
        for item in sidebar_renderer:
            if not isinstance(item, dict):
                continue
            renderer = item.get('playlistSidebarPrimaryInfoRenderer')
            menu_renderer = try_get(
                renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
            for menu_item in menu_renderer:
                if not isinstance(menu_item, dict):
                    continue
                nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
                text = try_get(
                    nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
                if not text or text.lower() != 'show unavailable videos':
                    continue
                browse_endpoint = try_get(
                    nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = browse_endpoint.get('browseId')
                params = browse_endpoint.get('params')
                break

        ytcfg = self._extract_ytcfg(item_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        query = {
            # 'wgYCCAA=' / 'VL<id>': defaults used when the button was not
            # found — presumably the generic 'include unavailable' request;
            # TODO confirm against the web client
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False,
            note='Downloading API JSON with unavailable videos')
3967
3968 def _extract_webpage(self, url, item_id):
3969 retries = self.get_param('extractor_retries', 3)
3970 count = -1
3971 last_error = 'Incomplete yt initial data recieved'
3972 while count < retries:
3973 count += 1
3974 # Sometimes youtube returns a webpage with incomplete ytInitialData
3975 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3976 if count:
3977 self.report_warning('%s. Retrying ...' % last_error)
3978 webpage = self._download_webpage(
3979 url, item_id,
3980 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
3981 data = self._extract_yt_initial_data(item_id, webpage)
3982 if data.get('contents') or data.get('currentVideoEndpoint'):
3983 break
3984 # Extract alerts here only when there is error
3985 self._extract_and_report_alerts(data)
3986 if count >= retries:
3987 raise ExtractorError(last_error)
3988 return webpage, data
3989
3990 @staticmethod
3991 def _smuggle_data(entries, data):
3992 for entry in entries:
3993 if data:
3994 entry['url'] = smuggle_url(entry['url'], data)
3995 yield entry
3996
    def _real_extract(self, url):
        # Recover any data smuggled into the URL by other extractors
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = self.__real_extract(url, smuggled_data)
        if info_dict.get('entries'):
            # Propagate the smuggled data into every playlist entry URL
            info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
        return info_dict
4005
    # Splits a URL into the span matched by _VALID_URL (pre), an optional
    # /<tab> path segment (only attempted when the channel_type group
    # matched, via the (?(channel_type)...) conditional), and the rest (post)
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4007
    def __real_extract(self, url, smuggled_data):
        """Core tab-page extraction.

        Normalizes the URL (www.youtube.com host, music redirects, channel
        home -> /videos), then dispatches the downloaded data to the tab,
        watch-playlist or single-video handler.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict of _url_re with None values normalized to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4123
4124
class YoutubePlaylistIE(InfoExtractor):
    """Thin extractor for bare playlist IDs and legacy playlist URLs.

    Matches a playlist id (optionally embedded in a youtube/invidious URL)
    and delegates the actual extraction to YoutubeTabIE via a canonical
    https://www.youtube.com/playlist URL.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # watch URLs with a video id belong to YoutubeIE
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Build the canonical playlist URL, preserving any query parameters
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4207
4208
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a list= parameter."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite to a canonical watch URL and let the tab extractor decide
        # between the single video and the playlist.
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4247
4248
class YoutubeYtUserIE(InfoExtractor):
    """Resolve 'ytuser:NAME' pseudo-URLs to the corresponding /user/ page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4262
4263
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve ':ytfav' (and spelling variants) to the liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'LL' is the liked-videos list id; hand off to the tab extractor
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4281
4282
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* search results for *query*, following continuation pages."""
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search',
                query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not search:
                break
            # First page and continuation pages nest the section list differently
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content, lambda x: x['itemSectionRenderer']['contents'], list)
                for content in isr_contents or []:
                    # Skip anything that is not a plain video entry
                    video = content.get('videoRenderer') if isinstance(content, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue
                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                break
            request_data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4352
4353
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor returning newest uploads first (``ytsearchdate:`` keyword)."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search-filter param selecting upload-date ordering
    # (presumably a base64-encoded protobuf filter -- confirm against InnerTube)
    _SEARCH_PARAMS = 'CAI%3D'
4359
4360
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # SearchInfoExtractor builds its URL pattern from the search key;
        # this IE matches real /results URLs instead.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search for the query carried by a /results URL.

        Uses the module-level parse_qs helper for consistency with the rest
        of this file instead of composing compat_parse_qs and
        compat_urllib_parse_urlparse by hand.
        """
        qs = parse_qs(url)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # 'sp' carries the encoded search filters, if any
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4386
4387
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the IE name from the subclass-provided feed name
        return 'youtube:' + self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4404
4405
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve ':ytwatchlater' to the authenticated watch-later playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the watch-later list id; hand off to the tab extractor
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4418
4419
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended-videos feed (':ytrec', or the bare youtube.com front page)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class: this feed also works anonymously
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4435
4436
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed (':ytsubs'); login required via the base class."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4448
4449
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed (':ythis'); login required via the base class."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4458
4459
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost (usually unquoted shell '&')."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fail with a hint: these URLs cannot identify a video
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
4507
4508
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the 11 required chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fail: a short id means the URL was cut off somewhere
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)