]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Strip out internal fields such as `_filename` from infojson (Closes #42)
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28)
29from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 ExtractorError,
34 float_or_none,
35 get_element_by_id,
36 int_or_none,
37 mimetype2ext,
38 parse_codecs,
39 parse_count,
40 parse_duration,
41 remove_quotes,
42 remove_start,
43 smuggle_url,
44 str_or_none,
45 str_to_int,
46 try_get,
47 unescapeHTML,
48 unified_strdate,
49 unsmuggle_url,
50 update_url_query,
51 uppercase_escape,
52 url_or_none,
53 urlencode_postdata,
54 urljoin,
55)
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints used by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of the (reverse-engineered) "sl" sign-in API:
    # account lookup, password challenge, and two-factor challenge.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is the "TL" token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path components that are site features rather than channel/user names;
    # used (as a regex alternation) to avoid misinterpreting such URLs.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs (prefixed alphanumeric IDs) as well as the special
    # mix/watch-later/liked lists (RDMM, WL, LL, LM).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Force English interface/formats via the PREF cookie."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result entries for YoutubeIE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        # Hidden form inputs carry session tokens required by the sign-in API.
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. f_req is a positional JSON
            # array whose layout was reverse-engineered from the web client.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Responses start with an anti-XSSI prefix (e.g. ")]}'");
                # strip everything before the first '[' to get valid JSON.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Positional payload for the account-lookup step. The meaning of the
        # individual slots is undocumented (reverse-engineered) — do not
        # reorder or alter them.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Opaque per-account token needed by the subsequent challenge steps.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Positional payload for the password-challenge step; same caveat as
        # lookup_req above.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # NOTE(review): returns None (falsy) here rather than False as
            # elsewhere — callers only test truthiness, so this is equivalent.
            return

        # A non-empty entry at [0][5] signals a login error.
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # NOTE(review): due to operator precedence this warns with the
            # full "Unable to login: Invalid password" only for
            # INCORRECT_ANSWER_ENTERED; otherwise it prints the raw
            # login_msg without the "Unable to login:" prefix — confirm
            # whether that was intended.
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Presence of a challenge entry means additional verification (TFA or
        # an interactive web challenge) is required before login completes.
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token required to address the TFA challenge endpoint.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes are sometimes entered with the SMS "G-" prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                # Positional payload for the TFA step; layout is
                # reverse-engineered — do not reorder.
                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # NOTE(review): same precedence quirk as the login
                    # warning above — the "Unable to finish TFA:" prefix is
                    # only applied for INCORRECT_ANSWER_ENTERED.
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Non-TFA challenges cannot be solved programmatically; map
                # known challenge codes to a human-readable explanation.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Fetching the CheckCookie URL finalizes the session cookies.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through myaccount.google.com.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap the base downloader, passing the query dict through a copy
        so callers' dicts are never mutated."""
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt login before any extraction."""
        if self._downloader is None:
            return
        self._set_language()
        # Login failure is non-fatal here; extraction proceeds unauthenticated.
        if not self._login():
            return

    # Minimal request context for the InnerTube ("youtubei") API, mimicking
    # the desktop web client.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # JSON blobs embedded in watch/channel pages.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that mark the end of the ytInitialData assignment, used to
    # anchor the non-greedy JSON match above.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST to the InnerTube endpoint `ep` with `query` merged into the
        default WEB-client context, and return the parsed JSON response."""
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            # Public API key used by the web client (not a secret).
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract and parse the embedded ytInitialData JSON from `webpage`.

        First tries the boundary-anchored pattern, then falls back to the
        bare (non-greedy) pattern. Raises if neither matches.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Extract and parse the ytcfg.set(...) config JSON from `webpage`.

        Non-fatal: returns an empty dict / None when absent or unparsable.
        """
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)

    def _extract_video(self, renderer):
        """Build a url_transparent result dict from a videoRenderer object.

        Every field is extracted best-effort with try_get, so missing keys
        simply yield None in the result.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Strip whitespace (incl. thousands separators in some locales) and
        # take the leading digit group, e.g. "1,234 views" -> 1234.
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
        return {
            '_type': 'url_transparent',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
355
356
357class YoutubeIE(YoutubeBaseInfoExtractor):
358 IE_DESC = 'YouTube.com'
359 _VALID_URL = r"""(?x)^
360 (
361 (?:https?://|//) # http(s):// or protocol-independent URL
362 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
363 (?:www\.)?deturl\.com/www\.youtube\.com/|
364 (?:www\.)?pwnyoutube\.com/|
365 (?:www\.)?hooktube\.com/|
366 (?:www\.)?yourepeat\.com/|
367 tube\.majestyc\.net/|
368 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
369 (?:(?:www|dev)\.)?invidio\.us/|
370 (?:(?:www|no)\.)?invidiou\.sh/|
371 (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
372 (?:www\.)?invidious\.kabi\.tk/|
373 (?:www\.)?invidious\.13ad\.de/|
374 (?:www\.)?invidious\.mastodon\.host/|
375 (?:www\.)?invidious\.zapashcanon\.fr/|
376 (?:www\.)?invidious\.kavin\.rocks/|
377 (?:www\.)?invidious\.tube/|
378 (?:www\.)?invidiou\.site/|
379 (?:www\.)?invidious\.site/|
380 (?:www\.)?invidious\.xyz/|
381 (?:www\.)?invidious\.nixnet\.xyz/|
382 (?:www\.)?invidious\.drycat\.fr/|
383 (?:www\.)?tube\.poal\.co/|
384 (?:www\.)?tube\.connect\.cafe/|
385 (?:www\.)?vid\.wxzm\.sx/|
386 (?:www\.)?vid\.mint\.lgbt/|
387 (?:www\.)?yewtu\.be/|
388 (?:www\.)?yt\.elukerio\.org/|
389 (?:www\.)?yt\.lelux\.fi/|
390 (?:www\.)?invidious\.ggc-project\.de/|
391 (?:www\.)?yt\.maisputain\.ovh/|
392 (?:www\.)?invidious\.13ad\.de/|
393 (?:www\.)?invidious\.toot\.koeln/|
394 (?:www\.)?invidious\.fdn\.fr/|
395 (?:www\.)?watch\.nettohikari\.com/|
396 (?:www\.)?kgg2m7yk5aybusll\.onion/|
397 (?:www\.)?qklhadlycap4cnod\.onion/|
398 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
399 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
400 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
401 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
402 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
403 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
404 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
405 (?:.*?\#/)? # handle anchor (#/) redirect urls
406 (?: # the various things that can precede the ID:
407 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
408 |(?: # or the v= param in all its forms
409 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
410 (?:\?|\#!?) # the params delimiter ? or # or #!
411 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
412 v=
413 )
414 ))
415 |(?:
416 youtu\.be| # just youtu.be/xxxx
417 vid\.plus| # or vid.plus/xxxx
418 zwearz\.com/watch| # or zwearz.com/watch/xxxx
419 )/
420 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
421 )
422 )? # all until now is optional -> you can pass the naked ID
423 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
424 (?!.*?\blist=
425 (?:
426 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
427 WL # WL are handled by the watch later IE
428 )
429 )
430 (?(1).+)? # if we found the ID, everything can follow
431 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
432 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
433 _PLAYER_INFO_RE = (
434 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
435 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
436 )
437 _formats = {
438 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
439 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
440 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
441 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
442 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
443 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
444 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
445 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
446 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
447 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
448 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
449 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
450 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
451 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
452 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
453 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
454 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
455 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456
457
458 # 3D videos
459 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
460 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
461 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
462 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
463 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
464 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
465 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
466
467 # Apple HTTP Live Streaming
468 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
469 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
470 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
471 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
472 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
473 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
474 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
475 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
476
477 # DASH mp4 video
478 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
482 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
483 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
484 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
485 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
486 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
487 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
488 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
489 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
490
491 # Dash mp4 audio
492 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
493 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
494 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
495 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
496 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
497 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
498 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
499
500 # Dash webm
501 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
502 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
503 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
504 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
505 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
506 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
507 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
508 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
513 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
517 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
518 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
519 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
520 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
521 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
523
524 # Dash webm audio
525 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
526 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
527
528 # Dash webm audio with opus inside
529 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
530 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
531 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
532
533 # RTMP (unnamed)
534 '_rtmp': {'protocol': 'rtmp'},
535
536 # av01 video only formats sometimes served with "unknown" codecs
537 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
538 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
539 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
540 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
541 }
542 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
543
544 _GEO_BYPASS = False
545
546 IE_NAME = 'youtube'
547 _TESTS = [
548 {
549 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
550 'info_dict': {
551 'id': 'BaW_jenozKc',
552 'ext': 'mp4',
553 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
554 'uploader': 'Philipp Hagemeister',
555 'uploader_id': 'phihag',
556 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
557 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
558 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
559 'upload_date': '20121002',
560 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
561 'categories': ['Science & Technology'],
562 'tags': ['youtube-dl'],
563 'duration': 10,
564 'view_count': int,
565 'like_count': int,
566 'dislike_count': int,
567 'start_time': 1,
568 'end_time': 9,
569 }
570 },
571 {
572 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
573 'note': 'Embed-only video (#1746)',
574 'info_dict': {
575 'id': 'yZIXLfi8CZQ',
576 'ext': 'mp4',
577 'upload_date': '20120608',
578 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
579 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
580 'uploader': 'SET India',
581 'uploader_id': 'setindia',
582 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
583 'age_limit': 18,
584 }
585 },
586 {
587 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
588 'note': 'Use the first video ID in the URL',
589 'info_dict': {
590 'id': 'BaW_jenozKc',
591 'ext': 'mp4',
592 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
593 'uploader': 'Philipp Hagemeister',
594 'uploader_id': 'phihag',
595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
596 'upload_date': '20121002',
597 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
598 'categories': ['Science & Technology'],
599 'tags': ['youtube-dl'],
600 'duration': 10,
601 'view_count': int,
602 'like_count': int,
603 'dislike_count': int,
604 },
605 'params': {
606 'skip_download': True,
607 },
608 },
609 {
610 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
611 'note': '256k DASH audio (format 141) via DASH manifest',
612 'info_dict': {
613 'id': 'a9LDPn-MO4I',
614 'ext': 'm4a',
615 'upload_date': '20121002',
616 'uploader_id': '8KVIDEO',
617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
618 'description': '',
619 'uploader': '8KVIDEO',
620 'title': 'UHDTV TEST 8K VIDEO.mp4'
621 },
622 'params': {
623 'youtube_include_dash_manifest': True,
624 'format': '141',
625 },
626 'skip': 'format 141 not served anymore',
627 },
628 # DASH manifest with encrypted signature
629 {
630 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
631 'info_dict': {
632 'id': 'IB3lcPjvWLA',
633 'ext': 'm4a',
634 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
635 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
636 'duration': 244,
637 'uploader': 'AfrojackVEVO',
638 'uploader_id': 'AfrojackVEVO',
639 'upload_date': '20131011',
640 },
641 'params': {
642 'youtube_include_dash_manifest': True,
643 'format': '141/bestaudio[ext=m4a]',
644 },
645 },
646 # Controversy video
647 {
648 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
649 'info_dict': {
650 'id': 'T4XJQO3qol8',
651 'ext': 'mp4',
652 'duration': 219,
653 'upload_date': '20100909',
654 'uploader': 'Amazing Atheist',
655 'uploader_id': 'TheAmazingAtheist',
656 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
657 'title': 'Burning Everyone\'s Koran',
658 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
659 }
660 },
661 # Normal age-gate video (embed allowed)
662 {
663 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
664 'info_dict': {
665 'id': 'HtVdAasjOgU',
666 'ext': 'mp4',
667 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
668 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
669 'duration': 142,
670 'uploader': 'The Witcher',
671 'uploader_id': 'WitcherGame',
672 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
673 'upload_date': '20140605',
674 'age_limit': 18,
675 },
676 },
677 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
678 # YouTube Red ad is not captured for creator
679 {
680 'url': '__2ABJjxzNo',
681 'info_dict': {
682 'id': '__2ABJjxzNo',
683 'ext': 'mp4',
684 'duration': 266,
685 'upload_date': '20100430',
686 'uploader_id': 'deadmau5',
687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
688 'creator': 'Dada Life, deadmau5',
689 'description': 'md5:12c56784b8032162bb936a5f76d55360',
690 'uploader': 'deadmau5',
691 'title': 'Deadmau5 - Some Chords (HD)',
692 'alt_title': 'This Machine Kills Some Chords',
693 },
694 'expected_warnings': [
695 'DASH manifest missing',
696 ]
697 },
698 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
699 {
700 'url': 'lqQg6PlCWgI',
701 'info_dict': {
702 'id': 'lqQg6PlCWgI',
703 'ext': 'mp4',
704 'duration': 6085,
705 'upload_date': '20150827',
706 'uploader_id': 'olympic',
707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
708 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
709 'uploader': 'Olympic',
710 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
711 },
712 'params': {
713 'skip_download': 'requires avconv',
714 }
715 },
716 # Non-square pixels
717 {
718 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
719 'info_dict': {
720 'id': '_b-2C3KPAM0',
721 'ext': 'mp4',
722 'stretched_ratio': 16 / 9.,
723 'duration': 85,
724 'upload_date': '20110310',
725 'uploader_id': 'AllenMeow',
726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
727 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
728 'uploader': '孫ᄋᄅ',
729 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
730 },
731 },
732 # url_encoded_fmt_stream_map is empty string
733 {
734 'url': 'qEJwOuvDf7I',
735 'info_dict': {
736 'id': 'qEJwOuvDf7I',
737 'ext': 'webm',
738 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
739 'description': '',
740 'upload_date': '20150404',
741 'uploader_id': 'spbelect',
742 'uploader': 'Наблюдатели Петербурга',
743 },
744 'params': {
745 'skip_download': 'requires avconv',
746 },
747 'skip': 'This live event has ended.',
748 },
749 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
750 {
751 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
752 'info_dict': {
753 'id': 'FIl7x6_3R5Y',
754 'ext': 'webm',
755 'title': 'md5:7b81415841e02ecd4313668cde88737a',
756 'description': 'md5:116377fd2963b81ec4ce64b542173306',
757 'duration': 220,
758 'upload_date': '20150625',
759 'uploader_id': 'dorappi2000',
760 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
761 'uploader': 'dorappi2000',
762 'formats': 'mincount:31',
763 },
764 'skip': 'not actual anymore',
765 },
766 # DASH manifest with segment_list
767 {
768 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
769 'md5': '8ce563a1d667b599d21064e982ab9e31',
770 'info_dict': {
771 'id': 'CsmdDsKjzN8',
772 'ext': 'mp4',
773 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
774 'uploader': 'Airtek',
775 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
776 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
777 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
778 },
779 'params': {
780 'youtube_include_dash_manifest': True,
781 'format': '135', # bestvideo
782 },
783 'skip': 'This live event has ended.',
784 },
785 {
786 # Multifeed videos (multiple cameras), URL is for Main Camera
787 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
788 'info_dict': {
789 'id': 'jqWvoWXjCVs',
790 'title': 'teamPGP: Rocket League Noob Stream',
791 'description': 'md5:dc7872fb300e143831327f1bae3af010',
792 },
793 'playlist': [{
794 'info_dict': {
795 'id': 'jqWvoWXjCVs',
796 'ext': 'mp4',
797 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
798 'description': 'md5:dc7872fb300e143831327f1bae3af010',
799 'duration': 7335,
800 'upload_date': '20150721',
801 'uploader': 'Beer Games Beer',
802 'uploader_id': 'beergamesbeer',
803 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
804 'license': 'Standard YouTube License',
805 },
806 }, {
807 'info_dict': {
808 'id': '6h8e8xoXJzg',
809 'ext': 'mp4',
810 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
811 'description': 'md5:dc7872fb300e143831327f1bae3af010',
812 'duration': 7337,
813 'upload_date': '20150721',
814 'uploader': 'Beer Games Beer',
815 'uploader_id': 'beergamesbeer',
816 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
817 'license': 'Standard YouTube License',
818 },
819 }, {
820 'info_dict': {
821 'id': 'PUOgX5z9xZw',
822 'ext': 'mp4',
823 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
824 'description': 'md5:dc7872fb300e143831327f1bae3af010',
825 'duration': 7337,
826 'upload_date': '20150721',
827 'uploader': 'Beer Games Beer',
828 'uploader_id': 'beergamesbeer',
829 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
830 'license': 'Standard YouTube License',
831 },
832 }, {
833 'info_dict': {
834 'id': 'teuwxikvS5k',
835 'ext': 'mp4',
836 'title': 'teamPGP: Rocket League Noob Stream (zim)',
837 'description': 'md5:dc7872fb300e143831327f1bae3af010',
838 'duration': 7334,
839 'upload_date': '20150721',
840 'uploader': 'Beer Games Beer',
841 'uploader_id': 'beergamesbeer',
842 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
843 'license': 'Standard YouTube License',
844 },
845 }],
846 'params': {
847 'skip_download': True,
848 },
849 'skip': 'This video is not available.',
850 },
851 {
852 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
853 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
854 'info_dict': {
855 'id': 'gVfLd0zydlo',
856 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
857 },
858 'playlist_count': 2,
859 'skip': 'Not multifeed anymore',
860 },
861 {
862 'url': 'https://vid.plus/FlRa-iH7PGw',
863 'only_matching': True,
864 },
865 {
866 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
867 'only_matching': True,
868 },
869 {
870 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
871 # Also tests cut-off URL expansion in video description (see
872 # https://github.com/ytdl-org/youtube-dl/issues/1892,
873 # https://github.com/ytdl-org/youtube-dl/issues/8164)
874 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
875 'info_dict': {
876 'id': 'lsguqyKfVQg',
877 'ext': 'mp4',
878 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
879 'alt_title': 'Dark Walk - Position Music',
880 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
881 'duration': 133,
882 'upload_date': '20151119',
883 'uploader_id': 'IronSoulElf',
884 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
885 'uploader': 'IronSoulElf',
886 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
887 'track': 'Dark Walk - Position Music',
888 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
889 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
890 },
891 'params': {
892 'skip_download': True,
893 },
894 },
895 {
896 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
897 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
898 'only_matching': True,
899 },
900 {
901 # Video with yt:stretch=17:0
902 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
903 'info_dict': {
904 'id': 'Q39EVAstoRM',
905 'ext': 'mp4',
906 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
907 'description': 'md5:ee18a25c350637c8faff806845bddee9',
908 'upload_date': '20151107',
909 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
910 'uploader': 'CH GAMER DROID',
911 },
912 'params': {
913 'skip_download': True,
914 },
915 'skip': 'This video does not exist.',
916 },
917 {
918 # Video licensed under Creative Commons
919 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
920 'info_dict': {
921 'id': 'M4gD1WSo5mA',
922 'ext': 'mp4',
923 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
924 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
925 'duration': 721,
926 'upload_date': '20150127',
927 'uploader_id': 'BerkmanCenter',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
929 'uploader': 'The Berkman Klein Center for Internet & Society',
930 'license': 'Creative Commons Attribution license (reuse allowed)',
931 },
932 'params': {
933 'skip_download': True,
934 },
935 },
936 {
937 # Channel-like uploader_url
938 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
939 'info_dict': {
940 'id': 'eQcmzGIKrzg',
941 'ext': 'mp4',
942 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
943 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
944 'duration': 4060,
945 'upload_date': '20151119',
946 'uploader': 'Bernie Sanders',
947 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
948 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
949 'license': 'Creative Commons Attribution license (reuse allowed)',
950 },
951 'params': {
952 'skip_download': True,
953 },
954 },
955 {
956 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
957 'only_matching': True,
958 },
959 {
960 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
961 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
962 'only_matching': True,
963 },
964 {
965 # Rental video preview
966 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
967 'info_dict': {
968 'id': 'uGpuVWrhIzE',
969 'ext': 'mp4',
970 'title': 'Piku - Trailer',
971 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
972 'upload_date': '20150811',
973 'uploader': 'FlixMatrix',
974 'uploader_id': 'FlixMatrixKaravan',
975 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
976 'license': 'Standard YouTube License',
977 },
978 'params': {
979 'skip_download': True,
980 },
981 'skip': 'This video is not available.',
982 },
983 {
984 # YouTube Red video with episode data
985 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
986 'info_dict': {
987 'id': 'iqKdEhx-dD4',
988 'ext': 'mp4',
989 'title': 'Isolation - Mind Field (Ep 1)',
990 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
991 'duration': 2085,
992 'upload_date': '20170118',
993 'uploader': 'Vsauce',
994 'uploader_id': 'Vsauce',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
996 'series': 'Mind Field',
997 'season_number': 1,
998 'episode_number': 1,
999 },
1000 'params': {
1001 'skip_download': True,
1002 },
1003 'expected_warnings': [
1004 'Skipping DASH manifest',
1005 ],
1006 },
1007 {
1008 # The following content has been identified by the YouTube community
1009 # as inappropriate or offensive to some audiences.
1010 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1011 'info_dict': {
1012 'id': '6SJNVb0GnPI',
1013 'ext': 'mp4',
1014 'title': 'Race Differences in Intelligence',
1015 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1016 'duration': 965,
1017 'upload_date': '20140124',
1018 'uploader': 'New Century Foundation',
1019 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1020 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1021 },
1022 'params': {
1023 'skip_download': True,
1024 },
1025 },
1026 {
1027 # itag 212
1028 'url': '1t24XAntNCY',
1029 'only_matching': True,
1030 },
1031 {
1032 # geo restricted to JP
1033 'url': 'sJL6WA-aGkQ',
1034 'only_matching': True,
1035 },
1036 {
1037 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1038 'only_matching': True,
1039 },
1040 {
1041 # DRM protected
1042 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1043 'only_matching': True,
1044 },
1045 {
1046 # Video with unsupported adaptive stream type formats
1047 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1048 'info_dict': {
1049 'id': 'Z4Vy8R84T1U',
1050 'ext': 'mp4',
1051 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1052 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1053 'duration': 433,
1054 'upload_date': '20130923',
1055 'uploader': 'Amelia Putri Harwita',
1056 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1057 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1058 'formats': 'maxcount:10',
1059 },
1060 'params': {
1061 'skip_download': True,
1062 'youtube_include_dash_manifest': False,
1063 },
1064 'skip': 'not actual anymore',
1065 },
1066 {
1067 # Youtube Music Auto-generated description
1068 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1069 'info_dict': {
1070 'id': 'MgNrAu2pzNs',
1071 'ext': 'mp4',
1072 'title': 'Voyeur Girl',
1073 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1074 'upload_date': '20190312',
1075 'uploader': 'Stephen - Topic',
1076 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1077 'artist': 'Stephen',
1078 'track': 'Voyeur Girl',
1079 'album': 'it\'s too much love to know my dear',
1080 'release_date': '20190313',
1081 'release_year': 2019,
1082 },
1083 'params': {
1084 'skip_download': True,
1085 },
1086 },
1087 {
1088 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1089 'only_matching': True,
1090 },
1091 {
1092 # invalid -> valid video id redirection
1093 'url': 'DJztXj2GPfl',
1094 'info_dict': {
1095 'id': 'DJztXj2GPfk',
1096 'ext': 'mp4',
1097 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1098 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1099 'upload_date': '20090125',
1100 'uploader': 'Prochorowka',
1101 'uploader_id': 'Prochorowka',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1103 'artist': 'Panjabi MC',
1104 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1105 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1106 },
1107 'params': {
1108 'skip_download': True,
1109 },
1110 },
1111 {
1112 # empty description results in an empty string
1113 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1114 'info_dict': {
1115 'id': 'x41yOUIvK2k',
1116 'ext': 'mp4',
1117 'title': 'IMG 3456',
1118 'description': '',
1119 'upload_date': '20170613',
1120 'uploader_id': 'ElevageOrVert',
1121 'uploader': 'ElevageOrVert',
1122 },
1123 'params': {
1124 'skip_download': True,
1125 },
1126 },
1127 {
1128 # with '};' inside yt initial data (see [1])
1129 # see [2] for an example with '};' inside ytInitialPlayerResponse
1130 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1131 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1132 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1133 'info_dict': {
1134 'id': 'CHqg6qOn4no',
1135 'ext': 'mp4',
1136 'title': 'Part 77 Sort a list of simple types in c#',
1137 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1138 'upload_date': '20130831',
1139 'uploader_id': 'kudvenkat',
1140 'uploader': 'kudvenkat',
1141 },
1142 'params': {
1143 'skip_download': True,
1144 },
1145 },
1146 {
1147 # another example of '};' in ytInitialData
1148 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1149 'only_matching': True,
1150 },
1151 {
1152 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1153 'only_matching': True,
1154 },
1155 ]
1156
1157 def __init__(self, *args, **kwargs):
1158 super(YoutubeIE, self).__init__(*args, **kwargs)
1159 self._player_cache = {}
1160
1161 def report_video_info_webpage_download(self, video_id):
1162 """Report attempt to download video info webpage."""
1163 self.to_screen('%s: Downloading video info webpage' % video_id)
1164
1165 def report_information_extraction(self, video_id):
1166 """Report attempt to extract video information."""
1167 self.to_screen('%s: Extracting video information' % video_id)
1168
1169 def report_unavailable_format(self, video_id, format):
1170 """Report extracted video URL."""
1171 self.to_screen('%s: Format %s not available' % (video_id, format))
1172
1173 def report_rtmp_download(self):
1174 """Indicate the download will use the RTMP protocol."""
1175 self.to_screen('RTMP download detected')
1176
1177 def _signature_cache_id(self, example_sig):
1178 """ Return a string representation of a signature """
1179 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1180
1181 @classmethod
1182 def _extract_player_info(cls, player_url):
1183 for player_re in cls._PLAYER_INFO_RE:
1184 id_m = re.search(player_re, player_url)
1185 if id_m:
1186 break
1187 else:
1188 raise ExtractorError('Cannot identify player %r' % player_url)
1189 return id_m.group('ext'), id_m.group('id')
1190
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Obtain a callable that decrypts signatures for the given player.

        The result is cached on disk keyed by player type, player id and the
        length layout of example_sig, so the expensive player download and
        parsing happens only once per player/signature layout.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id becomes part of a cache filename, so it must not contain
        # path separators
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is just a list of input character positions:
            # applying it reorders/selects characters of the signature
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Derive the position mapping by running the extracted function on a
        # probe string of unique characters, then persist it for reuse
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1230
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function
        (enabled by the youtube_print_sig_code option)."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a contiguous run of indices as a slice expression
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, collapsing runs with stride
            # +1/-1 into slices and emitting isolated indices as s[i]
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # Run ended: emit the accumulated slice
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Recover the index permutation by probing with unique characters
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1269
    def _parse_sig_js(self, jscode):
        """Locate the signature-deciphering function inside the player
        JavaScript and return a Python callable wrapping it."""
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The interpreted JS function takes its arguments as a list
        return lambda s: initial_function([s])
1290
1291 def _parse_sig_swf(self, file_contents):
1292 swfi = SWFInterpreter(file_contents)
1293 TARGET_CLASSNAME = 'SignatureDecipher'
1294 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1295 initial_function = swfi.extract_function(searched_class, 'decipher')
1296 return lambda s: initial_function([s])
1297
1298 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1299 """Turn the encrypted s field into a working signature"""
1300
1301 if player_url is None:
1302 raise ExtractorError('Cannot decrypt signature without player_url')
1303
1304 if player_url.startswith('//'):
1305 player_url = 'https:' + player_url
1306 elif not re.match(r'https?://', player_url):
1307 player_url = compat_urlparse.urljoin(
1308 'https://www.youtube.com', player_url)
1309 try:
1310 player_id = (player_url, self._signature_cache_id(s))
1311 if player_id not in self._player_cache:
1312 func = self._extract_signature_function(
1313 video_id, player_url, s
1314 )
1315 self._player_cache[player_id] = func
1316 func = self._player_cache[player_id]
1317 if self._downloader.params.get('youtube_print_sig_code'):
1318 self._print_sig_code(func, s)
1319 return func(s)
1320 except Exception as e:
1321 tb = traceback.format_exc()
1322 raise ExtractorError(
1323 'Signature extraction failed: ' + tb, cause=e)
1324
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Download the list of manually created subtitle tracks for a video.

        Returns a dict mapping language code to a list of subtitle format
        dicts, or {} (after a warning) when none are available. The webpage
        argument is not used by this implementation.
        """
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track found for each language
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            # Synthetic entry with no 'url': the youtube_live_chat_replay
            # protocol downloader works from the video id instead
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1364
1365 def _get_ytplayer_config(self, video_id, webpage):
1366 patterns = (
1367 # User data may contain arbitrary character sequences that may affect
1368 # JSON extraction with regex, e.g. when '};' is contained the second
1369 # regex won't capture the whole JSON. Yet working around by trying more
1370 # concrete regex first keeping in mind proper quoted string handling
1371 # to be implemented in future that will replace this workaround (see
1372 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1373 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1374 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1375 r';ytplayer\.config\s*=\s*({.+?});',
1376 )
1377 config = self._search_regex(
1378 patterns, webpage, 'ytplayer.config', default=None)
1379 if config:
1380 return self._parse_json(
1381 uppercase_escape(config), video_id, fatal=False)
1382
1383 def _get_automatic_captions(self, video_id, player_response, player_config):
1384 """We need the webpage for getting the captions url, pass it as an
1385 argument to speed up the process."""
1386 self.to_screen('%s: Looking for automatic captions' % video_id)
1387 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
1388 if not (player_response or player_config):
1389 self._downloader.report_warning(err_msg)
1390 return {}
1391 try:
1392 args = player_config.get('args') if player_config else {}
1393 caption_url = args.get('ttsurl')
1394 if caption_url:
1395 timestamp = args['timestamp']
1396 # We get the available subtitles
1397 list_params = compat_urllib_parse_urlencode({
1398 'type': 'list',
1399 'tlangs': 1,
1400 'asrs': 1,
1401 })
1402 list_url = caption_url + '&' + list_params
1403 caption_list = self._download_xml(list_url, video_id)
1404 original_lang_node = caption_list.find('track')
1405 if original_lang_node is None:
1406 self._downloader.report_warning('Video doesn\'t have automatic captions')
1407 return {}
1408 original_lang = original_lang_node.attrib['lang_code']
1409 caption_kind = original_lang_node.attrib.get('kind', '')
1410
1411 sub_lang_list = {}
1412 for lang_node in caption_list.findall('target'):
1413 sub_lang = lang_node.attrib['lang_code']
1414 sub_formats = []
1415 for ext in self._SUBTITLE_FORMATS:
1416 params = compat_urllib_parse_urlencode({
1417 'lang': original_lang,
1418 'tlang': sub_lang,
1419 'fmt': ext,
1420 'ts': timestamp,
1421 'kind': caption_kind,
1422 })
1423 sub_formats.append({
1424 'url': caption_url + '&' + params,
1425 'ext': ext,
1426 })
1427 sub_lang_list[sub_lang] = sub_formats
1428 return sub_lang_list
1429
1430 def make_captions(sub_url, sub_langs):
1431 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1432 caption_qs = compat_parse_qs(parsed_sub_url.query)
1433 captions = {}
1434 for sub_lang in sub_langs:
1435 sub_formats = []
1436 for ext in self._SUBTITLE_FORMATS:
1437 caption_qs.update({
1438 'tlang': [sub_lang],
1439 'fmt': [ext],
1440 })
1441 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1442 query=compat_urllib_parse_urlencode(caption_qs, True)))
1443 sub_formats.append({
1444 'url': sub_url,
1445 'ext': ext,
1446 })
1447 captions[sub_lang] = sub_formats
1448 return captions
1449
1450 # New captions format as of 22.06.2017
1451 if player_response:
1452 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1453 base_url = renderer['captionTracks'][0]['baseUrl']
1454 sub_lang_list = []
1455 for lang in renderer['translationLanguages']:
1456 lang_code = lang.get('languageCode')
1457 if lang_code:
1458 sub_lang_list.append(lang_code)
1459 return make_captions(base_url, sub_lang_list)
1460
1461 # Some videos don't provide ttsurl but rather caption_tracks and
1462 # caption_translation_languages (e.g. 20LmZk1hakA)
1463 # Does not used anymore as of 22.06.2017
1464 caption_tracks = args['caption_tracks']
1465 caption_translation_languages = args['caption_translation_languages']
1466 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1467 sub_lang_list = []
1468 for lang in caption_translation_languages.split(','):
1469 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1470 sub_lang = lang_qs.get('lc', [None])[0]
1471 if sub_lang:
1472 sub_lang_list.append(sub_lang)
1473 return make_captions(caption_url, sub_lang_list)
1474 # An extractor error can be raise by the download process if there are
1475 # no automatic captions but there are subtitles
1476 except (KeyError, IndexError, ExtractorError):
1477 self._downloader.report_warning(err_msg)
1478 return {}
1479
1480 def _mark_watched(self, video_id, video_info, player_response):
1481 playback_url = url_or_none(try_get(
1482 player_response,
1483 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1484 video_info, lambda x: x['videostats_playback_base_url'][0]))
1485 if not playback_url:
1486 return
1487 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1488 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1489
1490 # cpn generation algorithm is reverse engineered from base.js.
1491 # In fact it works even with dummy cpn.
1492 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1493 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1494
1495 qs.update({
1496 'ver': ['2'],
1497 'cpn': [cpn],
1498 })
1499 playback_url = compat_urlparse.urlunparse(
1500 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1501
1502 self._download_webpage(
1503 playback_url, video_id, 'Marking watched',
1504 'Unable to mark watched', fatal=False)
1505
1506 @staticmethod
1507 def _extract_urls(webpage):
1508 # Embedded YouTube player
1509 entries = [
1510 unescapeHTML(mobj.group('url'))
1511 for mobj in re.finditer(r'''(?x)
1512 (?:
1513 <iframe[^>]+?src=|
1514 data-video-url=|
1515 <embed[^>]+?src=|
1516 embedSWF\(?:\s*|
1517 <object[^>]+data=|
1518 new\s+SWFObject\(
1519 )
1520 (["\'])
1521 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1522 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1523 \1''', webpage)]
1524
1525 # lazyYT YouTube embed
1526 entries.extend(list(map(
1527 unescapeHTML,
1528 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1529
1530 # Wordpress "YouTube Video Importer" plugin
1531 matches = re.findall(r'''(?x)<div[^>]+
1532 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1533 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1534 entries.extend(m[-1] for m in matches)
1535
1536 return entries
1537
1538 @staticmethod
1539 def _extract_url(webpage):
1540 urls = YoutubeIE._extract_urls(webpage)
1541 return urls[0] if urls else None
1542
1543 @classmethod
1544 def extract_id(cls, url):
1545 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1546 if mobj is None:
1547 raise ExtractorError('Invalid URL: %s' % url)
1548 video_id = mobj.group(2)
1549 return video_id
1550
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Extract chapter markers from the ytInitialData JSON embedded in
        the watch page.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
        when no usable chapter data is present.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Chapter start times are given in milliseconds; convert to
            # seconds (returns None when missing)
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one begins; the last chapter ends
            # at the video duration (skipped entirely when duration is None)
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1595
1596 @staticmethod
1597 def _extract_chapters_from_description(description, duration):
1598 if not description:
1599 return None
1600 chapter_lines = re.findall(
1601 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1602 description)
1603 if not chapter_lines:
1604 return None
1605 chapters = []
1606 for next_num, (chapter_line, time_point) in enumerate(
1607 chapter_lines, start=1):
1608 start_time = parse_duration(time_point)
1609 if start_time is None:
1610 continue
1611 if start_time > duration:
1612 break
1613 end_time = (duration if next_num == len(chapter_lines)
1614 else parse_duration(chapter_lines[next_num][1]))
1615 if end_time is None:
1616 continue
1617 if end_time > duration:
1618 end_time = duration
1619 if start_time > end_time:
1620 break
1621 chapter_title = re.sub(
1622 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1623 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1624 chapters.append({
1625 'start_time': start_time,
1626 'end_time': end_time,
1627 'title': chapter_title,
1628 })
1629 return chapters
1630
1631 def _extract_chapters(self, webpage, description, video_id, duration):
1632 return (self._extract_chapters_from_json(webpage, video_id, duration)
1633 or self._extract_chapters_from_description(description, duration))
1634
1635 def _real_extract(self, url):
1636 url, smuggled_data = unsmuggle_url(url, {})
1637
1638 proto = (
1639 'http' if self._downloader.params.get('prefer_insecure', False)
1640 else 'https')
1641
1642 start_time = None
1643 end_time = None
1644 parsed_url = compat_urllib_parse_urlparse(url)
1645 for component in [parsed_url.fragment, parsed_url.query]:
1646 query = compat_parse_qs(component)
1647 if start_time is None and 't' in query:
1648 start_time = parse_duration(query['t'][0])
1649 if start_time is None and 'start' in query:
1650 start_time = parse_duration(query['start'][0])
1651 if end_time is None and 'end' in query:
1652 end_time = parse_duration(query['end'][0])
1653
1654 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1655 mobj = re.search(self._NEXT_URL_RE, url)
1656 if mobj:
1657 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1658 video_id = self.extract_id(url)
1659
1660 # Get video webpage
1661 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1662 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1663
1664 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1665 video_id = qs.get('v', [None])[0] or video_id
1666
1667 # Attempt to extract SWF player URL
1668 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1669 if mobj is not None:
1670 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1671 else:
1672 player_url = None
1673
1674 dash_mpds = []
1675
1676 def add_dash_mpd(video_info):
1677 dash_mpd = video_info.get('dashmpd')
1678 if dash_mpd and dash_mpd[0] not in dash_mpds:
1679 dash_mpds.append(dash_mpd[0])
1680
1681 def add_dash_mpd_pr(pl_response):
1682 dash_mpd = url_or_none(try_get(
1683 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1684 compat_str))
1685 if dash_mpd and dash_mpd not in dash_mpds:
1686 dash_mpds.append(dash_mpd)
1687
1688 is_live = None
1689 view_count = None
1690
1691 def extract_view_count(v_info):
1692 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1693
1694 def extract_player_response(player_response, video_id):
1695 pl_response = str_or_none(player_response)
1696 if not pl_response:
1697 return
1698 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1699 if isinstance(pl_response, dict):
1700 add_dash_mpd_pr(pl_response)
1701 return pl_response
1702
1703 def extract_embedded_config(embed_webpage, video_id):
1704 embedded_config = self._search_regex(
1705 r'setConfig\(({.*})\);',
1706 embed_webpage, 'ytInitialData', default=None)
1707 if embedded_config:
1708 return embedded_config
1709
1710 video_info = {}
1711 player_response = {}
1712 ytplayer_config = None
1713 embed_webpage = None
1714
1715 # Get video info
1716 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1717 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1718 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1719 age_gate = True
1720 # We simulate the access to the video from www.youtube.com/v/{video_id}
1721 # this can be viewed without login into Youtube
1722 url = proto + '://www.youtube.com/embed/%s' % video_id
1723 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1724 ext = extract_embedded_config(embed_webpage, video_id)
1725 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1726 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1727 if not playable_in_embed:
1728 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1729 playable_in_embed = ''
1730 else:
1731 playable_in_embed = playable_in_embed.group('playableinEmbed')
1732 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1733 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1734 if playable_in_embed == 'false':
1735 '''
1736 # TODO apply this patch when Support for Python 2.6(!) and above drops
1737 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1738 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1739 '''
1740 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1741 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1742 age_gate = False
1743 # Try looking directly into the video webpage
1744 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1745 if ytplayer_config:
1746 args = ytplayer_config.get("args")
1747 if args is not None:
1748 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1749 # Convert to the same format returned by compat_parse_qs
1750 video_info = dict((k, [v]) for k, v in args.items())
1751 add_dash_mpd(video_info)
1752 # Rental video is not rented but preview is available (e.g.
1753 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1754 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1755 if not video_info and args.get('ypc_vid'):
1756 return self.url_result(
1757 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1758 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1759 is_live = True
1760 if not player_response:
1761 player_response = extract_player_response(args.get('player_response'), video_id)
1762 elif not player_response:
1763 player_response = ytplayer_config
1764 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1765 add_dash_mpd_pr(player_response)
1766 else:
1767 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1768 else:
1769 data = compat_urllib_parse_urlencode({
1770 'video_id': video_id,
1771 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1772 'sts': self._search_regex(
1773 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1774 })
1775 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1776 try:
1777 video_info_webpage = self._download_webpage(
1778 video_info_url, video_id,
1779 note='Refetching age-gated info webpage',
1780 errnote='unable to download video info webpage')
1781 except ExtractorError:
1782 video_info_webpage = None
1783 if video_info_webpage:
1784 video_info = compat_parse_qs(video_info_webpage)
1785 pl_response = video_info.get('player_response', [None])[0]
1786 player_response = extract_player_response(pl_response, video_id)
1787 add_dash_mpd(video_info)
1788 view_count = extract_view_count(video_info)
1789 else:
1790 age_gate = False
1791 # Try looking directly into the video webpage
1792 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1793 if ytplayer_config:
1794 args = ytplayer_config.get('args', {})
1795 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1796 # Convert to the same format returned by compat_parse_qs
1797 video_info = dict((k, [v]) for k, v in args.items())
1798 add_dash_mpd(video_info)
1799 # Rental video is not rented but preview is available (e.g.
1800 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1801 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1802 if not video_info and args.get('ypc_vid'):
1803 return self.url_result(
1804 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1805 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1806 is_live = True
1807 if not player_response:
1808 player_response = extract_player_response(args.get('player_response'), video_id)
1809 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1810 add_dash_mpd_pr(player_response)
1811
1812 if not video_info and not player_response:
1813 player_response = extract_player_response(
1814 self._search_regex(
1815 (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
1816 self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
1817 'initial player response', default='{}'),
1818 video_id)
1819
1820 def extract_unavailable_message():
1821 messages = []
1822 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1823 msg = self._html_search_regex(
1824 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1825 video_webpage, 'unavailable %s' % kind, default=None)
1826 if msg:
1827 messages.append(msg)
1828 if messages:
1829 return '\n'.join(messages)
1830
1831 if not video_info and not player_response:
1832 unavailable_message = extract_unavailable_message()
1833 if not unavailable_message:
1834 unavailable_message = 'Unable to extract video data'
1835 raise ExtractorError(
1836 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1837
1838 if not isinstance(video_info, dict):
1839 video_info = {}
1840
1841 playable_in_embed = try_get(
1842 player_response, lambda x: x['playabilityStatus']['playableInEmbed'])
1843
1844 video_details = try_get(
1845 player_response, lambda x: x['videoDetails'], dict) or {}
1846
1847 microformat = try_get(
1848 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1849
1850 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1851 if not video_title:
1852 self._downloader.report_warning('Unable to extract video title')
1853 video_title = '_'
1854
1855 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1856 if video_description:
1857
1858 def replace_url(m):
1859 redir_url = compat_urlparse.urljoin(url, m.group(1))
1860 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1861 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1862 qs = compat_parse_qs(parsed_redir_url.query)
1863 q = qs.get('q')
1864 if q and q[0]:
1865 return q[0]
1866 return redir_url
1867
1868 description_original = video_description = re.sub(r'''(?x)
1869 <a\s+
1870 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1871 (?:title|href)="([^"]+)"\s+
1872 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1873 class="[^"]*"[^>]*>
1874 [^<]+\.{3}\s*
1875 </a>
1876 ''', replace_url, video_description)
1877 video_description = clean_html(video_description)
1878 else:
1879 video_description = video_details.get('shortDescription')
1880 if video_description is None:
1881 video_description = self._html_search_meta('description', video_webpage)
1882
1883 if not smuggled_data.get('force_singlefeed', False):
1884 if not self._downloader.params.get('noplaylist'):
1885 multifeed_metadata_list = try_get(
1886 player_response,
1887 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1888 compat_str) or try_get(
1889 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1890 if multifeed_metadata_list:
1891 entries = []
1892 feed_ids = []
1893 for feed in multifeed_metadata_list.split(','):
1894 # Unquote should take place before split on comma (,) since textual
1895 # fields may contain comma as well (see
1896 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1897 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1898
                        def feed_entry(name):
                            # First value of `name` in this feed's parsed query
                            # data, as a string (None when missing).
                            return try_get(feed_data, lambda x: x[name][0], compat_str)
1901
1902 feed_id = feed_entry('id')
1903 if not feed_id:
1904 continue
1905 feed_title = feed_entry('title')
1906 title = video_title
1907 if feed_title:
1908 title += ' (%s)' % feed_title
1909 entries.append({
1910 '_type': 'url_transparent',
1911 'ie_key': 'Youtube',
1912 'url': smuggle_url(
1913 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1914 {'force_singlefeed': True}),
1915 'title': title,
1916 })
1917 feed_ids.append(feed_id)
1918 self.to_screen(
1919 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1920 % (', '.join(feed_ids), video_id))
1921 return self.playlist_result(entries, video_id, video_title, video_description)
1922 else:
1923 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1924
1925 if view_count is None:
1926 view_count = extract_view_count(video_info)
1927 if view_count is None and video_details:
1928 view_count = int_or_none(video_details.get('viewCount'))
1929 if view_count is None and microformat:
1930 view_count = int_or_none(microformat.get('viewCount'))
1931
1932 if is_live is None:
1933 is_live = bool_or_none(video_details.get('isLive'))
1934
1935 has_live_chat_replay = False
1936 if not is_live:
1937 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
1938 try:
1939 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1940 has_live_chat_replay = True
1941 except (KeyError, IndexError, TypeError):
1942 pass
1943
1944 # Check for "rental" videos
1945 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1946 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1947
        def _extract_filesize(media_url):
            # Media URLs carry the content length as a `clen` parameter,
            # either query-style (clen=123) or path-style (/clen/123).
            return int_or_none(self._search_regex(
                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1951
1952 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1953 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1954
1955 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1956 self.report_rtmp_download()
1957 formats = [{
1958 'format_id': '_rtmp',
1959 'protocol': 'rtmp',
1960 'url': video_info['conn'][0],
1961 'player_url': player_url,
1962 }]
1963 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1964 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1965 if 'rtmpe%3Dyes' in encoded_url_map:
1966 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1967 formats = []
1968 formats_spec = {}
1969 fmt_list = video_info.get('fmt_list', [''])[0]
1970 if fmt_list:
1971 for fmt in fmt_list.split(','):
1972 spec = fmt.split('/')
1973 if len(spec) > 1:
1974 width_height = spec[1].split('x')
1975 if len(width_height) == 2:
1976 formats_spec[spec[0]] = {
1977 'resolution': spec[1],
1978 'width': int_or_none(width_height[0]),
1979 'height': int_or_none(width_height[1]),
1980 }
1981 for fmt in streaming_formats:
1982 itag = str_or_none(fmt.get('itag'))
1983 if not itag:
1984 continue
1985 quality = fmt.get('quality')
1986 quality_label = fmt.get('qualityLabel') or quality
1987 formats_spec[itag] = {
1988 'asr': int_or_none(fmt.get('audioSampleRate')),
1989 'filesize': int_or_none(fmt.get('contentLength')),
1990 'format_note': quality_label,
1991 'fps': int_or_none(fmt.get('fps')),
1992 'height': int_or_none(fmt.get('height')),
1993 # bitrate for itag 43 is always 2147483647
1994 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1995 'width': int_or_none(fmt.get('width')),
1996 }
1997
1998 for fmt in streaming_formats:
1999 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2000 continue
2001 url = url_or_none(fmt.get('url'))
2002
2003 if not url:
2004 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2005 if not cipher:
2006 continue
2007 url_data = compat_parse_qs(cipher)
2008 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2009 if not url:
2010 continue
2011 else:
2012 cipher = None
2013 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2014
2015 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2016 # Unsupported FORMAT_STREAM_TYPE_OTF
2017 if stream_type == 3:
2018 continue
2019
2020 format_id = fmt.get('itag') or url_data['itag'][0]
2021 if not format_id:
2022 continue
2023 format_id = compat_str(format_id)
2024
2025 if cipher:
2026 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2027 ASSETS_RE = (
2028 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2029 r'"jsUrl"\s*:\s*("[^"]+")',
2030 r'"assets":.+?"js":\s*("[^"]+")')
2031 jsplayer_url_json = self._search_regex(
2032 ASSETS_RE,
2033 embed_webpage if age_gate else video_webpage,
2034 'JS player URL (1)', default=None)
2035 if not jsplayer_url_json and not age_gate:
2036 # We need the embed website after all
2037 if embed_webpage is None:
2038 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2039 embed_webpage = self._download_webpage(
2040 embed_url, video_id, 'Downloading embed webpage')
2041 jsplayer_url_json = self._search_regex(
2042 ASSETS_RE, embed_webpage, 'JS player URL')
2043
2044 player_url = json.loads(jsplayer_url_json)
2045 if player_url is None:
2046 player_url_json = self._search_regex(
2047 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2048 video_webpage, 'age gate player URL')
2049 player_url = json.loads(player_url_json)
2050
2051 if 'sig' in url_data:
2052 url += '&signature=' + url_data['sig'][0]
2053 elif 's' in url_data:
2054 encrypted_sig = url_data['s'][0]
2055
2056 if self._downloader.params.get('verbose'):
2057 if player_url is None:
2058 player_desc = 'unknown'
2059 else:
2060 player_type, player_version = self._extract_player_info(player_url)
2061 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2062 parts_sizes = self._signature_cache_id(encrypted_sig)
2063 self.to_screen('{%s} signature length %s, %s' %
2064 (format_id, parts_sizes, player_desc))
2065
2066 signature = self._decrypt_signature(
2067 encrypted_sig, video_id, player_url, age_gate)
2068 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2069 url += '&%s=%s' % (sp, signature)
2070 if 'ratebypass' not in url:
2071 url += '&ratebypass=yes'
2072
2073 dct = {
2074 'format_id': format_id,
2075 'url': url,
2076 'player_url': player_url,
2077 }
2078 if format_id in self._formats:
2079 dct.update(self._formats[format_id])
2080 if format_id in formats_spec:
2081 dct.update(formats_spec[format_id])
2082
2083 # Some itags are not included in DASH manifest thus corresponding formats will
2084 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2085 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2086 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2087 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2088
2089 if width is None:
2090 width = int_or_none(fmt.get('width'))
2091 if height is None:
2092 height = int_or_none(fmt.get('height'))
2093
2094 filesize = int_or_none(url_data.get(
2095 'clen', [None])[0]) or _extract_filesize(url)
2096
2097 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2098 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2099
2100 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2101 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2102 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2103
2104 more_fields = {
2105 'filesize': filesize,
2106 'tbr': tbr,
2107 'width': width,
2108 'height': height,
2109 'fps': fps,
2110 'format_note': quality_label or quality,
2111 }
2112 for key, value in more_fields.items():
2113 if value:
2114 dct[key] = value
2115 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2116 if type_:
2117 type_split = type_.split(';')
2118 kind_ext = type_split[0].split('/')
2119 if len(kind_ext) == 2:
2120 kind, _ = kind_ext
2121 dct['ext'] = mimetype2ext(type_split[0])
2122 if kind in ('audio', 'video'):
2123 codecs = None
2124 for mobj in re.finditer(
2125 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2126 if mobj.group('key') == 'codecs':
2127 codecs = mobj.group('val')
2128 break
2129 if codecs:
2130 dct.update(parse_codecs(codecs))
2131 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2132 dct['downloader_options'] = {
2133 # Youtube throttles chunks >~10M
2134 'http_chunk_size': 10485760,
2135 }
2136 formats.append(dct)
2137 else:
2138 manifest_url = (
2139 url_or_none(try_get(
2140 player_response,
2141 lambda x: x['streamingData']['hlsManifestUrl'],
2142 compat_str))
2143 or url_or_none(try_get(
2144 video_info, lambda x: x['hlsvp'][0], compat_str)))
2145 if manifest_url:
2146 formats = []
2147 m3u8_formats = self._extract_m3u8_formats(
2148 manifest_url, video_id, 'mp4', fatal=False)
2149 for a_format in m3u8_formats:
2150 itag = self._search_regex(
2151 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2152 if itag:
2153 a_format['format_id'] = itag
2154 if itag in self._formats:
2155 dct = self._formats[itag].copy()
2156 dct.update(a_format)
2157 a_format = dct
2158 a_format['player_url'] = player_url
2159 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2160 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2161 if self._downloader.params.get('youtube_include_hls_manifest', True):
2162 formats.append(a_format)
2163 else:
2164 error_message = extract_unavailable_message()
2165 if not error_message:
2166 reason_list = try_get(
2167 player_response,
2168 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2169 list) or []
2170 for reason in reason_list:
2171 if not isinstance(reason, dict):
2172 continue
2173 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2174 if reason_text:
2175 if not error_message:
2176 error_message = ''
2177 error_message += reason_text
2178 if error_message:
2179 error_message = clean_html(error_message)
2180 if not error_message:
2181 error_message = clean_html(try_get(
2182 player_response, lambda x: x['playabilityStatus']['reason'],
2183 compat_str))
2184 if not error_message:
2185 error_message = clean_html(
2186 try_get(video_info, lambda x: x['reason'][0], compat_str))
2187 if error_message:
2188 raise ExtractorError(error_message, expected=True)
2189 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2190
2191 # uploader
2192 video_uploader = try_get(
2193 video_info, lambda x: x['author'][0],
2194 compat_str) or str_or_none(video_details.get('author'))
2195 if video_uploader:
2196 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2197 else:
2198 self._downloader.report_warning('unable to extract uploader name')
2199
2200 # uploader_id
2201 video_uploader_id = None
2202 video_uploader_url = None
2203 mobj = re.search(
2204 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2205 video_webpage)
2206 if mobj is not None:
2207 video_uploader_id = mobj.group('uploader_id')
2208 video_uploader_url = mobj.group('uploader_url')
2209 else:
2210 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2211 if owner_profile_url:
2212 video_uploader_id = self._search_regex(
2213 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2214 default=None)
2215 video_uploader_url = owner_profile_url
2216
2217 channel_id = (
2218 str_or_none(video_details.get('channelId'))
2219 or self._html_search_meta(
2220 'channelId', video_webpage, 'channel id', default=None)
2221 or self._search_regex(
2222 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2223 video_webpage, 'channel id', default=None, group='id'))
2224 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2225
2226 thumbnails = []
2227 thumbnails_list = try_get(
2228 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2229 for t in thumbnails_list:
2230 if not isinstance(t, dict):
2231 continue
2232 thumbnail_url = url_or_none(t.get('url'))
2233 if not thumbnail_url:
2234 continue
2235 thumbnails.append({
2236 'url': thumbnail_url,
2237 'width': int_or_none(t.get('width')),
2238 'height': int_or_none(t.get('height')),
2239 })
2240
2241 if not thumbnails:
2242 video_thumbnail = None
2243 # We try first to get a high quality image:
2244 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2245 video_webpage, re.DOTALL)
2246 if m_thumb is not None:
2247 video_thumbnail = m_thumb.group(1)
2248 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2249 if thumbnail_url:
2250 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2251 if video_thumbnail:
2252 thumbnails.append({'url': video_thumbnail})
2253
2254 # upload date
2255 upload_date = self._html_search_meta(
2256 'datePublished', video_webpage, 'upload date', default=None)
2257 if not upload_date:
2258 upload_date = self._search_regex(
2259 [r'(?s)id="eow-date.*?>(.*?)</span>',
2260 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2261 video_webpage, 'upload date', default=None)
2262 if not upload_date:
2263 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2264 upload_date = unified_strdate(upload_date)
2265
2266 video_license = self._html_search_regex(
2267 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2268 video_webpage, 'license', default=None)
2269
2270 m_music = re.search(
2271 r'''(?x)
2272 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2273 <ul[^>]*>\s*
2274 <li>(?P<title>.+?)
2275 by (?P<creator>.+?)
2276 (?:
2277 \(.+?\)|
2278 <a[^>]*
2279 (?:
2280 \bhref=["\']/red[^>]*>| # drop possible
2281 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2282 )
2283 .*?
2284 )?</li
2285 ''',
2286 video_webpage)
2287 if m_music:
2288 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2289 video_creator = clean_html(m_music.group('creator'))
2290 else:
2291 video_alt_title = video_creator = None
2292
2293 def extract_meta(field):
2294 return self._html_search_regex(
2295 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2296 video_webpage, field, default=None)
2297
2298 track = extract_meta('Song')
2299 artist = extract_meta('Artist')
2300 album = extract_meta('Album')
2301
2302 # Youtube Music Auto-generated description
2303 release_date = release_year = None
2304 if video_description:
2305 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2306 if mobj:
2307 if not track:
2308 track = mobj.group('track').strip()
2309 if not artist:
2310 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2311 if not album:
2312 album = mobj.group('album'.strip())
2313 release_year = mobj.group('release_year')
2314 release_date = mobj.group('release_date')
2315 if release_date:
2316 release_date = release_date.replace('-', '')
2317 if not release_year:
2318 release_year = int(release_date[:4])
2319 if release_year:
2320 release_year = int(release_year)
2321
2322 yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
2323 contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2324 for content in contents:
2325 rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
2326 multiple_songs = False
2327 for row in rows:
2328 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2329 multiple_songs = True
2330 break
2331 for row in rows:
2332 mrr = row.get('metadataRowRenderer') or {}
2333 mrr_title = try_get(
2334 mrr, lambda x: x['title']['simpleText'], compat_str)
2335 mrr_contents = try_get(
2336 mrr, lambda x: x['contents'][0], dict) or {}
2337 mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
2338 if not (mrr_title and mrr_contents_text):
2339 continue
2340 if mrr_title == 'License':
2341 video_license = mrr_contents_text
2342 elif not multiple_songs:
2343 if mrr_title == 'Album':
2344 album = mrr_contents_text
2345 elif mrr_title == 'Artist':
2346 artist = mrr_contents_text
2347 elif mrr_title == 'Song':
2348 track = mrr_contents_text
2349
2350 m_episode = re.search(
2351 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2352 video_webpage)
2353 if m_episode:
2354 series = unescapeHTML(m_episode.group('series'))
2355 season_number = int(m_episode.group('season'))
2356 episode_number = int(m_episode.group('episode'))
2357 else:
2358 series = season_number = episode_number = None
2359
2360 m_cat_container = self._search_regex(
2361 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2362 video_webpage, 'categories', default=None)
2363 category = None
2364 if m_cat_container:
2365 category = self._html_search_regex(
2366 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2367 default=None)
2368 if not category:
2369 category = try_get(
2370 microformat, lambda x: x['category'], compat_str)
2371 video_categories = None if category is None else [category]
2372
2373 video_tags = [
2374 unescapeHTML(m.group('content'))
2375 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2376 if not video_tags:
2377 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2378
2379 def _extract_count(count_name):
2380 return str_to_int(self._search_regex(
2381 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2382 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2383 video_webpage, count_name, default=None))
2384
2385 like_count = _extract_count('like')
2386 dislike_count = _extract_count('dislike')
2387
2388 if view_count is None:
2389 view_count = str_to_int(self._search_regex(
2390 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2391 'view count', default=None))
2392
2393 average_rating = (
2394 float_or_none(video_details.get('averageRating'))
2395 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2396
2397 # subtitles
2398 video_subtitles = self.extract_subtitles(
2399 video_id, video_webpage, has_live_chat_replay)
2400 automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
2401
2402 video_duration = try_get(
2403 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2404 if not video_duration:
2405 video_duration = int_or_none(video_details.get('lengthSeconds'))
2406 if not video_duration:
2407 video_duration = parse_duration(self._html_search_meta(
2408 'duration', video_webpage, 'video duration'))
2409
2410 # Get Subscriber Count of channel
2411 subscriber_count = parse_count(self._search_regex(
2412 r'"text":"([\d\.]+\w?) subscribers"',
2413 video_webpage,
2414 'subscriber count',
2415 default=None
2416 ))
2417
2418 # get xsrf for annotations or comments
2419 get_annotations = self._downloader.params.get('writeannotations', False)
2420 get_comments = self._downloader.params.get('getcomments', False)
2421 if get_annotations or get_comments:
2422 xsrf_token = None
2423 ytcfg = self._extract_ytcfg(video_id, video_webpage)
2424 if ytcfg:
2425 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2426 if not xsrf_token:
2427 xsrf_token = self._search_regex(
2428 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2429 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2430
2431 # annotations
2432 video_annotations = None
2433 if get_annotations:
2434 invideo_url = try_get(
2435 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2436 if xsrf_token and invideo_url:
2437 xsrf_field_name = None
2438 if ytcfg:
2439 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2440 if not xsrf_field_name:
2441 xsrf_field_name = self._search_regex(
2442 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2443 video_webpage, 'xsrf field name',
2444 group='xsrf_field_name', default='session_token')
2445 video_annotations = self._download_webpage(
2446 self._proto_relative_url(invideo_url),
2447 video_id, note='Downloading annotations',
2448 errnote='Unable to download video annotations', fatal=False,
2449 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2450
2451 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2452
2453 # Get comments
2454 # TODO: Refactor and move to seperate function
2455 if get_comments:
2456 expected_video_comment_count = 0
2457 video_comments = []
2458
2459 def find_value(html, key, num_chars=2, separator='"'):
2460 pos_begin = html.find(key) + len(key) + num_chars
2461 pos_end = html.find(separator, pos_begin)
2462 return html[pos_begin: pos_end]
2463
2464 def search_dict(partial, key):
2465 if isinstance(partial, dict):
2466 for k, v in partial.items():
2467 if k == key:
2468 yield v
2469 else:
2470 for o in search_dict(v, key):
2471 yield o
2472 elif isinstance(partial, list):
2473 for i in partial:
2474 for o in search_dict(i, key):
2475 yield o
2476
2477 try:
2478 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
2479 continuations = [ncd['continuation']]
2480 # Handle videos where comments have been disabled entirely
2481 except StopIteration:
2482 continuations = []
2483
2484 def get_continuation(continuation, session_token, replies=False):
2485 query = {
2486 'pbj': 1,
2487 'ctoken': continuation,
2488 }
2489 if replies:
2490 query['action_get_comment_replies'] = 1
2491 else:
2492 query['action_get_comments'] = 1
2493
2494 while True:
2495 content, handle = self._download_webpage_handle(
2496 'https://www.youtube.com/comment_service_ajax',
2497 video_id,
2498 note=False,
2499 expected_status=[413],
2500 data=urlencode_postdata({
2501 'session_token': session_token
2502 }),
2503 query=query,
2504 headers={
2505 'Accept': '*/*',
2506 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
2507 'X-YouTube-Client-Name': '1',
2508 'X-YouTube-Client-Version': '2.20201202.06.01'
2509 }
2510 )
2511
2512 response_code = handle.getcode()
2513 if (response_code == 200):
2514 return self._parse_json(content, video_id)
2515 if (response_code == 413):
2516 return None
2517 raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
2518
2519 first_continuation = True
2520 while continuations:
2521 continuation, itct = continuations.pop()
2522 comment_response = get_continuation(continuation, xsrf_token)
2523 if not comment_response:
2524 continue
2525 if list(search_dict(comment_response, 'externalErrorMessage')):
2526 raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
2527
2528 if 'continuationContents' not in comment_response['response']:
2529 # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
2530 continue
2531 # not sure if this actually helps
2532 if 'xsrf_token' in comment_response:
2533 xsrf_token = comment_response['xsrf_token']
2534
2535 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
2536 if first_continuation:
2537 expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
2538 first_continuation = False
2539 if 'contents' not in item_section:
2540 # continuation returned no comments?
2541 # set an empty array as to not break the for loop
2542 item_section['contents'] = []
2543
2544 for meta_comment in item_section['contents']:
2545 comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
2546 video_comments.append({
2547 'id': comment['commentId'],
2548 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
2549 'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
2550 'author': comment.get('authorText', {}).get('simpleText', ''),
2551 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
2552 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
2553 'parent': 'root'
2554 })
2555 if 'replies' not in meta_comment['commentThreadRenderer']:
2556 continue
2557
2558 reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
2559 while reply_continuations:
2560 time.sleep(1)
2561 continuation = reply_continuations.pop()
2562 replies_data = get_continuation(continuation, xsrf_token, True)
2563 if not replies_data or 'continuationContents' not in replies_data[1]['response']:
2564 continue
2565
2566 if self._downloader.params.get('verbose', False):
2567 self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
2568 reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
2569 for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
2570 reply_comment = reply_meta['commentRenderer']
2571 video_comments.append({
2572 'id': reply_comment['commentId'],
2573 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
2574 'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
2575 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
2576 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
2577 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
2578 'parent': comment['commentId']
2579 })
2580 if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
2581 continue
2582
2583 reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
2584
2585 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2586
2587 if 'continuations' in item_section:
2588 continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
2589 time.sleep(1)
2590
2591 self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
2592 else:
2593 expected_video_comment_count = None
2594 video_comments = None
2595
2596 # Look for the DASH manifest
2597 if self._downloader.params.get('youtube_include_dash_manifest', True):
2598 dash_mpd_fatal = True
2599 for mpd_url in dash_mpds:
2600 dash_formats = {}
2601 try:
2602 def decrypt_sig(mobj):
2603 s = mobj.group(1)
2604 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2605 return '/signature/%s' % dec_s
2606
2607 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2608
2609 for df in self._extract_mpd_formats(
2610 mpd_url, video_id, fatal=dash_mpd_fatal,
2611 formats_dict=self._formats):
2612 if not df.get('filesize'):
2613 df['filesize'] = _extract_filesize(df['url'])
2614 # Do not overwrite DASH format found in some previous DASH manifest
2615 if df['format_id'] not in dash_formats:
2616 dash_formats[df['format_id']] = df
2617 # Additional DASH manifests may end up in HTTP Error 403 therefore
2618 # allow them to fail without bug report message if we already have
2619 # some DASH manifest succeeded. This is temporary workaround to reduce
2620 # burst of bug reports until we figure out the reason and whether it
2621 # can be fixed at all.
2622 dash_mpd_fatal = False
2623 except (ExtractorError, KeyError) as e:
2624 self.report_warning(
2625 'Skipping DASH manifest: %r' % e, video_id)
2626 if dash_formats:
2627 # Remove the formats we found through non-DASH, they
2628 # contain less info and it can be wrong, because we use
2629 # fixed values (for example the resolution). See
2630 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2631 # example.
2632 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2633 formats.extend(dash_formats.values())
2634
2635 # Check for malformed aspect ratio
2636 stretched_m = re.search(
2637 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2638 video_webpage)
2639 if stretched_m:
2640 w = float(stretched_m.group('w'))
2641 h = float(stretched_m.group('h'))
2642 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2643 # We will only process correct ratios.
2644 if w > 0 and h > 0:
2645 ratio = w / h
2646 for f in formats:
2647 if f.get('vcodec') != 'none':
2648 f['stretched_ratio'] = ratio
2649
2650 if not formats:
2651 if 'reason' in video_info:
2652 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2653 regions_allowed = self._html_search_meta(
2654 'regionsAllowed', video_webpage, default=None)
2655 countries = regions_allowed.split(',') if regions_allowed else None
2656 self.raise_geo_restricted(
2657 msg=video_info['reason'][0], countries=countries)
2658 reason = video_info['reason'][0]
2659 if 'Invalid parameters' in reason:
2660 unavailable_message = extract_unavailable_message()
2661 if unavailable_message:
2662 reason = unavailable_message
2663 raise ExtractorError(
2664 'YouTube said: %s' % reason,
2665 expected=True, video_id=video_id)
2666 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2667 raise ExtractorError('This video is DRM protected.', expected=True)
2668
2669 self._sort_formats(formats)
2670
2671 self.mark_watched(video_id, video_info, player_response)
2672
2673 return {
2674 'id': video_id,
2675 'uploader': video_uploader,
2676 'uploader_id': video_uploader_id,
2677 'uploader_url': video_uploader_url,
2678 'channel_id': channel_id,
2679 'channel_url': channel_url,
2680 'upload_date': upload_date,
2681 'license': video_license,
2682 'creator': video_creator or artist,
2683 'title': video_title,
2684 'alt_title': video_alt_title or track,
2685 'thumbnails': thumbnails,
2686 'description': video_description,
2687 'categories': video_categories,
2688 'tags': video_tags,
2689 'subtitles': video_subtitles,
2690 'automatic_captions': automatic_captions,
2691 'duration': video_duration,
2692 'age_limit': 18 if age_gate else 0,
2693 'annotations': video_annotations,
2694 'chapters': chapters,
2695 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2696 'view_count': view_count,
2697 'like_count': like_count,
2698 'dislike_count': dislike_count,
2699 'average_rating': average_rating,
2700 'formats': formats,
2701 'is_live': is_live,
2702 'start_time': start_time,
2703 'end_time': end_time,
2704 'series': series,
2705 'season_number': season_number,
2706 'episode_number': episode_number,
2707 'track': track,
2708 'artist': artist,
2709 'album': album,
2710 'release_date': release_date,
2711 'release_year': release_year,
2712 'subscriber_count': subscriber_count,
2713 'playable_in_embed': playable_in_embed,
2714 'comments': video_comments,
2715 'comment_count': expected_video_comment_count,
2716 }
2717
2718
2719class YoutubeTabIE(YoutubeBaseInfoExtractor):
2720 IE_DESC = 'YouTube.com tab'
2721 _VALID_URL = r'''(?x)
2722 https?://
2723 (?:\w+\.)?
2724 (?:
2725 youtube(?:kids)?\.com|
2726 invidio\.us
2727 )/
2728 (?:
2729 (?:channel|c|user)/|
2730 (?P<not_channel>
2731 feed/|
2732 (?:playlist|watch)\?.*?\blist=
2733 )|
2734 (?!(?:%s)\b) # Direct URLs
2735 )
2736 (?P<id>[^/?\#&]+)
2737 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2738 IE_NAME = 'youtube:tab'
2739
2740 _TESTS = [{
2741 # playlists, multipage
2742 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2743 'playlist_mincount': 94,
2744 'info_dict': {
2745 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2746 'title': 'Игорь Клейнер - Playlists',
2747 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2748 },
2749 }, {
2750 # playlists, multipage, different order
2751 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2752 'playlist_mincount': 94,
2753 'info_dict': {
2754 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2755 'title': 'Игорь Клейнер - Playlists',
2756 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2757 },
2758 }, {
2759 # playlists, singlepage
2760 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2761 'playlist_mincount': 4,
2762 'info_dict': {
2763 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2764 'title': 'ThirstForScience - Playlists',
2765 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2766 }
2767 }, {
2768 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2769 'only_matching': True,
2770 }, {
2771 # basic, single video playlist
2772 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2773 'info_dict': {
2774 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2775 'uploader': 'Sergey M.',
2776 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2777 'title': 'youtube-dl public playlist',
2778 },
2779 'playlist_count': 1,
2780 }, {
2781 # empty playlist
2782 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2783 'info_dict': {
2784 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2785 'uploader': 'Sergey M.',
2786 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2787 'title': 'youtube-dl empty playlist',
2788 },
2789 'playlist_count': 0,
2790 }, {
2791 # Home tab
2792 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2793 'info_dict': {
2794 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2795 'title': 'lex will - Home',
2796 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2797 },
2798 'playlist_mincount': 2,
2799 }, {
2800 # Videos tab
2801 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2802 'info_dict': {
2803 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2804 'title': 'lex will - Videos',
2805 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2806 },
2807 'playlist_mincount': 975,
2808 }, {
2809 # Videos tab, sorted by popular
2810 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2811 'info_dict': {
2812 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2813 'title': 'lex will - Videos',
2814 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2815 },
2816 'playlist_mincount': 199,
2817 }, {
2818 # Playlists tab
2819 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2820 'info_dict': {
2821 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2822 'title': 'lex will - Playlists',
2823 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2824 },
2825 'playlist_mincount': 17,
2826 }, {
2827 # Community tab
2828 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2829 'info_dict': {
2830 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2831 'title': 'lex will - Community',
2832 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2833 },
2834 'playlist_mincount': 18,
2835 }, {
2836 # Channels tab
2837 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2838 'info_dict': {
2839 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2840 'title': 'lex will - Channels',
2841 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2842 },
2843 'playlist_mincount': 138,
2844 }, {
2845 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2846 'only_matching': True,
2847 }, {
2848 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2849 'only_matching': True,
2850 }, {
2851 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2852 'only_matching': True,
2853 }, {
2854 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2855 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2856 'info_dict': {
2857 'title': '29C3: Not my department',
2858 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2859 'uploader': 'Christiaan008',
2860 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2861 },
2862 'playlist_count': 96,
2863 }, {
2864 'note': 'Large playlist',
2865 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2866 'info_dict': {
2867 'title': 'Uploads from Cauchemar',
2868 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2869 'uploader': 'Cauchemar',
2870 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2871 },
2872 'playlist_mincount': 1123,
2873 }, {
2874 # even larger playlist, 8832 videos
2875 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2876 'only_matching': True,
2877 }, {
2878 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2879 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2880 'info_dict': {
2881 'title': 'Uploads from Interstellar Movie',
2882 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2883 'uploader': 'Interstellar Movie',
2884 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2885 },
2886 'playlist_mincount': 21,
2887 }, {
2888 # https://github.com/ytdl-org/youtube-dl/issues/21844
2889 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2890 'info_dict': {
2891 'title': 'Data Analysis with Dr Mike Pound',
2892 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2893 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2894 'uploader': 'Computerphile',
2895 },
2896 'playlist_mincount': 11,
2897 }, {
2898 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2899 'only_matching': True,
2900 }, {
2901 # Playlist URL that does not actually serve a playlist
2902 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2903 'info_dict': {
2904 'id': 'FqZTN594JQw',
2905 'ext': 'webm',
2906 'title': "Smiley's People 01 detective, Adventure Series, Action",
2907 'uploader': 'STREEM',
2908 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2910 'upload_date': '20150526',
2911 'license': 'Standard YouTube License',
2912 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2913 'categories': ['People & Blogs'],
2914 'tags': list,
2915 'view_count': int,
2916 'like_count': int,
2917 'dislike_count': int,
2918 },
2919 'params': {
2920 'skip_download': True,
2921 },
2922 'skip': 'This video is not available.',
2923 'add_ie': [YoutubeIE.ie_key()],
2924 }, {
2925 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2926 'only_matching': True,
2927 }, {
2928 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2929 'only_matching': True,
2930 }, {
2931 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2932 'info_dict': {
2933 'id': '9Auq9mYxFEE',
2934 'ext': 'mp4',
2935 'title': 'Watch Sky News live',
2936 'uploader': 'Sky News',
2937 'uploader_id': 'skynews',
2938 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2939 'upload_date': '20191102',
2940 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2941 'categories': ['News & Politics'],
2942 'tags': list,
2943 'like_count': int,
2944 'dislike_count': int,
2945 },
2946 'params': {
2947 'skip_download': True,
2948 },
2949 }, {
2950 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2951 'info_dict': {
2952 'id': 'a48o2S1cPoo',
2953 'ext': 'mp4',
2954 'title': 'The Young Turks - Live Main Show',
2955 'uploader': 'The Young Turks',
2956 'uploader_id': 'TheYoungTurks',
2957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2958 'upload_date': '20150715',
2959 'license': 'Standard YouTube License',
2960 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2961 'categories': ['News & Politics'],
2962 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2963 'like_count': int,
2964 'dislike_count': int,
2965 },
2966 'params': {
2967 'skip_download': True,
2968 },
2969 'only_matching': True,
2970 }, {
2971 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2972 'only_matching': True,
2973 }, {
2974 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2975 'only_matching': True,
2976 }, {
2977 'url': 'https://www.youtube.com/feed/trending',
2978 'only_matching': True,
2979 }, {
2980 # needs auth
2981 'url': 'https://www.youtube.com/feed/library',
2982 'only_matching': True,
2983 }, {
2984 # needs auth
2985 'url': 'https://www.youtube.com/feed/history',
2986 'only_matching': True,
2987 }, {
2988 # needs auth
2989 'url': 'https://www.youtube.com/feed/subscriptions',
2990 'only_matching': True,
2991 }, {
2992 # needs auth
2993 'url': 'https://www.youtube.com/feed/watch_later',
2994 'only_matching': True,
2995 }, {
2996 # no longer available?
2997 'url': 'https://www.youtube.com/feed/recommended',
2998 'only_matching': True,
2999 }, {
3000 # inline playlist with not always working continuations
3001 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3002 'only_matching': True,
3003 }, {
3004 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3005 'only_matching': True,
3006 }, {
3007 'url': 'https://www.youtube.com/course',
3008 'only_matching': True,
3009 }, {
3010 'url': 'https://www.youtube.com/zsecurity',
3011 'only_matching': True,
3012 }, {
3013 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3014 'only_matching': True,
3015 }, {
3016 'url': 'https://www.youtube.com/TheYoungTurks/live',
3017 'only_matching': True,
3018 }]
3019
3020 @classmethod
3021 def suitable(cls, url):
3022 return False if YoutubeIE.suitable(url) else super(
3023 YoutubeTabIE, cls).suitable(url)
3024
3025 def _extract_channel_id(self, webpage):
3026 channel_id = self._html_search_meta(
3027 'channelId', webpage, 'channel id', default=None)
3028 if channel_id:
3029 return channel_id
3030 channel_url = self._html_search_meta(
3031 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3032 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3033 'twitter:app:url:googleplay'), webpage, 'channel url')
3034 return self._search_regex(
3035 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3036 channel_url, 'channel id')
3037
3038 @staticmethod
3039 def _extract_grid_item_renderer(item):
3040 for item_kind in ('Playlist', 'Video', 'Channel'):
3041 renderer = item.get('grid%sRenderer' % item_kind)
3042 if renderer:
3043 return renderer
3044
3045 def _grid_entries(self, grid_renderer):
3046 for item in grid_renderer['items']:
3047 if not isinstance(item, dict):
3048 continue
3049 renderer = self._extract_grid_item_renderer(item)
3050 if not isinstance(renderer, dict):
3051 continue
3052 title = try_get(
3053 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3054 # playlist
3055 playlist_id = renderer.get('playlistId')
3056 if playlist_id:
3057 yield self.url_result(
3058 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3059 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3060 video_title=title)
3061 # video
3062 video_id = renderer.get('videoId')
3063 if video_id:
3064 yield self._extract_video(renderer)
3065 # channel
3066 channel_id = renderer.get('channelId')
3067 if channel_id:
3068 title = try_get(
3069 renderer, lambda x: x['title']['simpleText'], compat_str)
3070 yield self.url_result(
3071 'https://www.youtube.com/channel/%s' % channel_id,
3072 ie=YoutubeTabIE.ie_key(), video_title=title)
3073
3074 def _shelf_entries_from_content(self, shelf_renderer):
3075 content = shelf_renderer.get('content')
3076 if not isinstance(content, dict):
3077 return
3078 renderer = content.get('gridRenderer')
3079 if renderer:
3080 # TODO: add support for nested playlists so each shelf is processed
3081 # as separate playlist
3082 # TODO: this includes only first N items
3083 for entry in self._grid_entries(renderer):
3084 yield entry
3085 renderer = content.get('horizontalListRenderer')
3086 if renderer:
3087 # TODO
3088 pass
3089
3090 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3091 ep = try_get(
3092 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3093 compat_str)
3094 shelf_url = urljoin('https://www.youtube.com', ep)
3095 if shelf_url:
3096 # Skipping links to another channels, note that checking for
3097 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3098 # will not work
3099 if skip_channels and '/channels?' in shelf_url:
3100 return
3101 title = try_get(
3102 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3103 yield self.url_result(shelf_url, video_title=title)
3104 # Shelf may not contain shelf URL, fallback to extraction from content
3105 for entry in self._shelf_entries_from_content(shelf_renderer):
3106 yield entry
3107
3108 def _playlist_entries(self, video_list_renderer):
3109 for content in video_list_renderer['contents']:
3110 if not isinstance(content, dict):
3111 continue
3112 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3113 if not isinstance(renderer, dict):
3114 continue
3115 video_id = renderer.get('videoId')
3116 if not video_id:
3117 continue
3118 yield self._extract_video(renderer)
3119
3120 r""" # Not needed in the new implementation
3121 def _itemSection_entries(self, item_sect_renderer):
3122 for content in item_sect_renderer['contents']:
3123 if not isinstance(content, dict):
3124 continue
3125 renderer = content.get('videoRenderer', {})
3126 if not isinstance(renderer, dict):
3127 continue
3128 video_id = renderer.get('videoId')
3129 if not video_id:
3130 continue
3131 yield self._extract_video(renderer)
3132 """
3133
3134 def _rich_entries(self, rich_grid_renderer):
3135 renderer = try_get(
3136 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3137 video_id = renderer.get('videoId')
3138 if not video_id:
3139 return
3140 yield self._extract_video(renderer)
3141
3142 def _video_entry(self, video_renderer):
3143 video_id = video_renderer.get('videoId')
3144 if video_id:
3145 return self._extract_video(video_renderer)
3146
3147 def _post_thread_entries(self, post_thread_renderer):
3148 post_renderer = try_get(
3149 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3150 if not post_renderer:
3151 return
3152 # video attachment
3153 video_renderer = try_get(
3154 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
3155 video_id = None
3156 if video_renderer:
3157 entry = self._video_entry(video_renderer)
3158 if entry:
3159 yield entry
3160 # inline video links
3161 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3162 for run in runs:
3163 if not isinstance(run, dict):
3164 continue
3165 ep_url = try_get(
3166 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3167 if not ep_url:
3168 continue
3169 if not YoutubeIE.suitable(ep_url):
3170 continue
3171 ep_video_id = YoutubeIE._match_id(ep_url)
3172 if video_id == ep_video_id:
3173 continue
3174 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
3175
3176 def _post_thread_continuation_entries(self, post_thread_continuation):
3177 contents = post_thread_continuation.get('contents')
3178 if not isinstance(contents, list):
3179 return
3180 for content in contents:
3181 renderer = content.get('backstagePostThreadRenderer')
3182 if not isinstance(renderer, dict):
3183 continue
3184 for entry in self._post_thread_entries(renderer):
3185 yield entry
3186
3187 @staticmethod
3188 def _build_continuation_query(continuation, ctp=None):
3189 query = {
3190 'ctoken': continuation,
3191 'continuation': continuation,
3192 }
3193 if ctp:
3194 query['itct'] = ctp
3195 return query
3196
3197 @staticmethod
3198 def _extract_next_continuation_data(renderer):
3199 next_continuation = try_get(
3200 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
3201 if not next_continuation:
3202 return
3203 continuation = next_continuation.get('continuation')
3204 if not continuation:
3205 return
3206 ctp = next_continuation.get('clickTrackingParams')
3207 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3208
3209 @classmethod
3210 def _extract_continuation(cls, renderer):
3211 next_continuation = cls._extract_next_continuation_data(renderer)
3212 if next_continuation:
3213 return next_continuation
3214 contents = renderer.get('contents')
3215 if not isinstance(contents, list):
3216 return
3217 for content in contents:
3218 if not isinstance(content, dict):
3219 continue
3220 continuation_ep = try_get(
3221 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3222 dict)
3223 if not continuation_ep:
3224 continue
3225 continuation = try_get(
3226 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3227 if not continuation:
3228 continue
3229 ctp = continuation_ep.get('clickTrackingParams')
3230 return YoutubeTabIE._build_continuation_query(continuation, ctp)
3231
    def _entries(self, tab, identity_token):
        """Yield all video/playlist entries of the selected tab, transparently
        following continuation pages via the browse_ajax endpoint.

        tab: the selected tabRenderer dict.
        identity_token: optional token sent as x-youtube-identity-token so
            authenticated feeds (history, subscriptions, ...) work.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Yields entries for every known renderer type found in
            # parent_renderer['contents'] and records any continuation token
            # it encounters in continuation_list[0] (closure side channel).
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # rich grids (e.g. channel Videos tab) have no itemSectionRenderer
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # on the Channels tab, shelves legitimately link to
                        # other channels, so they must not be skipped there
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # legacy continuation format: response.continuationContents
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    # reset the side channel before re-running the full
                    # section extractor on the continuation page
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # newer continuation format: appendContinuationItemsAction
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3375
3376 @staticmethod
3377 def _extract_selected_tab(tabs):
3378 for tab in tabs:
3379 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3380 return tab['tabRenderer']
3381 else:
3382 raise ExtractorError('Unable to find selected tab')
3383
3384 @staticmethod
3385 def _extract_uploader(data):
3386 uploader = {}
3387 sidebar_renderer = try_get(
3388 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3389 if sidebar_renderer:
3390 for item in sidebar_renderer:
3391 if not isinstance(item, dict):
3392 continue
3393 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3394 if not isinstance(renderer, dict):
3395 continue
3396 owner = try_get(
3397 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3398 if owner:
3399 uploader['uploader'] = owner.get('text')
3400 uploader['uploader_id'] = try_get(
3401 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3402 uploader['uploader_url'] = urljoin(
3403 'https://www.youtube.com/',
3404 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3405 return uploader
3406
3407 def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
3408 selected_tab = self._extract_selected_tab(tabs)
3409 renderer = try_get(
3410 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3411 playlist_id = title = description = None
3412 if renderer:
3413 channel_title = renderer.get('title') or item_id
3414 tab_title = selected_tab.get('title')
3415 title = channel_title or item_id
3416 if tab_title:
3417 title += ' - %s' % tab_title
3418 description = renderer.get('description')
3419 playlist_id = renderer.get('externalId')
3420
3421 # this has thumbnails, but there is currently no thumbnail field for playlists
3422 # sidebar.playlistSidebarRenderer has even more data, but its stucture is more complec
3423 renderer = try_get(
3424 data, lambda x: x['microformat']['microformatDataRenderer'], dict)
3425 if not renderer:
3426 renderer = try_get(
3427 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3428 if renderer:
3429 title = renderer.get('title')
3430 description = renderer.get('description')
3431 playlist_id = item_id
3432
3433 if playlist_id is None:
3434 playlist_id = item_id
3435 if title is None:
3436 title = "Youtube " + playlist_id.title()
3437 playlist = self.playlist_result(
3438 self._entries(selected_tab, identity_token),
3439 playlist_id=playlist_id, playlist_title=title,
3440 playlist_description=description)
3441 playlist.update(self._extract_uploader(data))
3442 return playlist
3443
3444 def _extract_from_playlist(self, item_id, url, data, playlist):
3445 title = playlist.get('title') or try_get(
3446 data, lambda x: x['titleText']['simpleText'], compat_str)
3447 playlist_id = playlist.get('playlistId') or item_id
3448 # Inline playlist rendition continuation does not always work
3449 # at Youtube side, so delegating regular tab-based playlist URL
3450 # processing whenever possible.
3451 playlist_url = urljoin(url, try_get(
3452 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3453 compat_str))
3454 if playlist_url and playlist_url != url:
3455 return self.url_result(
3456 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3457 video_title=title)
3458 return self.playlist_result(
3459 self._playlist_entries(playlist), playlist_id=playlist_id,
3460 playlist_title=title)
3461
3462 @staticmethod
3463 def _extract_alerts(data):
3464 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
3465 if not isinstance(alert_dict, dict):
3466 continue
3467 for renderer in alert_dict:
3468 alert = alert_dict[renderer]
3469 alert_type = alert.get('type')
3470 if not alert_type:
3471 continue
3472 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
3473 if message:
3474 yield alert_type, message
3475 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
3476 message = try_get(run, lambda x: x['text'], compat_str)
3477 if message:
3478 yield alert_type, message
3479
3480 def _extract_identity_token(self, webpage, item_id):
3481 ytcfg = self._extract_ytcfg(item_id, webpage)
3482 if ytcfg:
3483 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
3484 if token:
3485 return token
3486 return self._search_regex(
3487 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
3488 'identity token', default=None)
3489
    def _real_extract(self, url):
        """Dispatch a youtube.com URL to tab, playlist or single-video handling."""
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # A bare channel/user URL (nothing after the channel path) implies
        # the full "Videos" tab rather than just the home page.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # A watch URL without a usable video id can still be salvaged if it
        # names a playlist.
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            # --no-playlist short-circuits to the single video
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Surface YouTube alerts: warn for non-errors, remember the last
        # error message and raise it only after reporting all alerts.
        err_msg = None
        for alert_type, alert_message in self._extract_alerts(data):
            if alert_type.lower() == 'error':
                if err_msg:
                    self._downloader.report_warning('YouTube said: %s - %s' % ('ERROR', err_msg))
                err_msg = alert_message
            else:
                self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if err_msg:
            raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
        # Tabbed (channel/playlist) page?
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        # Inline playlist on a watch page?
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3549
3550
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Accepts bare playlist IDs as well as youtube/youtubekids/invidious
    # URLs carrying a 'list' query parameter.
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE whenever it also matches the URL.
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not qs:
            # Bare playlist IDs carry no query string of their own.
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3625
3626
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    # Short youtu.be links that also carry a playlist ('list=') parameter.
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link into a full watch URL and delegate to YoutubeTabIE."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3665
3666
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    # 'ytuser:NAME' shorthand, resolved to the corresponding /user/ page.
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE for the user's channel page."""
        user_id = self._match_id(url)
        return self.url_result(
            'https://www.youtube.com/user/%s' % user_id,
            ie=YoutubeTabIE.ie_key(), video_id=user_id)
3680
3681
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    # Matches :ytfav, :ytfavs, :ytfavorite(s), :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE for the 'LL' (liked videos) playlist."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())
3699
3700
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None  # subclasses may set extra search filter params
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query via the innertube search API."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page and continuation pages nest the section list
            # differently; try both shapes.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

                # Keep the first continuation token found; later sections
                # may repeat or lack it.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3781
3782
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded innertube search filter selecting upload-date ordering
    _SEARCH_PARAMS = 'CAI%3D'
3788
3789
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        # Pull the query text and the optional search filters ('sp')
        # straight from the results-page URL, then reuse the regular
        # search machinery.
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms, self._MAX_RESULTS)
3815
3816
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True  # feeds are per-account
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # e.g. 'youtube:history' for _FEED_NAME = 'history'
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE for the feed's canonical URL."""
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())
3837
3838
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE for the 'WL' (watch later) playlist."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
3851
3852
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3867
3868
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Matches :ytsub, :ytsubs, :ytsubscription(s)
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3880
3881
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3890
3891
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their 'v=' parameter (usually an unquoted
    '&' in the shell) and raise a helpful error instead of failing cryptically."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3939
3940
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the required 11
    characters and raise an explicit error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)
3956
3957
3958# Do Youtube show urls even exist anymore? I couldn't find any
3959r'''
3960class YoutubeShowIE(YoutubeTabIE):
3961 IE_DESC = 'YouTube.com (multi-season) shows'
3962 _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
3963 IE_NAME = 'youtube:show'
3964 _TESTS = [{
3965 'url': 'https://www.youtube.com/show/airdisasters',
3966 'playlist_mincount': 5,
3967 'info_dict': {
3968 'id': 'airdisasters',
3969 'title': 'Air Disasters',
3970 }
3971 }]
3972
3973 def _real_extract(self, url):
3974 playlist_id = self._match_id(url)
3975 return super(YoutubeShowIE, self)._real_extract(
3976 'https://www.youtube.com/show/%s/playlists' % playlist_id)
3977'''