]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | ||
3 | from __future__ import unicode_literals | |
4 | ||
5 | ||
6 | import itertools | |
7 | import json | |
8 | import os.path | |
9 | import random | |
10 | import re | |
11 | import time | |
12 | import traceback | |
13 | ||
14 | from .common import InfoExtractor, SearchInfoExtractor | |
15 | from ..jsinterp import JSInterpreter | |
16 | from ..swfinterp import SWFInterpreter | |
17 | from ..compat import ( | |
18 | compat_chr, | |
19 | compat_HTTPError, | |
20 | compat_kwargs, | |
21 | compat_parse_qs, | |
22 | compat_urllib_parse_unquote, | |
23 | compat_urllib_parse_unquote_plus, | |
24 | compat_urllib_parse_urlencode, | |
25 | compat_urllib_parse_urlparse, | |
26 | compat_urlparse, | |
27 | compat_str, | |
28 | ) | |
29 | from ..utils import ( | |
30 | bool_or_none, | |
31 | clean_html, | |
32 | error_to_compat_str, | |
33 | ExtractorError, | |
34 | float_or_none, | |
35 | get_element_by_id, | |
36 | int_or_none, | |
37 | mimetype2ext, | |
38 | parse_codecs, | |
39 | parse_count, | |
40 | parse_duration, | |
41 | remove_quotes, | |
42 | remove_start, | |
43 | smuggle_url, | |
44 | str_or_none, | |
45 | str_to_int, | |
46 | try_get, | |
47 | unescapeHTML, | |
48 | unified_strdate, | |
49 | unsmuggle_url, | |
50 | update_url_query, | |
51 | uppercase_escape, | |
52 | url_or_none, | |
53 | urlencode_postdata, | |
54 | urljoin, | |
55 | ) | |
56 | ||
57 | ||
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path components that can never be a channel/user name in a URL.
    _RESERVED_NAMES = (
        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _set_language(self):
        """Force an English UI via the PREF cookie so regex extraction sees stable strings."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Map a list of video IDs to url_result dicts handled by YoutubeIE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the Google sign-in flow; the JSON response
            # carries an anti-XSSI prefix that transform_source strips.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesize the conditional: '%' binds tighter than 'if/else',
            # so without parentheses the 'Unable to login:' prefix was dropped
            # for every message other than INCORRECT_ANSWER_ENTERED.
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as above: keep the prefix for all
                    # TFA failure messages.
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Wrap the parent download with a copied query dict so callers' kwargs are never mutated."""
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the embedded ytInitialData JSON blob, or None if absent."""
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return

    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _call_api(self, ep, query, video_id):
        """POST a youtubei/v1 innertube API request and return the parsed JSON."""
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract ytInitialData, preferring a boundary-anchored match (fatal if missing)."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_ytcfg(self, video_id, webpage):
        """Extract the ytcfg.set(...) configuration object; {} when not found."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False)
335 | ||
336 | class YoutubeIE(YoutubeBaseInfoExtractor): | |
337 | IE_DESC = 'YouTube.com' | |
338 | _VALID_URL = r"""(?x)^ | |
339 | ( | |
340 | (?:https?://|//) # http(s):// or protocol-independent URL | |
341 | (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/| | |
342 | (?:www\.)?deturl\.com/www\.youtube\.com/| | |
343 | (?:www\.)?pwnyoutube\.com/| | |
344 | (?:www\.)?hooktube\.com/| | |
345 | (?:www\.)?yourepeat\.com/| | |
346 | tube\.majestyc\.net/| | |
347 | # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances | |
348 | (?:(?:www|dev)\.)?invidio\.us/| | |
349 | (?:(?:www|no)\.)?invidiou\.sh/| | |
350 | (?:(?:www|fi)\.)?invidious\.snopyta\.org/| | |
351 | (?:www\.)?invidious\.kabi\.tk/| | |
352 | (?:www\.)?invidious\.13ad\.de/| | |
353 | (?:www\.)?invidious\.mastodon\.host/| | |
354 | (?:www\.)?invidious\.zapashcanon\.fr/| | |
355 | (?:www\.)?invidious\.kavin\.rocks/| | |
356 | (?:www\.)?invidious\.tube/| | |
357 | (?:www\.)?invidiou\.site/| | |
358 | (?:www\.)?invidious\.site/| | |
359 | (?:www\.)?invidious\.xyz/| | |
360 | (?:www\.)?invidious\.nixnet\.xyz/| | |
361 | (?:www\.)?invidious\.drycat\.fr/| | |
362 | (?:www\.)?tube\.poal\.co/| | |
363 | (?:www\.)?tube\.connect\.cafe/| | |
364 | (?:www\.)?vid\.wxzm\.sx/| | |
365 | (?:www\.)?vid\.mint\.lgbt/| | |
366 | (?:www\.)?yewtu\.be/| | |
367 | (?:www\.)?yt\.elukerio\.org/| | |
368 | (?:www\.)?yt\.lelux\.fi/| | |
369 | (?:www\.)?invidious\.ggc-project\.de/| | |
370 | (?:www\.)?yt\.maisputain\.ovh/| | |
371 | (?:www\.)?invidious\.13ad\.de/| | |
372 | (?:www\.)?invidious\.toot\.koeln/| | |
373 | (?:www\.)?invidious\.fdn\.fr/| | |
374 | (?:www\.)?watch\.nettohikari\.com/| | |
375 | (?:www\.)?kgg2m7yk5aybusll\.onion/| | |
376 | (?:www\.)?qklhadlycap4cnod\.onion/| | |
377 | (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| | |
378 | (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/| | |
379 | (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| | |
380 | (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| | |
381 | (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/| | |
382 | (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/| | |
383 | youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains | |
384 | (?:.*?\#/)? # handle anchor (#/) redirect urls | |
385 | (?: # the various things that can precede the ID: | |
386 | (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/ | |
387 | |(?: # or the v= param in all its forms | |
388 | (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | |
389 | (?:\?|\#!?) # the params delimiter ? or # or #! | |
390 | (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY) | |
391 | v= | |
392 | ) | |
393 | )) | |
394 | |(?: | |
395 | youtu\.be| # just youtu.be/xxxx | |
396 | vid\.plus| # or vid.plus/xxxx | |
397 | zwearz\.com/watch| # or zwearz.com/watch/xxxx | |
398 | )/ | |
399 | |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= | |
400 | ) | |
401 | )? # all until now is optional -> you can pass the naked ID | |
402 | (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID | |
403 | (?!.*?\blist= | |
404 | (?: | |
405 | %(playlist_id)s| # combined list/video URLs are handled by the playlist IE | |
406 | WL # WL are handled by the watch later IE | |
407 | ) | |
408 | ) | |
409 | (?(1).+)? # if we found the ID, everything can follow | |
410 | $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} | |
411 | _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' | |
412 | _PLAYER_INFO_RE = ( | |
413 | r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$', | |
414 | r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$', | |
415 | ) | |
416 | _formats = { | |
417 | '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, | |
418 | '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, | |
419 | '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, | |
420 | '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, | |
421 | '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, | |
422 | '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, | |
423 | '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, | |
424 | '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, | |
425 | # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well | |
426 | '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, | |
427 | '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, | |
428 | '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, | |
429 | '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, | |
430 | '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, | |
431 | '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, | |
432 | '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, | |
433 | '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, | |
434 | '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, | |
435 | ||
436 | ||
437 | # 3D videos | |
438 | '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, | |
439 | '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, | |
440 | '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, | |
441 | '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, | |
442 | '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, | |
443 | '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, | |
444 | '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, | |
445 | ||
446 | # Apple HTTP Live Streaming | |
447 | '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, | |
448 | '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, | |
449 | '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, | |
450 | '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, | |
451 | '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, | |
452 | '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, | |
453 | '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, | |
454 | '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, | |
455 | ||
456 | # DASH mp4 video | |
457 | '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
458 | '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
459 | '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
460 | '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
461 | '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
462 | '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) | |
463 | '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
464 | '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
465 | '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
466 | '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, | |
467 | '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, | |
468 | '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, | |
469 | ||
470 | # Dash mp4 audio | |
471 | '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, | |
472 | '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, | |
473 | '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, | |
474 | '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, | |
475 | '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, | |
476 | '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, | |
477 | '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, | |
478 | ||
479 | # Dash webm | |
480 | '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, | |
481 | '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, | |
482 | '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, | |
483 | '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, | |
484 | '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, | |
485 | '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, | |
486 | '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, | |
487 | '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
488 | '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
489 | '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
490 | '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
491 | '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
492 | '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
493 | '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
494 | '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
495 | # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) | |
496 | '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
497 | '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, | |
498 | '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, | |
499 | '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, | |
500 | '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, | |
501 | '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, | |
502 | ||
503 | # Dash webm audio | |
504 | '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, | |
505 | '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, | |
506 | ||
507 | # Dash webm audio with opus inside | |
508 | '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, | |
509 | '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, | |
510 | '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, | |
511 | ||
512 | # RTMP (unnamed) | |
513 | '_rtmp': {'protocol': 'rtmp'}, | |
514 | ||
515 | # av01 video only formats sometimes served with "unknown" codecs | |
516 | '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, | |
517 | '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, | |
518 | '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, | |
519 | '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, | |
520 | } | |
521 | _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') | |
522 | ||
523 | _GEO_BYPASS = False | |
524 | ||
525 | IE_NAME = 'youtube' | |
526 | _TESTS = [ | |
527 | { | |
528 | 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', | |
529 | 'info_dict': { | |
530 | 'id': 'BaW_jenozKc', | |
531 | 'ext': 'mp4', | |
532 | 'title': 'youtube-dl test video "\'/\\ä↭𝕐', | |
533 | 'uploader': 'Philipp Hagemeister', | |
534 | 'uploader_id': 'phihag', | |
535 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', | |
536 | 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', | |
537 | 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', | |
538 | 'upload_date': '20121002', | |
539 | 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', | |
540 | 'categories': ['Science & Technology'], | |
541 | 'tags': ['youtube-dl'], | |
542 | 'duration': 10, | |
543 | 'view_count': int, | |
544 | 'like_count': int, | |
545 | 'dislike_count': int, | |
546 | 'start_time': 1, | |
547 | 'end_time': 9, | |
548 | } | |
549 | }, | |
550 | { | |
551 | 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', | |
552 | 'note': 'Embed-only video (#1746)', | |
553 | 'info_dict': { | |
554 | 'id': 'yZIXLfi8CZQ', | |
555 | 'ext': 'mp4', | |
556 | 'upload_date': '20120608', | |
557 | 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', | |
558 | 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', | |
559 | 'uploader': 'SET India', | |
560 | 'uploader_id': 'setindia', | |
561 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', | |
562 | 'age_limit': 18, | |
563 | } | |
564 | }, | |
565 | { | |
566 | 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ', | |
567 | 'note': 'Use the first video ID in the URL', | |
568 | 'info_dict': { | |
569 | 'id': 'BaW_jenozKc', | |
570 | 'ext': 'mp4', | |
571 | 'title': 'youtube-dl test video "\'/\\ä↭𝕐', | |
572 | 'uploader': 'Philipp Hagemeister', | |
573 | 'uploader_id': 'phihag', | |
574 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', | |
575 | 'upload_date': '20121002', | |
576 | 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', | |
577 | 'categories': ['Science & Technology'], | |
578 | 'tags': ['youtube-dl'], | |
579 | 'duration': 10, | |
580 | 'view_count': int, | |
581 | 'like_count': int, | |
582 | 'dislike_count': int, | |
583 | }, | |
584 | 'params': { | |
585 | 'skip_download': True, | |
586 | }, | |
587 | }, | |
588 | { | |
589 | 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', | |
590 | 'note': '256k DASH audio (format 141) via DASH manifest', | |
591 | 'info_dict': { | |
592 | 'id': 'a9LDPn-MO4I', | |
593 | 'ext': 'm4a', | |
594 | 'upload_date': '20121002', | |
595 | 'uploader_id': '8KVIDEO', | |
596 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', | |
597 | 'description': '', | |
598 | 'uploader': '8KVIDEO', | |
599 | 'title': 'UHDTV TEST 8K VIDEO.mp4' | |
600 | }, | |
601 | 'params': { | |
602 | 'youtube_include_dash_manifest': True, | |
603 | 'format': '141', | |
604 | }, | |
605 | 'skip': 'format 141 not served anymore', | |
606 | }, | |
607 | # DASH manifest with encrypted signature | |
608 | { | |
609 | 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', | |
610 | 'info_dict': { | |
611 | 'id': 'IB3lcPjvWLA', | |
612 | 'ext': 'm4a', | |
613 | 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson', | |
614 | 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf', | |
615 | 'duration': 244, | |
616 | 'uploader': 'AfrojackVEVO', | |
617 | 'uploader_id': 'AfrojackVEVO', | |
618 | 'upload_date': '20131011', | |
619 | }, | |
620 | 'params': { | |
621 | 'youtube_include_dash_manifest': True, | |
622 | 'format': '141/bestaudio[ext=m4a]', | |
623 | }, | |
624 | }, | |
625 | # Controversy video | |
626 | { | |
627 | 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8', | |
628 | 'info_dict': { | |
629 | 'id': 'T4XJQO3qol8', | |
630 | 'ext': 'mp4', | |
631 | 'duration': 219, | |
632 | 'upload_date': '20100909', | |
633 | 'uploader': 'Amazing Atheist', | |
634 | 'uploader_id': 'TheAmazingAtheist', | |
635 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', | |
636 | 'title': 'Burning Everyone\'s Koran', | |
637 | 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', | |
638 | } | |
639 | }, | |
640 | # Normal age-gate video (embed allowed) | |
641 | { | |
642 | 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', | |
643 | 'info_dict': { | |
644 | 'id': 'HtVdAasjOgU', | |
645 | 'ext': 'mp4', | |
646 | 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', | |
647 | 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', | |
648 | 'duration': 142, | |
649 | 'uploader': 'The Witcher', | |
650 | 'uploader_id': 'WitcherGame', | |
651 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', | |
652 | 'upload_date': '20140605', | |
653 | 'age_limit': 18, | |
654 | }, | |
655 | }, | |
656 | # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) | |
657 | # YouTube Red ad is not captured for creator | |
658 | { | |
659 | 'url': '__2ABJjxzNo', | |
660 | 'info_dict': { | |
661 | 'id': '__2ABJjxzNo', | |
662 | 'ext': 'mp4', | |
663 | 'duration': 266, | |
664 | 'upload_date': '20100430', | |
665 | 'uploader_id': 'deadmau5', | |
666 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', | |
667 | 'creator': 'Dada Life, deadmau5', | |
668 | 'description': 'md5:12c56784b8032162bb936a5f76d55360', | |
669 | 'uploader': 'deadmau5', | |
670 | 'title': 'Deadmau5 - Some Chords (HD)', | |
671 | 'alt_title': 'This Machine Kills Some Chords', | |
672 | }, | |
673 | 'expected_warnings': [ | |
674 | 'DASH manifest missing', | |
675 | ] | |
676 | }, | |
677 | # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) | |
678 | { | |
679 | 'url': 'lqQg6PlCWgI', | |
680 | 'info_dict': { | |
681 | 'id': 'lqQg6PlCWgI', | |
682 | 'ext': 'mp4', | |
683 | 'duration': 6085, | |
684 | 'upload_date': '20150827', | |
685 | 'uploader_id': 'olympic', | |
686 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', | |
687 | 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', | |
688 | 'uploader': 'Olympic', | |
689 | 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', | |
690 | }, | |
691 | 'params': { | |
692 | 'skip_download': 'requires avconv', | |
693 | } | |
694 | }, | |
695 | # Non-square pixels | |
696 | { | |
697 | 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0', | |
698 | 'info_dict': { | |
699 | 'id': '_b-2C3KPAM0', | |
700 | 'ext': 'mp4', | |
701 | 'stretched_ratio': 16 / 9., | |
702 | 'duration': 85, | |
703 | 'upload_date': '20110310', | |
704 | 'uploader_id': 'AllenMeow', | |
705 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', | |
706 | 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', | |
707 | 'uploader': '孫ᄋᄅ', | |
708 | 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', | |
709 | }, | |
710 | }, | |
711 | # url_encoded_fmt_stream_map is empty string | |
712 | { | |
713 | 'url': 'qEJwOuvDf7I', | |
714 | 'info_dict': { | |
715 | 'id': 'qEJwOuvDf7I', | |
716 | 'ext': 'webm', | |
717 | 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', | |
718 | 'description': '', | |
719 | 'upload_date': '20150404', | |
720 | 'uploader_id': 'spbelect', | |
721 | 'uploader': 'Наблюдатели Петербурга', | |
722 | }, | |
723 | 'params': { | |
724 | 'skip_download': 'requires avconv', | |
725 | }, | |
726 | 'skip': 'This live event has ended.', | |
727 | }, | |
728 | # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097) | |
729 | { | |
730 | 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', | |
731 | 'info_dict': { | |
732 | 'id': 'FIl7x6_3R5Y', | |
733 | 'ext': 'webm', | |
734 | 'title': 'md5:7b81415841e02ecd4313668cde88737a', | |
735 | 'description': 'md5:116377fd2963b81ec4ce64b542173306', | |
736 | 'duration': 220, | |
737 | 'upload_date': '20150625', | |
738 | 'uploader_id': 'dorappi2000', | |
739 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', | |
740 | 'uploader': 'dorappi2000', | |
741 | 'formats': 'mincount:31', | |
742 | }, | |
743 | 'skip': 'not actual anymore', | |
744 | }, | |
745 | # DASH manifest with segment_list | |
746 | { | |
747 | 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', | |
748 | 'md5': '8ce563a1d667b599d21064e982ab9e31', | |
749 | 'info_dict': { | |
750 | 'id': 'CsmdDsKjzN8', | |
751 | 'ext': 'mp4', | |
752 | 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510 | |
753 | 'uploader': 'Airtek', | |
754 | 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', | |
755 | 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', | |
756 | 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', | |
757 | }, | |
758 | 'params': { | |
759 | 'youtube_include_dash_manifest': True, | |
760 | 'format': '135', # bestvideo | |
761 | }, | |
762 | 'skip': 'This live event has ended.', | |
763 | }, | |
764 | { | |
765 | # Multifeed videos (multiple cameras), URL is for Main Camera | |
766 | 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs', | |
767 | 'info_dict': { | |
768 | 'id': 'jqWvoWXjCVs', | |
769 | 'title': 'teamPGP: Rocket League Noob Stream', | |
770 | 'description': 'md5:dc7872fb300e143831327f1bae3af010', | |
771 | }, | |
772 | 'playlist': [{ | |
773 | 'info_dict': { | |
774 | 'id': 'jqWvoWXjCVs', | |
775 | 'ext': 'mp4', | |
776 | 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)', | |
777 | 'description': 'md5:dc7872fb300e143831327f1bae3af010', | |
778 | 'duration': 7335, | |
779 | 'upload_date': '20150721', | |
780 | 'uploader': 'Beer Games Beer', | |
781 | 'uploader_id': 'beergamesbeer', | |
782 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', | |
783 | 'license': 'Standard YouTube License', | |
784 | }, | |
785 | }, { | |
786 | 'info_dict': { | |
787 | 'id': '6h8e8xoXJzg', | |
788 | 'ext': 'mp4', | |
789 | 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)', | |
790 | 'description': 'md5:dc7872fb300e143831327f1bae3af010', | |
791 | 'duration': 7337, | |
792 | 'upload_date': '20150721', | |
793 | 'uploader': 'Beer Games Beer', | |
794 | 'uploader_id': 'beergamesbeer', | |
795 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', | |
796 | 'license': 'Standard YouTube License', | |
797 | }, | |
798 | }, { | |
799 | 'info_dict': { | |
800 | 'id': 'PUOgX5z9xZw', | |
801 | 'ext': 'mp4', | |
802 | 'title': 'teamPGP: Rocket League Noob Stream (grizzle)', | |
803 | 'description': 'md5:dc7872fb300e143831327f1bae3af010', | |
804 | 'duration': 7337, | |
805 | 'upload_date': '20150721', | |
806 | 'uploader': 'Beer Games Beer', | |
807 | 'uploader_id': 'beergamesbeer', | |
808 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', | |
809 | 'license': 'Standard YouTube License', | |
810 | }, | |
811 | }, { | |
812 | 'info_dict': { | |
813 | 'id': 'teuwxikvS5k', | |
814 | 'ext': 'mp4', | |
815 | 'title': 'teamPGP: Rocket League Noob Stream (zim)', | |
816 | 'description': 'md5:dc7872fb300e143831327f1bae3af010', | |
817 | 'duration': 7334, | |
818 | 'upload_date': '20150721', | |
819 | 'uploader': 'Beer Games Beer', | |
820 | 'uploader_id': 'beergamesbeer', | |
821 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', | |
822 | 'license': 'Standard YouTube License', | |
823 | }, | |
824 | }], | |
825 | 'params': { | |
826 | 'skip_download': True, | |
827 | }, | |
828 | 'skip': 'This video is not available.', | |
829 | }, | |
830 | { | |
831 | # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) | |
832 | 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', | |
833 | 'info_dict': { | |
834 | 'id': 'gVfLd0zydlo', | |
835 | 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', | |
836 | }, | |
837 | 'playlist_count': 2, | |
838 | 'skip': 'Not multifeed anymore', | |
839 | }, | |
840 | { | |
841 | 'url': 'https://vid.plus/FlRa-iH7PGw', | |
842 | 'only_matching': True, | |
843 | }, | |
844 | { | |
845 | 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', | |
846 | 'only_matching': True, | |
847 | }, | |
848 | { | |
849 | # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) | |
850 | # Also tests cut-off URL expansion in video description (see | |
851 | # https://github.com/ytdl-org/youtube-dl/issues/1892, | |
852 | # https://github.com/ytdl-org/youtube-dl/issues/8164) | |
853 | 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', | |
854 | 'info_dict': { | |
855 | 'id': 'lsguqyKfVQg', | |
856 | 'ext': 'mp4', | |
857 | 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', | |
858 | 'alt_title': 'Dark Walk - Position Music', | |
859 | 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', | |
860 | 'duration': 133, | |
861 | 'upload_date': '20151119', | |
862 | 'uploader_id': 'IronSoulElf', | |
863 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', | |
864 | 'uploader': 'IronSoulElf', | |
865 | 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', | |
866 | 'track': 'Dark Walk - Position Music', | |
867 | 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', | |
868 | 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', | |
869 | }, | |
870 | 'params': { | |
871 | 'skip_download': True, | |
872 | }, | |
873 | }, | |
874 | { | |
875 | # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) | |
876 | 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', | |
877 | 'only_matching': True, | |
878 | }, | |
879 | { | |
880 | # Video with yt:stretch=17:0 | |
881 | 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM', | |
882 | 'info_dict': { | |
883 | 'id': 'Q39EVAstoRM', | |
884 | 'ext': 'mp4', | |
885 | 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4', | |
886 | 'description': 'md5:ee18a25c350637c8faff806845bddee9', | |
887 | 'upload_date': '20151107', | |
888 | 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA', | |
889 | 'uploader': 'CH GAMER DROID', | |
890 | }, | |
891 | 'params': { | |
892 | 'skip_download': True, | |
893 | }, | |
894 | 'skip': 'This video does not exist.', | |
895 | }, | |
896 | { | |
897 | # Video licensed under Creative Commons | |
898 | 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA', | |
899 | 'info_dict': { | |
900 | 'id': 'M4gD1WSo5mA', | |
901 | 'ext': 'mp4', | |
902 | 'title': 'md5:e41008789470fc2533a3252216f1c1d1', | |
903 | 'description': 'md5:a677553cf0840649b731a3024aeff4cc', | |
904 | 'duration': 721, | |
905 | 'upload_date': '20150127', | |
906 | 'uploader_id': 'BerkmanCenter', | |
907 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', | |
908 | 'uploader': 'The Berkman Klein Center for Internet & Society', | |
909 | 'license': 'Creative Commons Attribution license (reuse allowed)', | |
910 | }, | |
911 | 'params': { | |
912 | 'skip_download': True, | |
913 | }, | |
914 | }, | |
915 | { | |
916 | # Channel-like uploader_url | |
917 | 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg', | |
918 | 'info_dict': { | |
919 | 'id': 'eQcmzGIKrzg', | |
920 | 'ext': 'mp4', | |
921 | 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', | |
922 | 'description': 'md5:dda0d780d5a6e120758d1711d062a867', | |
923 | 'duration': 4060, | |
924 | 'upload_date': '20151119', | |
925 | 'uploader': 'Bernie Sanders', | |
926 | 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', | |
927 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', | |
928 | 'license': 'Creative Commons Attribution license (reuse allowed)', | |
929 | }, | |
930 | 'params': { | |
931 | 'skip_download': True, | |
932 | }, | |
933 | }, | |
934 | { | |
935 | 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', | |
936 | 'only_matching': True, | |
937 | }, | |
938 | { | |
939 | # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) | |
940 | 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', | |
941 | 'only_matching': True, | |
942 | }, | |
943 | { | |
944 | # Rental video preview | |
945 | 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', | |
946 | 'info_dict': { | |
947 | 'id': 'uGpuVWrhIzE', | |
948 | 'ext': 'mp4', | |
949 | 'title': 'Piku - Trailer', | |
950 | 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', | |
951 | 'upload_date': '20150811', | |
952 | 'uploader': 'FlixMatrix', | |
953 | 'uploader_id': 'FlixMatrixKaravan', | |
954 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', | |
955 | 'license': 'Standard YouTube License', | |
956 | }, | |
957 | 'params': { | |
958 | 'skip_download': True, | |
959 | }, | |
960 | 'skip': 'This video is not available.', | |
961 | }, | |
962 | { | |
963 | # YouTube Red video with episode data | |
964 | 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', | |
965 | 'info_dict': { | |
966 | 'id': 'iqKdEhx-dD4', | |
967 | 'ext': 'mp4', | |
968 | 'title': 'Isolation - Mind Field (Ep 1)', | |
969 | 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f', | |
970 | 'duration': 2085, | |
971 | 'upload_date': '20170118', | |
972 | 'uploader': 'Vsauce', | |
973 | 'uploader_id': 'Vsauce', | |
974 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', | |
975 | 'series': 'Mind Field', | |
976 | 'season_number': 1, | |
977 | 'episode_number': 1, | |
978 | }, | |
979 | 'params': { | |
980 | 'skip_download': True, | |
981 | }, | |
982 | 'expected_warnings': [ | |
983 | 'Skipping DASH manifest', | |
984 | ], | |
985 | }, | |
986 | { | |
987 | # The following content has been identified by the YouTube community | |
988 | # as inappropriate or offensive to some audiences. | |
989 | 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI', | |
990 | 'info_dict': { | |
991 | 'id': '6SJNVb0GnPI', | |
992 | 'ext': 'mp4', | |
993 | 'title': 'Race Differences in Intelligence', | |
994 | 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1', | |
995 | 'duration': 965, | |
996 | 'upload_date': '20140124', | |
997 | 'uploader': 'New Century Foundation', | |
998 | 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg', | |
999 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg', | |
1000 | }, | |
1001 | 'params': { | |
1002 | 'skip_download': True, | |
1003 | }, | |
1004 | }, | |
1005 | { | |
1006 | # itag 212 | |
1007 | 'url': '1t24XAntNCY', | |
1008 | 'only_matching': True, | |
1009 | }, | |
1010 | { | |
1011 | # geo restricted to JP | |
1012 | 'url': 'sJL6WA-aGkQ', | |
1013 | 'only_matching': True, | |
1014 | }, | |
1015 | { | |
1016 | 'url': 'https://invidio.us/watch?v=BaW_jenozKc', | |
1017 | 'only_matching': True, | |
1018 | }, | |
1019 | { | |
1020 | # DRM protected | |
1021 | 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc', | |
1022 | 'only_matching': True, | |
1023 | }, | |
1024 | { | |
1025 | # Video with unsupported adaptive stream type formats | |
1026 | 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U', | |
1027 | 'info_dict': { | |
1028 | 'id': 'Z4Vy8R84T1U', | |
1029 | 'ext': 'mp4', | |
1030 | 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta', | |
1031 | 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | |
1032 | 'duration': 433, | |
1033 | 'upload_date': '20130923', | |
1034 | 'uploader': 'Amelia Putri Harwita', | |
1035 | 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q', | |
1036 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q', | |
1037 | 'formats': 'maxcount:10', | |
1038 | }, | |
1039 | 'params': { | |
1040 | 'skip_download': True, | |
1041 | 'youtube_include_dash_manifest': False, | |
1042 | }, | |
1043 | 'skip': 'not actual anymore', | |
1044 | }, | |
1045 | { | |
1046 | # Youtube Music Auto-generated description | |
1047 | 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', | |
1048 | 'info_dict': { | |
1049 | 'id': 'MgNrAu2pzNs', | |
1050 | 'ext': 'mp4', | |
1051 | 'title': 'Voyeur Girl', | |
1052 | 'description': 'md5:7ae382a65843d6df2685993e90a8628f', | |
1053 | 'upload_date': '20190312', | |
1054 | 'uploader': 'Stephen - Topic', | |
1055 | 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', | |
1056 | 'artist': 'Stephen', | |
1057 | 'track': 'Voyeur Girl', | |
1058 | 'album': 'it\'s too much love to know my dear', | |
1059 | 'release_date': '20190313', | |
1060 | 'release_year': 2019, | |
1061 | }, | |
1062 | 'params': { | |
1063 | 'skip_download': True, | |
1064 | }, | |
1065 | }, | |
1066 | { | |
1067 | 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', | |
1068 | 'only_matching': True, | |
1069 | }, | |
1070 | { | |
1071 | # invalid -> valid video id redirection | |
1072 | 'url': 'DJztXj2GPfl', | |
1073 | 'info_dict': { | |
1074 | 'id': 'DJztXj2GPfk', | |
1075 | 'ext': 'mp4', | |
1076 | 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', | |
1077 | 'description': 'md5:bf577a41da97918e94fa9798d9228825', | |
1078 | 'upload_date': '20090125', | |
1079 | 'uploader': 'Prochorowka', | |
1080 | 'uploader_id': 'Prochorowka', | |
1081 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', | |
1082 | 'artist': 'Panjabi MC', | |
1083 | 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', | |
1084 | 'album': 'Beware of the Boys (Mundian To Bach Ke)', | |
1085 | }, | |
1086 | 'params': { | |
1087 | 'skip_download': True, | |
1088 | }, | |
1089 | }, | |
1090 | { | |
1091 | # empty description results in an empty string | |
1092 | 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', | |
1093 | 'info_dict': { | |
1094 | 'id': 'x41yOUIvK2k', | |
1095 | 'ext': 'mp4', | |
1096 | 'title': 'IMG 3456', | |
1097 | 'description': '', | |
1098 | 'upload_date': '20170613', | |
1099 | 'uploader_id': 'ElevageOrVert', | |
1100 | 'uploader': 'ElevageOrVert', | |
1101 | }, | |
1102 | 'params': { | |
1103 | 'skip_download': True, | |
1104 | }, | |
1105 | }, | |
1106 | { | |
1107 | # with '};' inside yt initial data (see [1]) | |
1108 | # see [2] for an example with '};' inside ytInitialPlayerResponse | |
1109 | # 1. https://github.com/ytdl-org/youtube-dl/issues/27093 | |
1110 | # 2. https://github.com/ytdl-org/youtube-dl/issues/27216 | |
1111 | 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no', | |
1112 | 'info_dict': { | |
1113 | 'id': 'CHqg6qOn4no', | |
1114 | 'ext': 'mp4', | |
1115 | 'title': 'Part 77 Sort a list of simple types in c#', | |
1116 | 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc', | |
1117 | 'upload_date': '20130831', | |
1118 | 'uploader_id': 'kudvenkat', | |
1119 | 'uploader': 'kudvenkat', | |
1120 | }, | |
1121 | 'params': { | |
1122 | 'skip_download': True, | |
1123 | }, | |
1124 | }, | |
1125 | { | |
1126 | # another example of '};' in ytInitialData | |
1127 | 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY', | |
1128 | 'only_matching': True, | |
1129 | }, | |
1130 | { | |
1131 | 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ', | |
1132 | 'only_matching': True, | |
1133 | }, | |
1134 | ] | |
1135 | ||
1136 | def __init__(self, *args, **kwargs): | |
1137 | super(YoutubeIE, self).__init__(*args, **kwargs) | |
1138 | self._player_cache = {} | |
1139 | ||
1140 | def report_video_info_webpage_download(self, video_id): | |
1141 | """Report attempt to download video info webpage.""" | |
1142 | self.to_screen('%s: Downloading video info webpage' % video_id) | |
1143 | ||
1144 | def report_information_extraction(self, video_id): | |
1145 | """Report attempt to extract video information.""" | |
1146 | self.to_screen('%s: Extracting video information' % video_id) | |
1147 | ||
1148 | def report_unavailable_format(self, video_id, format): | |
1149 | """Report extracted video URL.""" | |
1150 | self.to_screen('%s: Format %s not available' % (video_id, format)) | |
1151 | ||
1152 | def report_rtmp_download(self): | |
1153 | """Indicate the download will use the RTMP protocol.""" | |
1154 | self.to_screen('RTMP download detected') | |
1155 | ||
1156 | def _signature_cache_id(self, example_sig): | |
1157 | """ Return a string representation of a signature """ | |
1158 | return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) | |
1159 | ||
1160 | @classmethod | |
1161 | def _extract_player_info(cls, player_url): | |
1162 | for player_re in cls._PLAYER_INFO_RE: | |
1163 | id_m = re.search(player_re, player_url) | |
1164 | if id_m: | |
1165 | break | |
1166 | else: | |
1167 | raise ExtractorError('Cannot identify player %r' % player_url) | |
1168 | return id_m.group('ext'), id_m.group('id') | |
1169 | ||
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Download the player (JS or SWF) and build a signature-deciphering
        function for signatures shaped like *example_sig*.

        The derived character-index spec is cached on disk under
        'youtube-sigfuncs', keyed by player type, player id and the
        signature's part-length shape, so subsequent runs skip the download.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # Guard against a cache key that would escape the cache directory
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of input indices: deciphering reduces to
            # picking those characters from the scrambled signature in order.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            # _extract_player_info only yields 'js' or 'swf'
            assert False, 'Invalid player type %r' % player_type

        # Probe string: character k is chr(k), so each output character's
        # code point reveals which input position the function selected.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1209 | ||
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to *func* for signatures shaped
        like *example_sig* (used for --youtube-print-sig-code output)."""
        def gen_sig_code(idxs):
            # Compress the index list into slice expressions wherever
            # consecutive indices step by exactly +1 or -1.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it or flush the finished slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new +1/-1 run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or final open run (i is the last index
            # from the loop above)
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string: character k is chr(k), so the output characters
        # encode which input positions the function picked, in order.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1248 | ||
1249 | def _parse_sig_js(self, jscode): | |
1250 | funcname = self._search_regex( | |
1251 | (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1252 | r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1253 | r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', | |
1254 | r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', | |
1255 | # Obsolete patterns | |
1256 | r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1257 | r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', | |
1258 | r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1259 | r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1260 | r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1261 | r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1262 | r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', | |
1263 | r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), | |
1264 | jscode, 'Initial JS player signature function name', group='sig') | |
1265 | ||
1266 | jsi = JSInterpreter(jscode) | |
1267 | initial_function = jsi.extract_function(funcname) | |
1268 | return lambda s: initial_function([s]) | |
1269 | ||
1270 | def _parse_sig_swf(self, file_contents): | |
1271 | swfi = SWFInterpreter(file_contents) | |
1272 | TARGET_CLASSNAME = 'SignatureDecipher' | |
1273 | searched_class = swfi.extract_class(TARGET_CLASSNAME) | |
1274 | initial_function = swfi.extract_function(searched_class, 'decipher') | |
1275 | return lambda s: initial_function([s]) | |
1276 | ||
1277 | def _decrypt_signature(self, s, video_id, player_url, age_gate=False): | |
1278 | """Turn the encrypted s field into a working signature""" | |
1279 | ||
1280 | if player_url is None: | |
1281 | raise ExtractorError('Cannot decrypt signature without player_url') | |
1282 | ||
1283 | if player_url.startswith('//'): | |
1284 | player_url = 'https:' + player_url | |
1285 | elif not re.match(r'https?://', player_url): | |
1286 | player_url = compat_urlparse.urljoin( | |
1287 | 'https://www.youtube.com', player_url) | |
1288 | try: | |
1289 | player_id = (player_url, self._signature_cache_id(s)) | |
1290 | if player_id not in self._player_cache: | |
1291 | func = self._extract_signature_function( | |
1292 | video_id, player_url, s | |
1293 | ) | |
1294 | self._player_cache[player_id] = func | |
1295 | func = self._player_cache[player_id] | |
1296 | if self._downloader.params.get('youtube_print_sig_code'): | |
1297 | self._print_sig_code(func, s) | |
1298 | return func(s) | |
1299 | except Exception as e: | |
1300 | tb = traceback.format_exc() | |
1301 | raise ExtractorError( | |
1302 | 'Signature extraction failed: ' + tb, cause=e) | |
1303 | ||
1304 | def _get_subtitles(self, video_id, webpage, has_live_chat_replay): | |
1305 | try: | |
1306 | subs_doc = self._download_xml( | |
1307 | 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, | |
1308 | video_id, note=False) | |
1309 | except ExtractorError as err: | |
1310 | self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err)) | |
1311 | return {} | |
1312 | ||
1313 | sub_lang_list = {} | |
1314 | for track in subs_doc.findall('track'): | |
1315 | lang = track.attrib['lang_code'] | |
1316 | if lang in sub_lang_list: | |
1317 | continue | |
1318 | sub_formats = [] | |
1319 | for ext in self._SUBTITLE_FORMATS: | |
1320 | params = compat_urllib_parse_urlencode({ | |
1321 | 'lang': lang, | |
1322 | 'v': video_id, | |
1323 | 'fmt': ext, | |
1324 | 'name': track.attrib['name'].encode('utf-8'), | |
1325 | }) | |
1326 | sub_formats.append({ | |
1327 | 'url': 'https://www.youtube.com/api/timedtext?' + params, | |
1328 | 'ext': ext, | |
1329 | }) | |
1330 | sub_lang_list[lang] = sub_formats | |
1331 | if has_live_chat_replay: | |
1332 | sub_lang_list['live_chat'] = [ | |
1333 | { | |
1334 | 'video_id': video_id, | |
1335 | 'ext': 'json', | |
1336 | 'protocol': 'youtube_live_chat_replay', | |
1337 | }, | |
1338 | ] | |
1339 | if not sub_lang_list: | |
1340 | self._downloader.report_warning('video doesn\'t have subtitles') | |
1341 | return {} | |
1342 | return sub_lang_list | |
1343 | ||
1344 | def _get_ytplayer_config(self, video_id, webpage): | |
1345 | patterns = ( | |
1346 | # User data may contain arbitrary character sequences that may affect | |
1347 | # JSON extraction with regex, e.g. when '};' is contained the second | |
1348 | # regex won't capture the whole JSON. Yet working around by trying more | |
1349 | # concrete regex first keeping in mind proper quoted string handling | |
1350 | # to be implemented in future that will replace this workaround (see | |
1351 | # https://github.com/ytdl-org/youtube-dl/issues/7468, | |
1352 | # https://github.com/ytdl-org/youtube-dl/pull/7599) | |
1353 | r';ytplayer\.config\s*=\s*({.+?});ytplayer', | |
1354 | r';ytplayer\.config\s*=\s*({.+?});', | |
1355 | ) | |
1356 | config = self._search_regex( | |
1357 | patterns, webpage, 'ytplayer.config', default=None) | |
1358 | if config: | |
1359 | return self._parse_json( | |
1360 | uppercase_escape(config), video_id, fatal=False) | |
1361 | ||
    def _get_automatic_captions(self, video_id, player_response, player_config):
        """Return a dict mapping language code -> automatic-caption formats.

        Tries three historical caption sources in order: the ttsurl from
        player_config args, the captions renderer in player_response
        (format used since 22.06.2017), and the legacy caption_tracks /
        caption_translation_languages args.  Returns {} with a warning when
        none of them yields captions.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not (player_response or player_config):
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config.get('args') if player_config else {}
            caption_url = args.get('ttsurl')
            if caption_url:
                # Oldest scheme: ttsurl + timestamp, languages listed via a
                # separate 'type=list' request
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the query of
                # a base caption URL with each tlang/fmt combination
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles; KeyError and
        # IndexError cover any missing field along the three paths above
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1458 | ||
1459 | def _mark_watched(self, video_id, video_info, player_response): | |
1460 | playback_url = url_or_none(try_get( | |
1461 | player_response, | |
1462 | lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get( | |
1463 | video_info, lambda x: x['videostats_playback_base_url'][0])) | |
1464 | if not playback_url: | |
1465 | return | |
1466 | parsed_playback_url = compat_urlparse.urlparse(playback_url) | |
1467 | qs = compat_urlparse.parse_qs(parsed_playback_url.query) | |
1468 | ||
1469 | # cpn generation algorithm is reverse engineered from base.js. | |
1470 | # In fact it works even with dummy cpn. | |
1471 | CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' | |
1472 | cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) | |
1473 | ||
1474 | qs.update({ | |
1475 | 'ver': ['2'], | |
1476 | 'cpn': [cpn], | |
1477 | }) | |
1478 | playback_url = compat_urlparse.urlunparse( | |
1479 | parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) | |
1480 | ||
1481 | self._download_webpage( | |
1482 | playback_url, video_id, 'Marking watched', | |
1483 | 'Unable to mark watched', fatal=False) | |
1484 | ||
1485 | @staticmethod | |
1486 | def _extract_urls(webpage): | |
1487 | # Embedded YouTube player | |
1488 | entries = [ | |
1489 | unescapeHTML(mobj.group('url')) | |
1490 | for mobj in re.finditer(r'''(?x) | |
1491 | (?: | |
1492 | <iframe[^>]+?src=| | |
1493 | data-video-url=| | |
1494 | <embed[^>]+?src=| | |
1495 | embedSWF\(?:\s*| | |
1496 | <object[^>]+data=| | |
1497 | new\s+SWFObject\( | |
1498 | ) | |
1499 | (["\']) | |
1500 | (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ | |
1501 | (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) | |
1502 | \1''', webpage)] | |
1503 | ||
1504 | # lazyYT YouTube embed | |
1505 | entries.extend(list(map( | |
1506 | unescapeHTML, | |
1507 | re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) | |
1508 | ||
1509 | # Wordpress "YouTube Video Importer" plugin | |
1510 | matches = re.findall(r'''(?x)<div[^>]+ | |
1511 | class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ | |
1512 | data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) | |
1513 | entries.extend(m[-1] for m in matches) | |
1514 | ||
1515 | return entries | |
1516 | ||
1517 | @staticmethod | |
1518 | def _extract_url(webpage): | |
1519 | urls = YoutubeIE._extract_urls(webpage) | |
1520 | return urls[0] if urls else None | |
1521 | ||
1522 | @classmethod | |
1523 | def extract_id(cls, url): | |
1524 | mobj = re.match(cls._VALID_URL, url, re.VERBOSE) | |
1525 | if mobj is None: | |
1526 | raise ExtractorError('Invalid URL: %s' % url) | |
1527 | video_id = mobj.group(2) | |
1528 | return video_id | |
1529 | ||
1530 | def _extract_chapters_from_json(self, webpage, video_id, duration): | |
1531 | if not webpage: | |
1532 | return | |
1533 | data = self._extract_yt_initial_data(video_id, webpage) | |
1534 | if not data or not isinstance(data, dict): | |
1535 | return | |
1536 | chapters_list = try_get( | |
1537 | data, | |
1538 | lambda x: x['playerOverlays'] | |
1539 | ['playerOverlayRenderer'] | |
1540 | ['decoratedPlayerBarRenderer'] | |
1541 | ['decoratedPlayerBarRenderer'] | |
1542 | ['playerBar'] | |
1543 | ['chapteredPlayerBarRenderer'] | |
1544 | ['chapters'], | |
1545 | list) | |
1546 | if not chapters_list: | |
1547 | return | |
1548 | ||
1549 | def chapter_time(chapter): | |
1550 | return float_or_none( | |
1551 | try_get( | |
1552 | chapter, | |
1553 | lambda x: x['chapterRenderer']['timeRangeStartMillis'], | |
1554 | int), | |
1555 | scale=1000) | |
1556 | chapters = [] | |
1557 | for next_num, chapter in enumerate(chapters_list, start=1): | |
1558 | start_time = chapter_time(chapter) | |
1559 | if start_time is None: | |
1560 | continue | |
1561 | end_time = (chapter_time(chapters_list[next_num]) | |
1562 | if next_num < len(chapters_list) else duration) | |
1563 | if end_time is None: | |
1564 | continue | |
1565 | title = try_get( | |
1566 | chapter, lambda x: x['chapterRenderer']['title']['simpleText'], | |
1567 | compat_str) | |
1568 | chapters.append({ | |
1569 | 'start_time': start_time, | |
1570 | 'end_time': end_time, | |
1571 | 'title': title, | |
1572 | }) | |
1573 | return chapters | |
1574 | ||
1575 | @staticmethod | |
1576 | def _extract_chapters_from_description(description, duration): | |
1577 | if not description: | |
1578 | return None | |
1579 | chapter_lines = re.findall( | |
1580 | r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)', | |
1581 | description) | |
1582 | if not chapter_lines: | |
1583 | return None | |
1584 | chapters = [] | |
1585 | for next_num, (chapter_line, time_point) in enumerate( | |
1586 | chapter_lines, start=1): | |
1587 | start_time = parse_duration(time_point) | |
1588 | if start_time is None: | |
1589 | continue | |
1590 | if start_time > duration: | |
1591 | break | |
1592 | end_time = (duration if next_num == len(chapter_lines) | |
1593 | else parse_duration(chapter_lines[next_num][1])) | |
1594 | if end_time is None: | |
1595 | continue | |
1596 | if end_time > duration: | |
1597 | end_time = duration | |
1598 | if start_time > end_time: | |
1599 | break | |
1600 | chapter_title = re.sub( | |
1601 | r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-') | |
1602 | chapter_title = re.sub(r'\s+', ' ', chapter_title) | |
1603 | chapters.append({ | |
1604 | 'start_time': start_time, | |
1605 | 'end_time': end_time, | |
1606 | 'title': chapter_title, | |
1607 | }) | |
1608 | return chapters | |
1609 | ||
1610 | def _extract_chapters(self, webpage, description, video_id, duration): | |
1611 | return (self._extract_chapters_from_json(webpage, video_id, duration) | |
1612 | or self._extract_chapters_from_description(description, duration)) | |
1613 | ||
1614 | def _real_extract(self, url): | |
1615 | url, smuggled_data = unsmuggle_url(url, {}) | |
1616 | ||
1617 | proto = ( | |
1618 | 'http' if self._downloader.params.get('prefer_insecure', False) | |
1619 | else 'https') | |
1620 | ||
1621 | start_time = None | |
1622 | end_time = None | |
1623 | parsed_url = compat_urllib_parse_urlparse(url) | |
1624 | for component in [parsed_url.fragment, parsed_url.query]: | |
1625 | query = compat_parse_qs(component) | |
1626 | if start_time is None and 't' in query: | |
1627 | start_time = parse_duration(query['t'][0]) | |
1628 | if start_time is None and 'start' in query: | |
1629 | start_time = parse_duration(query['start'][0]) | |
1630 | if end_time is None and 'end' in query: | |
1631 | end_time = parse_duration(query['end'][0]) | |
1632 | ||
1633 | # Extract original video URL from URL with redirection, like age verification, using next_url parameter | |
1634 | mobj = re.search(self._NEXT_URL_RE, url) | |
1635 | if mobj: | |
1636 | url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/') | |
1637 | video_id = self.extract_id(url) | |
1638 | ||
1639 | # Get video webpage | |
1640 | url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id | |
1641 | video_webpage, urlh = self._download_webpage_handle(url, video_id) | |
1642 | ||
1643 | qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query) | |
1644 | video_id = qs.get('v', [None])[0] or video_id | |
1645 | ||
1646 | # Attempt to extract SWF player URL | |
1647 | mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) | |
1648 | if mobj is not None: | |
1649 | player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) | |
1650 | else: | |
1651 | player_url = None | |
1652 | ||
1653 | dash_mpds = [] | |
1654 | ||
1655 | def add_dash_mpd(video_info): | |
1656 | dash_mpd = video_info.get('dashmpd') | |
1657 | if dash_mpd and dash_mpd[0] not in dash_mpds: | |
1658 | dash_mpds.append(dash_mpd[0]) | |
1659 | ||
1660 | def add_dash_mpd_pr(pl_response): | |
1661 | dash_mpd = url_or_none(try_get( | |
1662 | pl_response, lambda x: x['streamingData']['dashManifestUrl'], | |
1663 | compat_str)) | |
1664 | if dash_mpd and dash_mpd not in dash_mpds: | |
1665 | dash_mpds.append(dash_mpd) | |
1666 | ||
1667 | is_live = None | |
1668 | view_count = None | |
1669 | ||
1670 | def extract_view_count(v_info): | |
1671 | return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) | |
1672 | ||
1673 | def extract_player_response(player_response, video_id): | |
1674 | pl_response = str_or_none(player_response) | |
1675 | if not pl_response: | |
1676 | return | |
1677 | pl_response = self._parse_json(pl_response, video_id, fatal=False) | |
1678 | if isinstance(pl_response, dict): | |
1679 | add_dash_mpd_pr(pl_response) | |
1680 | return pl_response | |
1681 | ||
1682 | def extract_embedded_config(embed_webpage, video_id): | |
1683 | embedded_config = self._search_regex( | |
1684 | r'setConfig\(({.*})\);', | |
1685 | embed_webpage, 'ytInitialData', default=None) | |
1686 | if embedded_config: | |
1687 | return embedded_config | |
1688 | ||
1689 | video_info = {} | |
1690 | player_response = {} | |
1691 | ytplayer_config = None | |
1692 | embed_webpage = None | |
1693 | ||
1694 | # Get video info | |
1695 | if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+' | |
1696 | or re.search(r'player-age-gate-content">', video_webpage) is not None): | |
1697 | cookie_keys = self._get_cookies('https://www.youtube.com').keys() | |
1698 | age_gate = True | |
1699 | # We simulate the access to the video from www.youtube.com/v/{video_id} | |
1700 | # this can be viewed without login into Youtube | |
1701 | url = proto + '://www.youtube.com/embed/%s' % video_id | |
1702 | embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') | |
1703 | ext = extract_embedded_config(embed_webpage, video_id) | |
1704 | # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext) | |
1705 | playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext) | |
1706 | if not playable_in_embed: | |
1707 | self.to_screen('Could not determine whether playabale in embed for video %s' % video_id) | |
1708 | playable_in_embed = '' | |
1709 | else: | |
1710 | playable_in_embed = playable_in_embed.group('playableinEmbed') | |
1711 | # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies) | |
1712 | # if re.search(r'player-unavailable">', embed_webpage) is not None: | |
1713 | if playable_in_embed == 'false': | |
1714 | ''' | |
1715 | # TODO apply this patch when Support for Python 2.6(!) and above drops | |
1716 | if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys | |
1717 | or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys): | |
1718 | ''' | |
1719 | if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys) | |
1720 | or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)): | |
1721 | age_gate = False | |
1722 | # Try looking directly into the video webpage | |
1723 | ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) | |
1724 | if ytplayer_config: | |
1725 | args = ytplayer_config.get("args") | |
1726 | if args is not None: | |
1727 | if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): | |
1728 | # Convert to the same format returned by compat_parse_qs | |
1729 | video_info = dict((k, [v]) for k, v in args.items()) | |
1730 | add_dash_mpd(video_info) | |
1731 | # Rental video is not rented but preview is available (e.g. | |
1732 | # https://www.youtube.com/watch?v=yYr8q0y5Jfg, | |
1733 | # https://github.com/ytdl-org/youtube-dl/issues/10532) | |
1734 | if not video_info and args.get('ypc_vid'): | |
1735 | return self.url_result( | |
1736 | args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) | |
1737 | if args.get('livestream') == '1' or args.get('live_playback') == 1: | |
1738 | is_live = True | |
1739 | if not player_response: | |
1740 | player_response = extract_player_response(args.get('player_response'), video_id) | |
1741 | elif not player_response: | |
1742 | player_response = ytplayer_config | |
1743 | if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): | |
1744 | add_dash_mpd_pr(player_response) | |
1745 | else: | |
1746 | raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True) | |
1747 | else: | |
1748 | data = compat_urllib_parse_urlencode({ | |
1749 | 'video_id': video_id, | |
1750 | 'eurl': 'https://youtube.googleapis.com/v/' + video_id, | |
1751 | 'sts': self._search_regex( | |
1752 | r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), | |
1753 | }) | |
1754 | video_info_url = proto + '://www.youtube.com/get_video_info?' + data | |
1755 | try: | |
1756 | video_info_webpage = self._download_webpage( | |
1757 | video_info_url, video_id, | |
1758 | note='Refetching age-gated info webpage', | |
1759 | errnote='unable to download video info webpage') | |
1760 | except ExtractorError: | |
1761 | video_info_webpage = None | |
1762 | if video_info_webpage: | |
1763 | video_info = compat_parse_qs(video_info_webpage) | |
1764 | pl_response = video_info.get('player_response', [None])[0] | |
1765 | player_response = extract_player_response(pl_response, video_id) | |
1766 | add_dash_mpd(video_info) | |
1767 | view_count = extract_view_count(video_info) | |
1768 | else: | |
1769 | age_gate = False | |
1770 | # Try looking directly into the video webpage | |
1771 | ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) | |
1772 | if ytplayer_config: | |
1773 | args = ytplayer_config.get('args', {}) | |
1774 | if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'): | |
1775 | # Convert to the same format returned by compat_parse_qs | |
1776 | video_info = dict((k, [v]) for k, v in args.items()) | |
1777 | add_dash_mpd(video_info) | |
1778 | # Rental video is not rented but preview is available (e.g. | |
1779 | # https://www.youtube.com/watch?v=yYr8q0y5Jfg, | |
1780 | # https://github.com/ytdl-org/youtube-dl/issues/10532) | |
1781 | if not video_info and args.get('ypc_vid'): | |
1782 | return self.url_result( | |
1783 | args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) | |
1784 | if args.get('livestream') == '1' or args.get('live_playback') == 1: | |
1785 | is_live = True | |
1786 | if not player_response: | |
1787 | player_response = extract_player_response(args.get('player_response'), video_id) | |
1788 | if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): | |
1789 | add_dash_mpd_pr(player_response) | |
1790 | ||
1791 | if not video_info and not player_response: | |
1792 | player_response = extract_player_response( | |
1793 | self._search_regex( | |
1794 | (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE), | |
1795 | self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage, | |
1796 | 'initial player response', default='{}'), | |
1797 | video_id) | |
1798 | ||
1799 | def extract_unavailable_message(): | |
1800 | messages = [] | |
1801 | for tag, kind in (('h1', 'message'), ('div', 'submessage')): | |
1802 | msg = self._html_search_regex( | |
1803 | r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind), | |
1804 | video_webpage, 'unavailable %s' % kind, default=None) | |
1805 | if msg: | |
1806 | messages.append(msg) | |
1807 | if messages: | |
1808 | return '\n'.join(messages) | |
1809 | ||
1810 | if not video_info and not player_response: | |
1811 | unavailable_message = extract_unavailable_message() | |
1812 | if not unavailable_message: | |
1813 | unavailable_message = 'Unable to extract video data' | |
1814 | raise ExtractorError( | |
1815 | 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) | |
1816 | ||
1817 | if not isinstance(video_info, dict): | |
1818 | video_info = {} | |
1819 | ||
1820 | video_details = try_get( | |
1821 | player_response, lambda x: x['videoDetails'], dict) or {} | |
1822 | ||
1823 | microformat = try_get( | |
1824 | player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {} | |
1825 | ||
1826 | video_title = video_info.get('title', [None])[0] or video_details.get('title') | |
1827 | if not video_title: | |
1828 | self._downloader.report_warning('Unable to extract video title') | |
1829 | video_title = '_' | |
1830 | ||
1831 | description_original = video_description = get_element_by_id("eow-description", video_webpage) | |
1832 | if video_description: | |
1833 | ||
1834 | def replace_url(m): | |
1835 | redir_url = compat_urlparse.urljoin(url, m.group(1)) | |
1836 | parsed_redir_url = compat_urllib_parse_urlparse(redir_url) | |
1837 | if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect': | |
1838 | qs = compat_parse_qs(parsed_redir_url.query) | |
1839 | q = qs.get('q') | |
1840 | if q and q[0]: | |
1841 | return q[0] | |
1842 | return redir_url | |
1843 | ||
1844 | description_original = video_description = re.sub(r'''(?x) | |
1845 | <a\s+ | |
1846 | (?:[a-zA-Z-]+="[^"]*"\s+)*? | |
1847 | (?:title|href)="([^"]+)"\s+ | |
1848 | (?:[a-zA-Z-]+="[^"]*"\s+)*? | |
1849 | class="[^"]*"[^>]*> | |
1850 | [^<]+\.{3}\s* | |
1851 | </a> | |
1852 | ''', replace_url, video_description) | |
1853 | video_description = clean_html(video_description) | |
1854 | else: | |
1855 | video_description = video_details.get('shortDescription') | |
1856 | if video_description is None: | |
1857 | video_description = self._html_search_meta('description', video_webpage) | |
1858 | ||
1859 | if not smuggled_data.get('force_singlefeed', False): | |
1860 | if not self._downloader.params.get('noplaylist'): | |
1861 | multifeed_metadata_list = try_get( | |
1862 | player_response, | |
1863 | lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'], | |
1864 | compat_str) or try_get( | |
1865 | video_info, lambda x: x['multifeed_metadata_list'][0], compat_str) | |
1866 | if multifeed_metadata_list: | |
1867 | entries = [] | |
1868 | feed_ids = [] | |
1869 | for feed in multifeed_metadata_list.split(','): | |
1870 | # Unquote should take place before split on comma (,) since textual | |
1871 | # fields may contain comma as well (see | |
1872 | # https://github.com/ytdl-org/youtube-dl/issues/8536) | |
1873 | feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) | |
1874 | ||
1875 | def feed_entry(name): | |
1876 | return try_get(feed_data, lambda x: x[name][0], compat_str) | |
1877 | ||
1878 | feed_id = feed_entry('id') | |
1879 | if not feed_id: | |
1880 | continue | |
1881 | feed_title = feed_entry('title') | |
1882 | title = video_title | |
1883 | if feed_title: | |
1884 | title += ' (%s)' % feed_title | |
1885 | entries.append({ | |
1886 | '_type': 'url_transparent', | |
1887 | 'ie_key': 'Youtube', | |
1888 | 'url': smuggle_url( | |
1889 | '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]), | |
1890 | {'force_singlefeed': True}), | |
1891 | 'title': title, | |
1892 | }) | |
1893 | feed_ids.append(feed_id) | |
1894 | self.to_screen( | |
1895 | 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' | |
1896 | % (', '.join(feed_ids), video_id)) | |
1897 | return self.playlist_result(entries, video_id, video_title, video_description) | |
1898 | else: | |
1899 | self.to_screen('Downloading just video %s because of --no-playlist' % video_id) | |
1900 | ||
1901 | if view_count is None: | |
1902 | view_count = extract_view_count(video_info) | |
1903 | if view_count is None and video_details: | |
1904 | view_count = int_or_none(video_details.get('viewCount')) | |
1905 | if view_count is None and microformat: | |
1906 | view_count = int_or_none(microformat.get('viewCount')) | |
1907 | ||
1908 | if is_live is None: | |
1909 | is_live = bool_or_none(video_details.get('isLive')) | |
1910 | ||
1911 | has_live_chat_replay = False | |
1912 | if not is_live: | |
1913 | yt_initial_data = self._get_yt_initial_data(video_id, video_webpage) | |
1914 | try: | |
1915 | yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] | |
1916 | has_live_chat_replay = True | |
1917 | except (KeyError, IndexError, TypeError): | |
1918 | pass | |
1919 | ||
1920 | # Check for "rental" videos | |
1921 | if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: | |
1922 | raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) | |
1923 | ||
1924 | def _extract_filesize(media_url): | |
1925 | return int_or_none(self._search_regex( | |
1926 | r'\bclen[=/](\d+)', media_url, 'filesize', default=None)) | |
1927 | ||
1928 | streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or [] | |
1929 | streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or []) | |
1930 | ||
1931 | if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | |
1932 | self.report_rtmp_download() | |
1933 | formats = [{ | |
1934 | 'format_id': '_rtmp', | |
1935 | 'protocol': 'rtmp', | |
1936 | 'url': video_info['conn'][0], | |
1937 | 'player_url': player_url, | |
1938 | }] | |
1939 | elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): | |
1940 | encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] | |
1941 | if 'rtmpe%3Dyes' in encoded_url_map: | |
1942 | raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) | |
1943 | formats = [] | |
1944 | formats_spec = {} | |
1945 | fmt_list = video_info.get('fmt_list', [''])[0] | |
1946 | if fmt_list: | |
1947 | for fmt in fmt_list.split(','): | |
1948 | spec = fmt.split('/') | |
1949 | if len(spec) > 1: | |
1950 | width_height = spec[1].split('x') | |
1951 | if len(width_height) == 2: | |
1952 | formats_spec[spec[0]] = { | |
1953 | 'resolution': spec[1], | |
1954 | 'width': int_or_none(width_height[0]), | |
1955 | 'height': int_or_none(width_height[1]), | |
1956 | } | |
1957 | for fmt in streaming_formats: | |
1958 | itag = str_or_none(fmt.get('itag')) | |
1959 | if not itag: | |
1960 | continue | |
1961 | quality = fmt.get('quality') | |
1962 | quality_label = fmt.get('qualityLabel') or quality | |
1963 | formats_spec[itag] = { | |
1964 | 'asr': int_or_none(fmt.get('audioSampleRate')), | |
1965 | 'filesize': int_or_none(fmt.get('contentLength')), | |
1966 | 'format_note': quality_label, | |
1967 | 'fps': int_or_none(fmt.get('fps')), | |
1968 | 'height': int_or_none(fmt.get('height')), | |
1969 | # bitrate for itag 43 is always 2147483647 | |
1970 | 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None, | |
1971 | 'width': int_or_none(fmt.get('width')), | |
1972 | } | |
1973 | ||
1974 | for fmt in streaming_formats: | |
1975 | if fmt.get('drmFamilies') or fmt.get('drm_families'): | |
1976 | continue | |
1977 | url = url_or_none(fmt.get('url')) | |
1978 | ||
1979 | if not url: | |
1980 | cipher = fmt.get('cipher') or fmt.get('signatureCipher') | |
1981 | if not cipher: | |
1982 | continue | |
1983 | url_data = compat_parse_qs(cipher) | |
1984 | url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str)) | |
1985 | if not url: | |
1986 | continue | |
1987 | else: | |
1988 | cipher = None | |
1989 | url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query) | |
1990 | ||
1991 | stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0])) | |
1992 | # Unsupported FORMAT_STREAM_TYPE_OTF | |
1993 | if stream_type == 3: | |
1994 | continue | |
1995 | ||
1996 | format_id = fmt.get('itag') or url_data['itag'][0] | |
1997 | if not format_id: | |
1998 | continue | |
1999 | format_id = compat_str(format_id) | |
2000 | ||
2001 | if cipher: | |
2002 | if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): | |
2003 | ASSETS_RE = ( | |
2004 | r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base', | |
2005 | r'"jsUrl"\s*:\s*("[^"]+")', | |
2006 | r'"assets":.+?"js":\s*("[^"]+")') | |
2007 | jsplayer_url_json = self._search_regex( | |
2008 | ASSETS_RE, | |
2009 | embed_webpage if age_gate else video_webpage, | |
2010 | 'JS player URL (1)', default=None) | |
2011 | if not jsplayer_url_json and not age_gate: | |
2012 | # We need the embed website after all | |
2013 | if embed_webpage is None: | |
2014 | embed_url = proto + '://www.youtube.com/embed/%s' % video_id | |
2015 | embed_webpage = self._download_webpage( | |
2016 | embed_url, video_id, 'Downloading embed webpage') | |
2017 | jsplayer_url_json = self._search_regex( | |
2018 | ASSETS_RE, embed_webpage, 'JS player URL') | |
2019 | ||
2020 | player_url = json.loads(jsplayer_url_json) | |
2021 | if player_url is None: | |
2022 | player_url_json = self._search_regex( | |
2023 | r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', | |
2024 | video_webpage, 'age gate player URL') | |
2025 | player_url = json.loads(player_url_json) | |
2026 | ||
2027 | if 'sig' in url_data: | |
2028 | url += '&signature=' + url_data['sig'][0] | |
2029 | elif 's' in url_data: | |
2030 | encrypted_sig = url_data['s'][0] | |
2031 | ||
2032 | if self._downloader.params.get('verbose'): | |
2033 | if player_url is None: | |
2034 | player_desc = 'unknown' | |
2035 | else: | |
2036 | player_type, player_version = self._extract_player_info(player_url) | |
2037 | player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version) | |
2038 | parts_sizes = self._signature_cache_id(encrypted_sig) | |
2039 | self.to_screen('{%s} signature length %s, %s' % | |
2040 | (format_id, parts_sizes, player_desc)) | |
2041 | ||
2042 | signature = self._decrypt_signature( | |
2043 | encrypted_sig, video_id, player_url, age_gate) | |
2044 | sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' | |
2045 | url += '&%s=%s' % (sp, signature) | |
2046 | if 'ratebypass' not in url: | |
2047 | url += '&ratebypass=yes' | |
2048 | ||
2049 | dct = { | |
2050 | 'format_id': format_id, | |
2051 | 'url': url, | |
2052 | 'player_url': player_url, | |
2053 | } | |
2054 | if format_id in self._formats: | |
2055 | dct.update(self._formats[format_id]) | |
2056 | if format_id in formats_spec: | |
2057 | dct.update(formats_spec[format_id]) | |
2058 | ||
2059 | # Some itags are not included in DASH manifest thus corresponding formats will | |
2060 | # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993). | |
2061 | # Trying to extract metadata from url_encoded_fmt_stream_map entry. | |
2062 | mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) | |
2063 | width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) | |
2064 | ||
2065 | if width is None: | |
2066 | width = int_or_none(fmt.get('width')) | |
2067 | if height is None: | |
2068 | height = int_or_none(fmt.get('height')) | |
2069 | ||
2070 | filesize = int_or_none(url_data.get( | |
2071 | 'clen', [None])[0]) or _extract_filesize(url) | |
2072 | ||
2073 | quality = url_data.get('quality', [None])[0] or fmt.get('quality') | |
2074 | quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel') | |
2075 | ||
2076 | tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000) | |
2077 | or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None | |
2078 | fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps')) | |
2079 | ||
2080 | more_fields = { | |
2081 | 'filesize': filesize, | |
2082 | 'tbr': tbr, | |
2083 | 'width': width, | |
2084 | 'height': height, | |
2085 | 'fps': fps, | |
2086 | 'format_note': quality_label or quality, | |
2087 | } | |
2088 | for key, value in more_fields.items(): | |
2089 | if value: | |
2090 | dct[key] = value | |
2091 | type_ = url_data.get('type', [None])[0] or fmt.get('mimeType') | |
2092 | if type_: | |
2093 | type_split = type_.split(';') | |
2094 | kind_ext = type_split[0].split('/') | |
2095 | if len(kind_ext) == 2: | |
2096 | kind, _ = kind_ext | |
2097 | dct['ext'] = mimetype2ext(type_split[0]) | |
2098 | if kind in ('audio', 'video'): | |
2099 | codecs = None | |
2100 | for mobj in re.finditer( | |
2101 | r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_): | |
2102 | if mobj.group('key') == 'codecs': | |
2103 | codecs = mobj.group('val') | |
2104 | break | |
2105 | if codecs: | |
2106 | dct.update(parse_codecs(codecs)) | |
2107 | if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none': | |
2108 | dct['downloader_options'] = { | |
2109 | # Youtube throttles chunks >~10M | |
2110 | 'http_chunk_size': 10485760, | |
2111 | } | |
2112 | formats.append(dct) | |
2113 | else: | |
2114 | manifest_url = ( | |
2115 | url_or_none(try_get( | |
2116 | player_response, | |
2117 | lambda x: x['streamingData']['hlsManifestUrl'], | |
2118 | compat_str)) | |
2119 | or url_or_none(try_get( | |
2120 | video_info, lambda x: x['hlsvp'][0], compat_str))) | |
2121 | if manifest_url: | |
2122 | formats = [] | |
2123 | m3u8_formats = self._extract_m3u8_formats( | |
2124 | manifest_url, video_id, 'mp4', fatal=False) | |
2125 | for a_format in m3u8_formats: | |
2126 | itag = self._search_regex( | |
2127 | r'/itag/(\d+)/', a_format['url'], 'itag', default=None) | |
2128 | if itag: | |
2129 | a_format['format_id'] = itag | |
2130 | if itag in self._formats: | |
2131 | dct = self._formats[itag].copy() | |
2132 | dct.update(a_format) | |
2133 | a_format = dct | |
2134 | a_format['player_url'] = player_url | |
2135 | # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming | |
2136 | a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' | |
2137 | if self._downloader.params.get('youtube_include_hls_manifest', True): | |
2138 | formats.append(a_format) | |
2139 | else: | |
2140 | error_message = extract_unavailable_message() | |
2141 | if not error_message: | |
2142 | reason_list = try_get( | |
2143 | player_response, | |
2144 | lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'], | |
2145 | list) or [] | |
2146 | for reason in reason_list: | |
2147 | if not isinstance(reason, dict): | |
2148 | continue | |
2149 | reason_text = try_get(reason, lambda x: x['text'], compat_str) | |
2150 | if reason_text: | |
2151 | if not error_message: | |
2152 | error_message = '' | |
2153 | error_message += reason_text | |
2154 | if error_message: | |
2155 | error_message = clean_html(error_message) | |
2156 | if not error_message: | |
2157 | error_message = clean_html(try_get( | |
2158 | player_response, lambda x: x['playabilityStatus']['reason'], | |
2159 | compat_str)) | |
2160 | if not error_message: | |
2161 | error_message = clean_html( | |
2162 | try_get(video_info, lambda x: x['reason'][0], compat_str)) | |
2163 | if error_message: | |
2164 | raise ExtractorError(error_message, expected=True) | |
2165 | raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info') | |
2166 | ||
2167 | # uploader | |
2168 | video_uploader = try_get( | |
2169 | video_info, lambda x: x['author'][0], | |
2170 | compat_str) or str_or_none(video_details.get('author')) | |
2171 | if video_uploader: | |
2172 | video_uploader = compat_urllib_parse_unquote_plus(video_uploader) | |
2173 | else: | |
2174 | self._downloader.report_warning('unable to extract uploader name') | |
2175 | ||
2176 | # uploader_id | |
2177 | video_uploader_id = None | |
2178 | video_uploader_url = None | |
2179 | mobj = re.search( | |
2180 | r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">', | |
2181 | video_webpage) | |
2182 | if mobj is not None: | |
2183 | video_uploader_id = mobj.group('uploader_id') | |
2184 | video_uploader_url = mobj.group('uploader_url') | |
2185 | else: | |
2186 | owner_profile_url = url_or_none(microformat.get('ownerProfileUrl')) | |
2187 | if owner_profile_url: | |
2188 | video_uploader_id = self._search_regex( | |
2189 | r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id', | |
2190 | default=None) | |
2191 | video_uploader_url = owner_profile_url | |
2192 | ||
2193 | channel_id = ( | |
2194 | str_or_none(video_details.get('channelId')) | |
2195 | or self._html_search_meta( | |
2196 | 'channelId', video_webpage, 'channel id', default=None) | |
2197 | or self._search_regex( | |
2198 | r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1', | |
2199 | video_webpage, 'channel id', default=None, group='id')) | |
2200 | channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None | |
2201 | ||
2202 | thumbnails = [] | |
2203 | thumbnails_list = try_get( | |
2204 | video_details, lambda x: x['thumbnail']['thumbnails'], list) or [] | |
2205 | for t in thumbnails_list: | |
2206 | if not isinstance(t, dict): | |
2207 | continue | |
2208 | thumbnail_url = url_or_none(t.get('url')) | |
2209 | if not thumbnail_url: | |
2210 | continue | |
2211 | thumbnails.append({ | |
2212 | 'url': thumbnail_url, | |
2213 | 'width': int_or_none(t.get('width')), | |
2214 | 'height': int_or_none(t.get('height')), | |
2215 | }) | |
2216 | ||
2217 | if not thumbnails: | |
2218 | video_thumbnail = None | |
2219 | # We try first to get a high quality image: | |
2220 | m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">', | |
2221 | video_webpage, re.DOTALL) | |
2222 | if m_thumb is not None: | |
2223 | video_thumbnail = m_thumb.group(1) | |
2224 | thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str) | |
2225 | if thumbnail_url: | |
2226 | video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url) | |
2227 | if video_thumbnail: | |
2228 | thumbnails.append({'url': video_thumbnail}) | |
2229 | ||
2230 | # upload date | |
2231 | upload_date = self._html_search_meta( | |
2232 | 'datePublished', video_webpage, 'upload date', default=None) | |
2233 | if not upload_date: | |
2234 | upload_date = self._search_regex( | |
2235 | [r'(?s)id="eow-date.*?>(.*?)</span>', | |
2236 | r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'], | |
2237 | video_webpage, 'upload date', default=None) | |
2238 | if not upload_date: | |
2239 | upload_date = microformat.get('publishDate') or microformat.get('uploadDate') | |
2240 | upload_date = unified_strdate(upload_date) | |
2241 | ||
2242 | video_license = self._html_search_regex( | |
2243 | r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li', | |
2244 | video_webpage, 'license', default=None) | |
2245 | ||
2246 | m_music = re.search( | |
2247 | r'''(?x) | |
2248 | <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s* | |
2249 | <ul[^>]*>\s* | |
2250 | <li>(?P<title>.+?) | |
2251 | by (?P<creator>.+?) | |
2252 | (?: | |
2253 | \(.+?\)| | |
2254 | <a[^>]* | |
2255 | (?: | |
2256 | \bhref=["\']/red[^>]*>| # drop possible | |
2257 | >\s*Listen ad-free with YouTube Red # YouTube Red ad | |
2258 | ) | |
2259 | .*? | |
2260 | )?</li | |
2261 | ''', | |
2262 | video_webpage) | |
2263 | if m_music: | |
2264 | video_alt_title = remove_quotes(unescapeHTML(m_music.group('title'))) | |
2265 | video_creator = clean_html(m_music.group('creator')) | |
2266 | else: | |
2267 | video_alt_title = video_creator = None | |
2268 | ||
        def extract_meta(field):
            # Pull a named metadata field (e.g. 'Song', 'Artist', 'Album')
            # out of the watch page's <h4 class="title">FIELD</h4><ul><li>…
            # markup; returns None when the field is absent.
            return self._html_search_regex(
                r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
                video_webpage, field, default=None)
2273 | ||
2274 | track = extract_meta('Song') | |
2275 | artist = extract_meta('Artist') | |
2276 | album = extract_meta('Album') | |
2277 | ||
2278 | # Youtube Music Auto-generated description | |
2279 | release_date = release_year = None | |
2280 | if video_description: | |
2281 | mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description) | |
2282 | if mobj: | |
2283 | if not track: | |
2284 | track = mobj.group('track').strip() | |
2285 | if not artist: | |
2286 | artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')) | |
2287 | if not album: | |
2288 | album = mobj.group('album'.strip()) | |
2289 | release_year = mobj.group('release_year') | |
2290 | release_date = mobj.group('release_date') | |
2291 | if release_date: | |
2292 | release_date = release_date.replace('-', '') | |
2293 | if not release_year: | |
2294 | release_year = int(release_date[:4]) | |
2295 | if release_year: | |
2296 | release_year = int(release_year) | |
2297 | ||
2298 | yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage) | |
2299 | contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or [] | |
2300 | for content in contents: | |
2301 | rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or [] | |
2302 | multiple_songs = False | |
2303 | for row in rows: | |
2304 | if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: | |
2305 | multiple_songs = True | |
2306 | break | |
2307 | for row in rows: | |
2308 | mrr = row.get('metadataRowRenderer') or {} | |
2309 | mrr_title = try_get( | |
2310 | mrr, lambda x: x['title']['simpleText'], compat_str) | |
2311 | mrr_contents = try_get( | |
2312 | mrr, lambda x: x['contents'][0], dict) or {} | |
2313 | mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str) | |
2314 | if not (mrr_title and mrr_contents_text): | |
2315 | continue | |
2316 | if mrr_title == 'License': | |
2317 | video_license = mrr_contents_text | |
2318 | elif not multiple_songs: | |
2319 | if mrr_title == 'Album': | |
2320 | album = mrr_contents_text | |
2321 | elif mrr_title == 'Artist': | |
2322 | artist = mrr_contents_text | |
2323 | elif mrr_title == 'Song': | |
2324 | track = mrr_contents_text | |
2325 | ||
2326 | m_episode = re.search( | |
2327 | r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', | |
2328 | video_webpage) | |
2329 | if m_episode: | |
2330 | series = unescapeHTML(m_episode.group('series')) | |
2331 | season_number = int(m_episode.group('season')) | |
2332 | episode_number = int(m_episode.group('episode')) | |
2333 | else: | |
2334 | series = season_number = episode_number = None | |
2335 | ||
2336 | m_cat_container = self._search_regex( | |
2337 | r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', | |
2338 | video_webpage, 'categories', default=None) | |
2339 | category = None | |
2340 | if m_cat_container: | |
2341 | category = self._html_search_regex( | |
2342 | r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', | |
2343 | default=None) | |
2344 | if not category: | |
2345 | category = try_get( | |
2346 | microformat, lambda x: x['category'], compat_str) | |
2347 | video_categories = None if category is None else [category] | |
2348 | ||
2349 | video_tags = [ | |
2350 | unescapeHTML(m.group('content')) | |
2351 | for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] | |
2352 | if not video_tags: | |
2353 | video_tags = try_get(video_details, lambda x: x['keywords'], list) | |
2354 | ||
        def _extract_count(count_name):
            # Scrape a counter ('like' / 'dislike') from the watch page:
            # tries the classic button markup first, then the newer
            # JSON-embedded '"label": "N <count_name>s"' form.
            # Returns an int, or None if neither pattern matches.
            return str_to_int(self._search_regex(
                (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
                 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
                video_webpage, count_name, default=None))
2360 | ||
2361 | like_count = _extract_count('like') | |
2362 | dislike_count = _extract_count('dislike') | |
2363 | ||
2364 | if view_count is None: | |
2365 | view_count = str_to_int(self._search_regex( | |
2366 | r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage, | |
2367 | 'view count', default=None)) | |
2368 | ||
2369 | average_rating = ( | |
2370 | float_or_none(video_details.get('averageRating')) | |
2371 | or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0]))) | |
2372 | ||
2373 | # subtitles | |
2374 | video_subtitles = self.extract_subtitles( | |
2375 | video_id, video_webpage, has_live_chat_replay) | |
2376 | automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config) | |
2377 | ||
2378 | video_duration = try_get( | |
2379 | video_info, lambda x: int_or_none(x['length_seconds'][0])) | |
2380 | if not video_duration: | |
2381 | video_duration = int_or_none(video_details.get('lengthSeconds')) | |
2382 | if not video_duration: | |
2383 | video_duration = parse_duration(self._html_search_meta( | |
2384 | 'duration', video_webpage, 'video duration')) | |
2385 | ||
2386 | # Get Subscriber Count of channel | |
2387 | subscriber_count = parse_count(self._search_regex( | |
2388 | r'"text":"([\d\.]+\w?) subscribers"', | |
2389 | video_webpage, | |
2390 | 'subscriber count', | |
2391 | default=None | |
2392 | )) | |
2393 | ||
2394 | # annotations | |
2395 | video_annotations = None | |
2396 | if self._downloader.params.get('writeannotations', False): | |
2397 | xsrf_token = None | |
2398 | ytcfg = self._extract_ytcfg(video_id, video_webpage) | |
2399 | if ytcfg: | |
2400 | xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str) | |
2401 | if not xsrf_token: | |
2402 | xsrf_token = self._search_regex( | |
2403 | r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2', | |
2404 | video_webpage, 'xsrf token', group='xsrf_token', fatal=False) | |
2405 | invideo_url = try_get( | |
2406 | player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str) | |
2407 | if xsrf_token and invideo_url: | |
2408 | xsrf_field_name = None | |
2409 | if ytcfg: | |
2410 | xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str) | |
2411 | if not xsrf_field_name: | |
2412 | xsrf_field_name = self._search_regex( | |
2413 | r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2', | |
2414 | video_webpage, 'xsrf field name', | |
2415 | group='xsrf_field_name', default='session_token') | |
2416 | video_annotations = self._download_webpage( | |
2417 | self._proto_relative_url(invideo_url), | |
2418 | video_id, note='Downloading annotations', | |
2419 | errnote='Unable to download video annotations', fatal=False, | |
2420 | data=urlencode_postdata({xsrf_field_name: xsrf_token})) | |
2421 | ||
2422 | chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration) | |
2423 | ||
2424 | # Look for the DASH manifest | |
2425 | if self._downloader.params.get('youtube_include_dash_manifest', True): | |
2426 | dash_mpd_fatal = True | |
2427 | for mpd_url in dash_mpds: | |
2428 | dash_formats = {} | |
2429 | try: | |
                    def decrypt_sig(mobj):
                        # re.sub callback: rewrite an encrypted '/s/<sig>' path
                        # component of the DASH manifest URL into its decrypted
                        # '/signature/<sig>' form.
                        s = mobj.group(1)
                        dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                        return '/signature/%s' % dec_s
2434 | ||
2435 | mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url) | |
2436 | ||
2437 | for df in self._extract_mpd_formats( | |
2438 | mpd_url, video_id, fatal=dash_mpd_fatal, | |
2439 | formats_dict=self._formats): | |
2440 | if not df.get('filesize'): | |
2441 | df['filesize'] = _extract_filesize(df['url']) | |
2442 | # Do not overwrite DASH format found in some previous DASH manifest | |
2443 | if df['format_id'] not in dash_formats: | |
2444 | dash_formats[df['format_id']] = df | |
2445 | # Additional DASH manifests may end up in HTTP Error 403 therefore | |
2446 | # allow them to fail without bug report message if we already have | |
2447 | # some DASH manifest succeeded. This is temporary workaround to reduce | |
2448 | # burst of bug reports until we figure out the reason and whether it | |
2449 | # can be fixed at all. | |
2450 | dash_mpd_fatal = False | |
2451 | except (ExtractorError, KeyError) as e: | |
2452 | self.report_warning( | |
2453 | 'Skipping DASH manifest: %r' % e, video_id) | |
2454 | if dash_formats: | |
2455 | # Remove the formats we found through non-DASH, they | |
2456 | # contain less info and it can be wrong, because we use | |
2457 | # fixed values (for example the resolution). See | |
2458 | # https://github.com/ytdl-org/youtube-dl/issues/5774 for an | |
2459 | # example. | |
2460 | formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] | |
2461 | formats.extend(dash_formats.values()) | |
2462 | ||
2463 | # Check for malformed aspect ratio | |
2464 | stretched_m = re.search( | |
2465 | r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">', | |
2466 | video_webpage) | |
2467 | if stretched_m: | |
2468 | w = float(stretched_m.group('w')) | |
2469 | h = float(stretched_m.group('h')) | |
2470 | # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0). | |
2471 | # We will only process correct ratios. | |
2472 | if w > 0 and h > 0: | |
2473 | ratio = w / h | |
2474 | for f in formats: | |
2475 | if f.get('vcodec') != 'none': | |
2476 | f['stretched_ratio'] = ratio | |
2477 | ||
2478 | if not formats: | |
2479 | if 'reason' in video_info: | |
2480 | if 'The uploader has not made this video available in your country.' in video_info['reason']: | |
2481 | regions_allowed = self._html_search_meta( | |
2482 | 'regionsAllowed', video_webpage, default=None) | |
2483 | countries = regions_allowed.split(',') if regions_allowed else None | |
2484 | self.raise_geo_restricted( | |
2485 | msg=video_info['reason'][0], countries=countries) | |
2486 | reason = video_info['reason'][0] | |
2487 | if 'Invalid parameters' in reason: | |
2488 | unavailable_message = extract_unavailable_message() | |
2489 | if unavailable_message: | |
2490 | reason = unavailable_message | |
2491 | raise ExtractorError( | |
2492 | 'YouTube said: %s' % reason, | |
2493 | expected=True, video_id=video_id) | |
2494 | if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']): | |
2495 | raise ExtractorError('This video is DRM protected.', expected=True) | |
2496 | ||
2497 | self._sort_formats(formats) | |
2498 | ||
2499 | self.mark_watched(video_id, video_info, player_response) | |
2500 | ||
2501 | return { | |
2502 | 'id': video_id, | |
2503 | 'uploader': video_uploader, | |
2504 | 'uploader_id': video_uploader_id, | |
2505 | 'uploader_url': video_uploader_url, | |
2506 | 'channel_id': channel_id, | |
2507 | 'channel_url': channel_url, | |
2508 | 'upload_date': upload_date, | |
2509 | 'license': video_license, | |
2510 | 'creator': video_creator or artist, | |
2511 | 'title': video_title, | |
2512 | 'alt_title': video_alt_title or track, | |
2513 | 'thumbnails': thumbnails, | |
2514 | 'description': video_description, | |
2515 | 'categories': video_categories, | |
2516 | 'tags': video_tags, | |
2517 | 'subtitles': video_subtitles, | |
2518 | 'automatic_captions': automatic_captions, | |
2519 | 'duration': video_duration, | |
2520 | 'age_limit': 18 if age_gate else 0, | |
2521 | 'annotations': video_annotations, | |
2522 | 'chapters': chapters, | |
2523 | 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id, | |
2524 | 'view_count': view_count, | |
2525 | 'like_count': like_count, | |
2526 | 'dislike_count': dislike_count, | |
2527 | 'average_rating': average_rating, | |
2528 | 'formats': formats, | |
2529 | 'is_live': is_live, | |
2530 | 'start_time': start_time, | |
2531 | 'end_time': end_time, | |
2532 | 'series': series, | |
2533 | 'season_number': season_number, | |
2534 | 'episode_number': episode_number, | |
2535 | 'track': track, | |
2536 | 'artist': artist, | |
2537 | 'album': album, | |
2538 | 'release_date': release_date, | |
2539 | 'release_year': release_year, | |
2540 | 'subscriber_count': subscriber_count, | |
2541 | } | |
2542 | ||
2543 | ||
class YoutubeTabIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube "tab" pages: channel/user/c pages, feeds and playlists."""
    IE_DESC = 'YouTube.com tab'
    # Matches channel/user/playlist/feed URLs on youtube.com, youtubekids.com
    # and invidio.us.  Bare vanity URLs (e.g. /zsecurity) are accepted only
    # when the first path component is not one of the reserved endpoint names.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?:channel|c|user)/|
                            (?P<not_channel>
                                feed/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(?:%s)\b)  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    IE_NAME = 'youtube:tab'
2564 | ||
2565 | _TESTS = [{ | |
2566 | # playlists, multipage | |
2567 | 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', | |
2568 | 'playlist_mincount': 94, | |
2569 | 'info_dict': { | |
2570 | 'id': 'UCqj7Cz7revf5maW9g5pgNcg', | |
2571 | 'title': 'Игорь Клейнер - Playlists', | |
2572 | 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', | |
2573 | }, | |
2574 | }, { | |
2575 | # playlists, multipage, different order | |
2576 | 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', | |
2577 | 'playlist_mincount': 94, | |
2578 | 'info_dict': { | |
2579 | 'id': 'UCqj7Cz7revf5maW9g5pgNcg', | |
2580 | 'title': 'Игорь Клейнер - Playlists', | |
2581 | 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', | |
2582 | }, | |
2583 | }, { | |
2584 | # playlists, singlepage | |
2585 | 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', | |
2586 | 'playlist_mincount': 4, | |
2587 | 'info_dict': { | |
2588 | 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', | |
2589 | 'title': 'ThirstForScience - Playlists', | |
2590 | 'description': 'md5:609399d937ea957b0f53cbffb747a14c', | |
2591 | } | |
2592 | }, { | |
2593 | 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', | |
2594 | 'only_matching': True, | |
2595 | }, { | |
2596 | # basic, single video playlist | |
2597 | 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', | |
2598 | 'info_dict': { | |
2599 | 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', | |
2600 | 'uploader': 'Sergey M.', | |
2601 | 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', | |
2602 | 'title': 'youtube-dl public playlist', | |
2603 | }, | |
2604 | 'playlist_count': 1, | |
2605 | }, { | |
2606 | # empty playlist | |
2607 | 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', | |
2608 | 'info_dict': { | |
2609 | 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', | |
2610 | 'uploader': 'Sergey M.', | |
2611 | 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', | |
2612 | 'title': 'youtube-dl empty playlist', | |
2613 | }, | |
2614 | 'playlist_count': 0, | |
2615 | }, { | |
2616 | # Home tab | |
2617 | 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', | |
2618 | 'info_dict': { | |
2619 | 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', | |
2620 | 'title': 'lex will - Home', | |
2621 | 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', | |
2622 | }, | |
2623 | 'playlist_mincount': 2, | |
2624 | }, { | |
2625 | # Videos tab | |
2626 | 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', | |
2627 | 'info_dict': { | |
2628 | 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', | |
2629 | 'title': 'lex will - Videos', | |
2630 | 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', | |
2631 | }, | |
2632 | 'playlist_mincount': 975, | |
2633 | }, { | |
2634 | # Videos tab, sorted by popular | |
2635 | 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', | |
2636 | 'info_dict': { | |
2637 | 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', | |
2638 | 'title': 'lex will - Videos', | |
2639 | 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', | |
2640 | }, | |
2641 | 'playlist_mincount': 199, | |
2642 | }, { | |
2643 | # Playlists tab | |
2644 | 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', | |
2645 | 'info_dict': { | |
2646 | 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', | |
2647 | 'title': 'lex will - Playlists', | |
2648 | 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', | |
2649 | }, | |
2650 | 'playlist_mincount': 17, | |
2651 | }, { | |
2652 | # Community tab | |
2653 | 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', | |
2654 | 'info_dict': { | |
2655 | 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', | |
2656 | 'title': 'lex will - Community', | |
2657 | 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', | |
2658 | }, | |
2659 | 'playlist_mincount': 18, | |
2660 | }, { | |
2661 | # Channels tab | |
2662 | 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', | |
2663 | 'info_dict': { | |
2664 | 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', | |
2665 | 'title': 'lex will - Channels', | |
2666 | 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', | |
2667 | }, | |
2668 | 'playlist_mincount': 138, | |
2669 | }, { | |
2670 | 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', | |
2671 | 'only_matching': True, | |
2672 | }, { | |
2673 | 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', | |
2674 | 'only_matching': True, | |
2675 | }, { | |
2676 | 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', | |
2677 | 'only_matching': True, | |
2678 | }, { | |
2679 | 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', | |
2680 | 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', | |
2681 | 'info_dict': { | |
2682 | 'title': '29C3: Not my department', | |
2683 | 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', | |
2684 | 'uploader': 'Christiaan008', | |
2685 | 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', | |
2686 | }, | |
2687 | 'playlist_count': 96, | |
2688 | }, { | |
2689 | 'note': 'Large playlist', | |
2690 | 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', | |
2691 | 'info_dict': { | |
2692 | 'title': 'Uploads from Cauchemar', | |
2693 | 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', | |
2694 | 'uploader': 'Cauchemar', | |
2695 | 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', | |
2696 | }, | |
2697 | 'playlist_mincount': 1123, | |
2698 | }, { | |
2699 | # even larger playlist, 8832 videos | |
2700 | 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', | |
2701 | 'only_matching': True, | |
2702 | }, { | |
2703 | 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', | |
2704 | 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', | |
2705 | 'info_dict': { | |
2706 | 'title': 'Uploads from Interstellar Movie', | |
2707 | 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', | |
2708 | 'uploader': 'Interstellar Movie', | |
2709 | 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', | |
2710 | }, | |
2711 | 'playlist_mincount': 21, | |
2712 | }, { | |
2713 | # https://github.com/ytdl-org/youtube-dl/issues/21844 | |
2714 | 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', | |
2715 | 'info_dict': { | |
2716 | 'title': 'Data Analysis with Dr Mike Pound', | |
2717 | 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', | |
2718 | 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', | |
2719 | 'uploader': 'Computerphile', | |
2720 | }, | |
2721 | 'playlist_mincount': 11, | |
2722 | }, { | |
2723 | 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', | |
2724 | 'only_matching': True, | |
2725 | }, { | |
2726 | # Playlist URL that does not actually serve a playlist | |
2727 | 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', | |
2728 | 'info_dict': { | |
2729 | 'id': 'FqZTN594JQw', | |
2730 | 'ext': 'webm', | |
2731 | 'title': "Smiley's People 01 detective, Adventure Series, Action", | |
2732 | 'uploader': 'STREEM', | |
2733 | 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', | |
2734 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', | |
2735 | 'upload_date': '20150526', | |
2736 | 'license': 'Standard YouTube License', | |
2737 | 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', | |
2738 | 'categories': ['People & Blogs'], | |
2739 | 'tags': list, | |
2740 | 'view_count': int, | |
2741 | 'like_count': int, | |
2742 | 'dislike_count': int, | |
2743 | }, | |
2744 | 'params': { | |
2745 | 'skip_download': True, | |
2746 | }, | |
2747 | 'skip': 'This video is not available.', | |
2748 | 'add_ie': [YoutubeIE.ie_key()], | |
2749 | }, { | |
2750 | 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', | |
2751 | 'only_matching': True, | |
2752 | }, { | |
2753 | 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', | |
2754 | 'only_matching': True, | |
2755 | }, { | |
2756 | 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', | |
2757 | 'info_dict': { | |
2758 | 'id': '9Auq9mYxFEE', | |
2759 | 'ext': 'mp4', | |
2760 | 'title': 'Watch Sky News live', | |
2761 | 'uploader': 'Sky News', | |
2762 | 'uploader_id': 'skynews', | |
2763 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', | |
2764 | 'upload_date': '20191102', | |
2765 | 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662', | |
2766 | 'categories': ['News & Politics'], | |
2767 | 'tags': list, | |
2768 | 'like_count': int, | |
2769 | 'dislike_count': int, | |
2770 | }, | |
2771 | 'params': { | |
2772 | 'skip_download': True, | |
2773 | }, | |
2774 | }, { | |
2775 | 'url': 'https://www.youtube.com/user/TheYoungTurks/live', | |
2776 | 'info_dict': { | |
2777 | 'id': 'a48o2S1cPoo', | |
2778 | 'ext': 'mp4', | |
2779 | 'title': 'The Young Turks - Live Main Show', | |
2780 | 'uploader': 'The Young Turks', | |
2781 | 'uploader_id': 'TheYoungTurks', | |
2782 | 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', | |
2783 | 'upload_date': '20150715', | |
2784 | 'license': 'Standard YouTube License', | |
2785 | 'description': 'md5:438179573adcdff3c97ebb1ee632b891', | |
2786 | 'categories': ['News & Politics'], | |
2787 | 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], | |
2788 | 'like_count': int, | |
2789 | 'dislike_count': int, | |
2790 | }, | |
2791 | 'params': { | |
2792 | 'skip_download': True, | |
2793 | }, | |
2794 | 'only_matching': True, | |
2795 | }, { | |
2796 | 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', | |
2797 | 'only_matching': True, | |
2798 | }, { | |
2799 | 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', | |
2800 | 'only_matching': True, | |
2801 | }, { | |
2802 | 'url': 'https://www.youtube.com/feed/trending', | |
2803 | 'only_matching': True, | |
2804 | }, { | |
2805 | # needs auth | |
2806 | 'url': 'https://www.youtube.com/feed/library', | |
2807 | 'only_matching': True, | |
2808 | }, { | |
2809 | # needs auth | |
2810 | 'url': 'https://www.youtube.com/feed/history', | |
2811 | 'only_matching': True, | |
2812 | }, { | |
2813 | # needs auth | |
2814 | 'url': 'https://www.youtube.com/feed/subscriptions', | |
2815 | 'only_matching': True, | |
2816 | }, { | |
2817 | # needs auth | |
2818 | 'url': 'https://www.youtube.com/feed/watch_later', | |
2819 | 'only_matching': True, | |
2820 | }, { | |
2821 | # no longer available? | |
2822 | 'url': 'https://www.youtube.com/feed/recommended', | |
2823 | 'only_matching': True, | |
2824 | }, { | |
2825 | # inline playlist with not always working continuations | |
2826 | 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', | |
2827 | 'only_matching': True, | |
2828 | }, { | |
2829 | 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8', | |
2830 | 'only_matching': True, | |
2831 | }, { | |
2832 | 'url': 'https://www.youtube.com/course', | |
2833 | 'only_matching': True, | |
2834 | }, { | |
2835 | 'url': 'https://www.youtube.com/zsecurity', | |
2836 | 'only_matching': True, | |
2837 | }, { | |
2838 | 'url': 'http://www.youtube.com/NASAgovVideo/videos', | |
2839 | 'only_matching': True, | |
2840 | }, { | |
2841 | 'url': 'https://www.youtube.com/TheYoungTurks/live', | |
2842 | 'only_matching': True, | |
2843 | }] | |
2844 | ||
2845 | @classmethod | |
2846 | def suitable(cls, url): | |
2847 | return False if YoutubeIE.suitable(url) else super( | |
2848 | YoutubeTabIE, cls).suitable(url) | |
2849 | ||
2850 | def _extract_channel_id(self, webpage): | |
2851 | channel_id = self._html_search_meta( | |
2852 | 'channelId', webpage, 'channel id', default=None) | |
2853 | if channel_id: | |
2854 | return channel_id | |
2855 | channel_url = self._html_search_meta( | |
2856 | ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', | |
2857 | 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', | |
2858 | 'twitter:app:url:googleplay'), webpage, 'channel url') | |
2859 | return self._search_regex( | |
2860 | r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', | |
2861 | channel_url, 'channel id') | |
2862 | ||
2863 | @staticmethod | |
2864 | def _extract_grid_item_renderer(item): | |
2865 | for item_kind in ('Playlist', 'Video', 'Channel'): | |
2866 | renderer = item.get('grid%sRenderer' % item_kind) | |
2867 | if renderer: | |
2868 | return renderer | |
2869 | ||
2870 | def _extract_video(self, renderer): | |
2871 | video_id = renderer.get('videoId') | |
2872 | title = try_get( | |
2873 | renderer, | |
2874 | (lambda x: x['title']['runs'][0]['text'], | |
2875 | lambda x: x['title']['simpleText']), compat_str) | |
2876 | description = try_get( | |
2877 | renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], | |
2878 | compat_str) | |
2879 | duration = parse_duration(try_get( | |
2880 | renderer, lambda x: x['lengthText']['simpleText'], compat_str)) | |
2881 | view_count_text = try_get( | |
2882 | renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' | |
2883 | view_count = str_to_int(self._search_regex( | |
2884 | r'^([\d,]+)', re.sub(r'\s', '', view_count_text), | |
2885 | 'view count', default=None)) | |
2886 | uploader = try_get( | |
2887 | renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str) | |
2888 | return { | |
2889 | '_type': 'url_transparent', | |
2890 | 'ie_key': YoutubeIE.ie_key(), | |
2891 | 'id': video_id, | |
2892 | 'url': video_id, | |
2893 | 'title': title, | |
2894 | 'description': description, | |
2895 | 'duration': duration, | |
2896 | 'view_count': view_count, | |
2897 | 'uploader': uploader, | |
2898 | } | |
2899 | ||
2900 | def _grid_entries(self, grid_renderer): | |
2901 | for item in grid_renderer['items']: | |
2902 | if not isinstance(item, dict): | |
2903 | continue | |
2904 | renderer = self._extract_grid_item_renderer(item) | |
2905 | if not isinstance(renderer, dict): | |
2906 | continue | |
2907 | title = try_get( | |
2908 | renderer, lambda x: x['title']['runs'][0]['text'], compat_str) | |
2909 | # playlist | |
2910 | playlist_id = renderer.get('playlistId') | |
2911 | if playlist_id: | |
2912 | yield self.url_result( | |
2913 | 'https://www.youtube.com/playlist?list=%s' % playlist_id, | |
2914 | ie=YoutubeTabIE.ie_key(), video_id=playlist_id, | |
2915 | video_title=title) | |
2916 | # video | |
2917 | video_id = renderer.get('videoId') | |
2918 | if video_id: | |
2919 | yield self._extract_video(renderer) | |
2920 | # channel | |
2921 | channel_id = renderer.get('channelId') | |
2922 | if channel_id: | |
2923 | title = try_get( | |
2924 | renderer, lambda x: x['title']['simpleText'], compat_str) | |
2925 | yield self.url_result( | |
2926 | 'https://www.youtube.com/channel/%s' % channel_id, | |
2927 | ie=YoutubeTabIE.ie_key(), video_title=title) | |
2928 | ||
2929 | def _shelf_entries_from_content(self, shelf_renderer): | |
2930 | content = shelf_renderer.get('content') | |
2931 | if not isinstance(content, dict): | |
2932 | return | |
2933 | renderer = content.get('gridRenderer') | |
2934 | if renderer: | |
2935 | # TODO: add support for nested playlists so each shelf is processed | |
2936 | # as separate playlist | |
2937 | # TODO: this includes only first N items | |
2938 | for entry in self._grid_entries(renderer): | |
2939 | yield entry | |
2940 | renderer = content.get('horizontalListRenderer') | |
2941 | if renderer: | |
2942 | # TODO | |
2943 | pass | |
2944 | ||
2945 | def _shelf_entries(self, shelf_renderer, skip_channels=False): | |
2946 | ep = try_get( | |
2947 | shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], | |
2948 | compat_str) | |
2949 | shelf_url = urljoin('https://www.youtube.com', ep) | |
2950 | if shelf_url: | |
2951 | # Skipping links to another channels, note that checking for | |
2952 | # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL | |
2953 | # will not work | |
2954 | if skip_channels and '/channels?' in shelf_url: | |
2955 | return | |
2956 | title = try_get( | |
2957 | shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) | |
2958 | yield self.url_result(shelf_url, video_title=title) | |
2959 | # Shelf may not contain shelf URL, fallback to extraction from content | |
2960 | for entry in self._shelf_entries_from_content(shelf_renderer): | |
2961 | yield entry | |
2962 | ||
2963 | def _playlist_entries(self, video_list_renderer): | |
2964 | for content in video_list_renderer['contents']: | |
2965 | if not isinstance(content, dict): | |
2966 | continue | |
2967 | renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer') | |
2968 | if not isinstance(renderer, dict): | |
2969 | continue | |
2970 | video_id = renderer.get('videoId') | |
2971 | if not video_id: | |
2972 | continue | |
2973 | yield self._extract_video(renderer) | |
2974 | ||
2975 | r""" # Not needed in the new implementation | |
2976 | def _itemSection_entries(self, item_sect_renderer): | |
2977 | for content in item_sect_renderer['contents']: | |
2978 | if not isinstance(content, dict): | |
2979 | continue | |
2980 | renderer = content.get('videoRenderer', {}) | |
2981 | if not isinstance(renderer, dict): | |
2982 | continue | |
2983 | video_id = renderer.get('videoId') | |
2984 | if not video_id: | |
2985 | continue | |
2986 | yield self._extract_video(renderer) | |
2987 | """ | |
2988 | ||
2989 | def _rich_entries(self, rich_grid_renderer): | |
2990 | renderer = try_get( | |
2991 | rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {} | |
2992 | video_id = renderer.get('videoId') | |
2993 | if not video_id: | |
2994 | return | |
2995 | yield self._extract_video(renderer) | |
2996 | ||
2997 | def _video_entry(self, video_renderer): | |
2998 | video_id = video_renderer.get('videoId') | |
2999 | if video_id: | |
3000 | return self._extract_video(video_renderer) | |
3001 | ||
3002 | def _post_thread_entries(self, post_thread_renderer): | |
3003 | post_renderer = try_get( | |
3004 | post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict) | |
3005 | if not post_renderer: | |
3006 | return | |
3007 | # video attachment | |
3008 | video_renderer = try_get( | |
3009 | post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) | |
3010 | video_id = None | |
3011 | if video_renderer: | |
3012 | entry = self._video_entry(video_renderer) | |
3013 | if entry: | |
3014 | yield entry | |
3015 | # inline video links | |
3016 | runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] | |
3017 | for run in runs: | |
3018 | if not isinstance(run, dict): | |
3019 | continue | |
3020 | ep_url = try_get( | |
3021 | run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str) | |
3022 | if not ep_url: | |
3023 | continue | |
3024 | if not YoutubeIE.suitable(ep_url): | |
3025 | continue | |
3026 | ep_video_id = YoutubeIE._match_id(ep_url) | |
3027 | if video_id == ep_video_id: | |
3028 | continue | |
3029 | yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id) | |
3030 | ||
3031 | def _post_thread_continuation_entries(self, post_thread_continuation): | |
3032 | contents = post_thread_continuation.get('contents') | |
3033 | if not isinstance(contents, list): | |
3034 | return | |
3035 | for content in contents: | |
3036 | renderer = content.get('backstagePostThreadRenderer') | |
3037 | if not isinstance(renderer, dict): | |
3038 | continue | |
3039 | for entry in self._post_thread_entries(renderer): | |
3040 | yield entry | |
3041 | ||
3042 | @staticmethod | |
3043 | def _build_continuation_query(continuation, ctp=None): | |
3044 | query = { | |
3045 | 'ctoken': continuation, | |
3046 | 'continuation': continuation, | |
3047 | } | |
3048 | if ctp: | |
3049 | query['itct'] = ctp | |
3050 | return query | |
3051 | ||
3052 | @staticmethod | |
3053 | def _extract_next_continuation_data(renderer): | |
3054 | next_continuation = try_get( | |
3055 | renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) | |
3056 | if not next_continuation: | |
3057 | return | |
3058 | continuation = next_continuation.get('continuation') | |
3059 | if not continuation: | |
3060 | return | |
3061 | ctp = next_continuation.get('clickTrackingParams') | |
3062 | return YoutubeTabIE._build_continuation_query(continuation, ctp) | |
3063 | ||
3064 | @classmethod | |
3065 | def _extract_continuation(cls, renderer): | |
3066 | next_continuation = cls._extract_next_continuation_data(renderer) | |
3067 | if next_continuation: | |
3068 | return next_continuation | |
3069 | contents = renderer.get('contents') | |
3070 | if not isinstance(contents, list): | |
3071 | return | |
3072 | for content in contents: | |
3073 | if not isinstance(content, dict): | |
3074 | continue | |
3075 | continuation_ep = try_get( | |
3076 | content, lambda x: x['continuationItemRenderer']['continuationEndpoint'], | |
3077 | dict) | |
3078 | if not continuation_ep: | |
3079 | continue | |
3080 | continuation = try_get( | |
3081 | continuation_ep, lambda x: x['continuationCommand']['token'], compat_str) | |
3082 | if not continuation: | |
3083 | continue | |
3084 | ctp = continuation_ep.get('clickTrackingParams') | |
3085 | return YoutubeTabIE._build_continuation_query(continuation, ctp) | |
3086 | ||
    def _entries(self, tab, identity_token):
        """Yield all entries of a tab, following continuations page by page.

        First extracts whatever the initial tab payload contains, then keeps
        requesting /browse_ajax with the continuation token produced by the
        previous page until none is left.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Dispatch every content item to the matching *_entries helper and
            # stash the next continuation token in continuation_list[0]
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        # Shelves on the Channels tab are channel links we
                        # want to keep; elsewhere they are skipped
                        is_channels_tab = tab.get('title') == 'Channels'
                        for entry in self._shelf_entries(renderer, not is_channels_tab):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        for page_num in itertools.count(1):
            if not continuation:
                break
            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Legacy continuation layout
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # Newer continuation layout (onResponseReceivedActions)
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3230 | ||
3231 | @staticmethod | |
3232 | def _extract_selected_tab(tabs): | |
3233 | for tab in tabs: | |
3234 | if try_get(tab, lambda x: x['tabRenderer']['selected'], bool): | |
3235 | return tab['tabRenderer'] | |
3236 | else: | |
3237 | raise ExtractorError('Unable to find selected tab') | |
3238 | ||
3239 | @staticmethod | |
3240 | def _extract_uploader(data): | |
3241 | uploader = {} | |
3242 | sidebar_renderer = try_get( | |
3243 | data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) | |
3244 | if sidebar_renderer: | |
3245 | for item in sidebar_renderer: | |
3246 | if not isinstance(item, dict): | |
3247 | continue | |
3248 | renderer = item.get('playlistSidebarSecondaryInfoRenderer') | |
3249 | if not isinstance(renderer, dict): | |
3250 | continue | |
3251 | owner = try_get( | |
3252 | renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) | |
3253 | if owner: | |
3254 | uploader['uploader'] = owner.get('text') | |
3255 | uploader['uploader_id'] = try_get( | |
3256 | owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) | |
3257 | uploader['uploader_url'] = urljoin( | |
3258 | 'https://www.youtube.com/', | |
3259 | try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) | |
3260 | return uploader | |
3261 | ||
3262 | def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token): | |
3263 | selected_tab = self._extract_selected_tab(tabs) | |
3264 | renderer = try_get( | |
3265 | data, lambda x: x['metadata']['channelMetadataRenderer'], dict) | |
3266 | playlist_id = title = description = None | |
3267 | if renderer: | |
3268 | channel_title = renderer.get('title') or item_id | |
3269 | tab_title = selected_tab.get('title') | |
3270 | title = channel_title or item_id | |
3271 | if tab_title: | |
3272 | title += ' - %s' % tab_title | |
3273 | description = renderer.get('description') | |
3274 | playlist_id = renderer.get('externalId') | |
3275 | renderer = try_get( | |
3276 | data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) | |
3277 | if renderer: | |
3278 | title = renderer.get('title') | |
3279 | description = None | |
3280 | playlist_id = item_id | |
3281 | if playlist_id is None: | |
3282 | playlist_id = item_id | |
3283 | if title is None: | |
3284 | title = "Youtube " + playlist_id.title() | |
3285 | playlist = self.playlist_result( | |
3286 | self._entries(selected_tab, identity_token), | |
3287 | playlist_id=playlist_id, playlist_title=title, | |
3288 | playlist_description=description) | |
3289 | playlist.update(self._extract_uploader(data)) | |
3290 | return playlist | |
3291 | ||
3292 | def _extract_from_playlist(self, item_id, url, data, playlist): | |
3293 | title = playlist.get('title') or try_get( | |
3294 | data, lambda x: x['titleText']['simpleText'], compat_str) | |
3295 | playlist_id = playlist.get('playlistId') or item_id | |
3296 | # Inline playlist rendition continuation does not always work | |
3297 | # at Youtube side, so delegating regular tab-based playlist URL | |
3298 | # processing whenever possible. | |
3299 | playlist_url = urljoin(url, try_get( | |
3300 | playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], | |
3301 | compat_str)) | |
3302 | if playlist_url and playlist_url != url: | |
3303 | return self.url_result( | |
3304 | playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, | |
3305 | video_title=title) | |
3306 | return self.playlist_result( | |
3307 | self._playlist_entries(playlist), playlist_id=playlist_id, | |
3308 | playlist_title=title) | |
3309 | ||
3310 | @staticmethod | |
3311 | def _extract_alerts(data): | |
3312 | for alert_dict in try_get(data, lambda x: x['alerts'], list) or []: | |
3313 | if not isinstance(alert_dict, dict): | |
3314 | continue | |
3315 | for renderer in alert_dict: | |
3316 | alert = alert_dict[renderer] | |
3317 | alert_type = alert.get('type') | |
3318 | if not alert_type: | |
3319 | continue | |
3320 | message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) | |
3321 | if message: | |
3322 | yield alert_type, message | |
3323 | for run in try_get(alert, lambda x: x['text']['runs'], list) or []: | |
3324 | message = try_get(run, lambda x: x['text'], compat_str) | |
3325 | if message: | |
3326 | yield alert_type, message | |
3327 | ||
3328 | def _extract_identity_token(self, webpage, item_id): | |
3329 | ytcfg = self._extract_ytcfg(item_id, webpage) | |
3330 | if ytcfg: | |
3331 | token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) | |
3332 | if token: | |
3333 | return token | |
3334 | return self._search_regex( | |
3335 | r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, | |
3336 | 'identity token', default=None) | |
3337 | ||
    def _real_extract(self, url):
        """Dispatch a tab-style URL: tabbed channel/playlist page, inline
        watch-page playlist, or plain-video fallback."""
        item_id = self._match_id(url)
        # Normalize the host so all later requests hit www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Detect bare channel/user home pages (no sub-path after the id);
        # 'not_channel' is a group defined inside _VALID_URL
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        # watch-style URL without a video id: salvageable only via its playlist
        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
            if playlist_id:
                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
            else:
                raise ExtractorError('Unable to recognize tab page')
        if video_id and playlist_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, item_id)
        identity_token = self._extract_identity_token(webpage, item_id)
        data = self._extract_yt_initial_data(item_id, webpage)
        # Surface YouTube-side alerts (e.g. unavailable/private notices)
        for alert_type, alert_message in self._extract_alerts(data):
            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3389 | ||
3390 | ||
class YoutubePlaylistIE(InfoExtractor):
    # Thin wrapper extractor: accepts any playlist-id-bearing URL (or even a
    # bare playlist id) and delegates to YoutubeTabIE via a canonical
    # /playlist?list=... URL.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE whenever it can handle the URL directly
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        if not qs:
            # Bare playlist id without any URL query string
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3465 | ||
3466 | ||
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist id; rewrites
    # them into a full watch URL and delegates to YoutubeTabIE.
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3505 | ||
3506 | ||
class YoutubeYtUserIE(InfoExtractor):
    """Resolve 'ytuser:NAME' shortcuts to the corresponding user page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        # Delegate the actual extraction to the tab extractor
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3520 | ||
3521 | ||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve ':ytfav' shortcuts to the authenticated liked-videos list."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The liked-videos list is exposed as the special 'LL' playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3539 | ||
3540 | ||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n url_transparent video entries for query, paging
        through the innertube search API via continuation tokens."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            isr_contents = []
            continuation_token = None
            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            for index, isr in enumerate(slr_contents):
                if not isr_contents:
                    # 'or []' guards against entries without an
                    # itemSectionRenderer (e.g. the continuation item itself),
                    # where try_get returns None and the loop below would
                    # crash with TypeError
                    isr_contents = try_get(
                        slr_contents,
                        (lambda x: x[index]['itemSectionRenderer']['contents']),
                        list) or []
                    # Keep this section only if it actually contains videos
                    for content in isr_contents:
                        if content.get('videoRenderer') is not None:
                            break
                    else:
                        isr_contents = []

                if continuation_token is None:
                    continuation_token = try_get(
                        slr_contents,
                        lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
                            'token'],
                        compat_str)
                if continuation_token is not None and isr_contents:
                    break

            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Use the same comma-tolerant parsing as _extract_video:
                # a plain r'^(\d+)' would turn '1,234,567 views' into 1
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3648 | ||
3649 | ||
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that orders results by upload date, newest first.

    Identical to YoutubeSearchIE except for the pseudo-URL keyword and the
    pre-encoded sort parameter sent with the search request.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # Keyword used in the ytsearchdate pseudo-URL (handled by SearchInfoExtractor)
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search params selecting date ordering ("newest videos first")
    _SEARCH_PARAMS = 'CAI%3D'
3655 | ||
3656 | ||
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extractor for YouTube search result pages (/results?search_query=...).

    Reuses YoutubeSearchIE's paging machinery but takes the query (and the
    optional 'sp' filter/sort parameter) from an actual results-page URL
    instead of the ytsearch pseudo-URL keyword.
    """
    # The previous description was copy-pasted from YoutubeSearchIE and
    # referred to the "ytsearch" keyword, which this extractor does not
    # handle; describe the URL form instead.
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Override the search base class's keyword-derived pattern so that
        # suitable() matches real result-page URLs via _VALID_URL above.
        return cls._VALID_URL

    def _real_extract(self, url):
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees at least one of these parameters is present.
        query = (qs.get('search_query') or qs.get('q'))[0]
        # 'sp' carries YouTube's encoded filter/sort settings; forward it to
        # the search request built by YoutubeSearchIE._entries.
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3682 | ||
3683 | ||
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Common base for the personalised feed extractors.

    Subclasses must define the _FEED_NAME property; everything else is
    delegated to YoutubeTabIE via the corresponding /feed/ URL.
    """
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed, e.g. 'youtube:recommended'.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authenticate up front.
        self._login()

    def _real_extract(self, url):
        # Hand the canonical feed URL over to the tab extractor.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
3704 | ||
3705 | ||
class YoutubeWatchLaterIE(InfoExtractor):
    """Shortcut extractor for the signed-in user's Watch Later list."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch Later is exposed as the special playlist 'WL'; delegate to
        # the tab extractor for the actual extraction.
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
3718 | ||
3719 | ||
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the signed-in user's recommended-videos feed."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare YouTube front page as well as :ytrec / :ytrecommended
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Feed path component used by the base class: /feed/recommended
    _FEED_NAME = 'recommended'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
3734 | ||
3735 | ||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the signed-in user's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Feed path component used by the base class: /feed/subscriptions
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
3747 | ||
3748 | ||
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extractor for the signed-in user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r':ythistory'
    # Feed path component used by the base class: /feed/history
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
3757 | ||
3758 | ||
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video ID.

    Such URLs usually result from an unquoted command line where the shell
    treated '&' as a control operator and dropped the rest of the query
    string; matching them here lets us raise a helpful error instead of a
    generic "unsupported URL" message.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Deliberately refuse extraction: the URL matched but contains no
        # video ID, so explain the likely shell-quoting mistake instead.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3806 | ||
3807 | ||
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is shorter than the required 11 chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A 1-10 character ID cannot be a complete video ID, so fail with a
        # message pointing at the (probably copy-paste-truncated) URL.
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3823 | ||
3824 | ||
# Do YouTube show URLs even exist anymore? I couldn't find any.
3826 | r''' | |
3827 | class YoutubeShowIE(YoutubeTabIE): | |
3828 | IE_DESC = 'YouTube.com (multi-season) shows' | |
3829 | _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)' | |
3830 | IE_NAME = 'youtube:show' | |
3831 | _TESTS = [{ | |
3832 | 'url': 'https://www.youtube.com/show/airdisasters', | |
3833 | 'playlist_mincount': 5, | |
3834 | 'info_dict': { | |
3835 | 'id': 'airdisasters', | |
3836 | 'title': 'Air Disasters', | |
3837 | } | |
3838 | }] | |
3839 | ||
3840 | def _real_extract(self, url): | |
3841 | playlist_id = self._match_id(url) | |
3842 | return super(YoutubeShowIE, self)._real_extract( | |
3843 | 'https://www.youtube.com/show/%s/playlists' % playlist_id) | |
3844 | ''' |