]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Minor changes to make it easier to merge
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_kwargs,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27)
28from ..utils import (
29 bool_or_none,
30 clean_html,
31 error_to_compat_str,
32 ExtractorError,
33 float_or_none,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 parse_codecs,
38 parse_count,
39 parse_duration,
40 remove_quotes,
41 remove_start,
42 smuggle_url,
43 str_or_none,
44 str_to_int,
45 try_get,
46 unescapeHTML,
47 unified_strdate,
48 unsmuggle_url,
49 update_url_query,
50 uppercase_escape,
51 url_or_none,
52 urlencode_postdata,
53 urljoin,
54)
55
56
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors.

    Handles account login (including two-factor auth), the English-language
    PREF cookie, and calls to the youtubei ("InnerTube") JSON API that all
    concrete YouTube extractors share.
    """
    # Google account sign-in endpoints used by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # "sl" (service login) JSON endpoints; responses are positional JSON
    # arrays prefixed with an XSSI guard that req() strips off
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments that can never be a user/channel name; used by subclass
    # URL regexes to avoid matching e.g. youtube.com/feed/... as a channel
    _RESERVED_NAMES = (
        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|'
        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist ID prefixes, plus the special keyword lists
    # (WL = watch later, LL/LM = liked, RDMM = personal mix)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # Extra headers identifying us as the desktop web client
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        # Force an English UI (hl=en) so that text-based extraction
        # (dates, error messages) sees predictable strings
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap plain video IDs into url_result dicts delegating to YoutubeIE
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        NOTE: the flow below talks to Google's undocumented "GlifWebSignIn"
        endpoints; requests and responses are positional JSON arrays, so the
        magic indices used with try_get() mirror the observed wire format.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # NOTE(review): 'and False' makes this branch dead code on purpose
            # (see inline TODO) - the reminder is currently disabled
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        # Hidden form inputs carry session tokens that must be echoed back
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the positional-JSON payload f_req to a sign-in endpoint
            # and return the parsed (XSSI-prefix-stripped) JSON response,
            # or False on failure (fatal=False)
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Responses start with an XSSI guard (e.g. ")]}'"); drop
                # everything before the first '[' so json parsing succeeds
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account by username to obtain a user hash
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password for the looked-up user hash
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A non-empty entry at [0][5] signals a login error (e.g. bad password)
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Step 3 (optional): handle an additional challenge, e.g. TFA
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token is required to address the TFA endpoint
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users sometimes paste the SMS-style "G-123456" form
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges we cannot solve programmatically; tell the user
                # to resolve them in a browser
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Step 4: fetch the CheckCookie URL to finalize the session cookies
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through myaccount.google.com
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Copy the query dict so downstream mutation cannot leak back into
        # the caller's dictionary
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        # Pull the ytInitialData JSON blob out of the watch page; the
        # lookbehind anchors the non-greedy match at a closing brace
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        # NOTE(review): both the success and failure paths end here, so the
        # conditional return is effectively a no-op (login failure only warns)
        if not self._login():
            return

    # Minimal InnerTube request context identifying the desktop web client
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    # ytInitialData assignment in either window["..."] or bare-var form
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'

    def _call_api(self, ep, query, video_id):
        """Call the youtubei/v1 InnerTube endpoint *ep* and return its JSON.

        *query* is merged over _DEFAULT_API_DATA to form the POST body.
        The 'key' parameter is the public web-client API key, not a secret.
        """
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        # Fatal variant of _get_yt_initial_data: raises if the blob is
        # missing. Tries the newline-terminated form first.
        return self._parse_json(
            self._search_regex(
                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
330
331
332class YoutubeIE(YoutubeBaseInfoExtractor):
333 IE_DESC = 'YouTube.com'
334 _VALID_URL = r"""(?x)^
335 (
336 (?:https?://|//) # http(s):// or protocol-independent URL
337 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
338 (?:www\.)?deturl\.com/www\.youtube\.com/|
339 (?:www\.)?pwnyoutube\.com/|
340 (?:www\.)?hooktube\.com/|
341 (?:www\.)?yourepeat\.com/|
342 tube\.majestyc\.net/|
343 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
344 (?:(?:www|dev)\.)?invidio\.us/|
345 (?:(?:www|no)\.)?invidiou\.sh/|
346 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
347 (?:www\.)?invidious\.kabi\.tk/|
348 (?:www\.)?invidious\.13ad\.de/|
349 (?:www\.)?invidious\.mastodon\.host/|
350 (?:www\.)?invidious\.nixnet\.xyz/|
351 (?:www\.)?invidious\.drycat\.fr/|
352 (?:www\.)?tube\.poal\.co/|
353 (?:www\.)?vid\.wxzm\.sx/|
354 (?:www\.)?yewtu\.be/|
355 (?:www\.)?yt\.elukerio\.org/|
356 (?:www\.)?yt\.lelux\.fi/|
357 (?:www\.)?invidious\.ggc-project\.de/|
358 (?:www\.)?yt\.maisputain\.ovh/|
359 (?:www\.)?invidious\.13ad\.de/|
360 (?:www\.)?invidious\.toot\.koeln/|
361 (?:www\.)?invidious\.fdn\.fr/|
362 (?:www\.)?watch\.nettohikari\.com/|
363 (?:www\.)?kgg2m7yk5aybusll\.onion/|
364 (?:www\.)?qklhadlycap4cnod\.onion/|
365 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
366 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
367 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
368 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
369 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
370 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
371 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
372 (?:.*?\#/)? # handle anchor (#/) redirect urls
373 (?: # the various things that can precede the ID:
374 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
375 |(?: # or the v= param in all its forms
376 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
377 (?:\?|\#!?) # the params delimiter ? or # or #!
378 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
379 v=
380 )
381 ))
382 |(?:
383 youtu\.be| # just youtu.be/xxxx
384 vid\.plus| # or vid.plus/xxxx
385 zwearz\.com/watch| # or zwearz.com/watch/xxxx
386 )/
387 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
388 )
389 )? # all until now is optional -> you can pass the naked ID
390 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
391 (?!.*?\blist=
392 (?:
393 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
394 WL # WL are handled by the watch later IE
395 )
396 )
397 (?(1).+)? # if we found the ID, everything can follow
398 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
399 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
400 _PLAYER_INFO_RE = (
401 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
402 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
403 )
404 _formats = {
405 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
406 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
407 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
408 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
409 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
410 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
411 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
412 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
413 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
414 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
415 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
416 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
417 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
418 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
419 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
420 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
421 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
422 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
423
424
425 # 3D videos
426 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
427 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
428 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
429 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
430 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
431 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
432 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
433
434 # Apple HTTP Live Streaming
435 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
436 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
437 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
438 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
439 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
440 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
441 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
442 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
443
444 # DASH mp4 video
445 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
446 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
447 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
448 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
449 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
450 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
451 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
452 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
453 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
454 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
455 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
456 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
457
458 # Dash mp4 audio
459 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
460 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
461 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
462 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
463 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
464 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
465 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
466
467 # Dash webm
468 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
469 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
470 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
471 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
472 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
473 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
474 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
475 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
476 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
477 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
478 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
479 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
480 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
481 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
482 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
483 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
484 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
485 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
486 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
487 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
488 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
489 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
490
491 # Dash webm audio
492 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
493 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
494
495 # Dash webm audio with opus inside
496 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
497 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
498 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
499
500 # RTMP (unnamed)
501 '_rtmp': {'protocol': 'rtmp'},
502
503 # av01 video only formats sometimes served with "unknown" codecs
504 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
505 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
506 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
507 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
508 }
509 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
510
511 _GEO_BYPASS = False
512
513 IE_NAME = 'youtube'
514 _TESTS = [
515 {
516 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
517 'info_dict': {
518 'id': 'BaW_jenozKc',
519 'ext': 'mp4',
520 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
521 'uploader': 'Philipp Hagemeister',
522 'uploader_id': 'phihag',
523 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
524 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
525 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
526 'upload_date': '20121002',
527 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
528 'categories': ['Science & Technology'],
529 'tags': ['youtube-dl'],
530 'duration': 10,
531 'view_count': int,
532 'like_count': int,
533 'dislike_count': int,
534 'start_time': 1,
535 'end_time': 9,
536 }
537 },
538 {
539 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
540 'note': 'Embed-only video (#1746)',
541 'info_dict': {
542 'id': 'yZIXLfi8CZQ',
543 'ext': 'mp4',
544 'upload_date': '20120608',
545 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
546 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
547 'uploader': 'SET India',
548 'uploader_id': 'setindia',
549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
550 'age_limit': 18,
551 }
552 },
553 {
554 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
555 'note': 'Use the first video ID in the URL',
556 'info_dict': {
557 'id': 'BaW_jenozKc',
558 'ext': 'mp4',
559 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
560 'uploader': 'Philipp Hagemeister',
561 'uploader_id': 'phihag',
562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
563 'upload_date': '20121002',
564 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
565 'categories': ['Science & Technology'],
566 'tags': ['youtube-dl'],
567 'duration': 10,
568 'view_count': int,
569 'like_count': int,
570 'dislike_count': int,
571 },
572 'params': {
573 'skip_download': True,
574 },
575 },
576 {
577 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
578 'note': '256k DASH audio (format 141) via DASH manifest',
579 'info_dict': {
580 'id': 'a9LDPn-MO4I',
581 'ext': 'm4a',
582 'upload_date': '20121002',
583 'uploader_id': '8KVIDEO',
584 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
585 'description': '',
586 'uploader': '8KVIDEO',
587 'title': 'UHDTV TEST 8K VIDEO.mp4'
588 },
589 'params': {
590 'youtube_include_dash_manifest': True,
591 'format': '141',
592 },
593 'skip': 'format 141 not served anymore',
594 },
595 # DASH manifest with encrypted signature
596 {
597 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
598 'info_dict': {
599 'id': 'IB3lcPjvWLA',
600 'ext': 'm4a',
601 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
602 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
603 'duration': 244,
604 'uploader': 'AfrojackVEVO',
605 'uploader_id': 'AfrojackVEVO',
606 'upload_date': '20131011',
607 },
608 'params': {
609 'youtube_include_dash_manifest': True,
610 'format': '141/bestaudio[ext=m4a]',
611 },
612 },
613 # Controversy video
614 {
615 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
616 'info_dict': {
617 'id': 'T4XJQO3qol8',
618 'ext': 'mp4',
619 'duration': 219,
620 'upload_date': '20100909',
621 'uploader': 'Amazing Atheist',
622 'uploader_id': 'TheAmazingAtheist',
623 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
624 'title': 'Burning Everyone\'s Koran',
625 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
626 }
627 },
628 # Normal age-gate video (embed allowed)
629 {
630 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
631 'info_dict': {
632 'id': 'HtVdAasjOgU',
633 'ext': 'mp4',
634 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
635 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
636 'duration': 142,
637 'uploader': 'The Witcher',
638 'uploader_id': 'WitcherGame',
639 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
640 'upload_date': '20140605',
641 'age_limit': 18,
642 },
643 },
644 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
645 # YouTube Red ad is not captured for creator
646 {
647 'url': '__2ABJjxzNo',
648 'info_dict': {
649 'id': '__2ABJjxzNo',
650 'ext': 'mp4',
651 'duration': 266,
652 'upload_date': '20100430',
653 'uploader_id': 'deadmau5',
654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
655 'creator': 'Dada Life, deadmau5',
656 'description': 'md5:12c56784b8032162bb936a5f76d55360',
657 'uploader': 'deadmau5',
658 'title': 'Deadmau5 - Some Chords (HD)',
659 'alt_title': 'This Machine Kills Some Chords',
660 },
661 'expected_warnings': [
662 'DASH manifest missing',
663 ]
664 },
665 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
666 {
667 'url': 'lqQg6PlCWgI',
668 'info_dict': {
669 'id': 'lqQg6PlCWgI',
670 'ext': 'mp4',
671 'duration': 6085,
672 'upload_date': '20150827',
673 'uploader_id': 'olympic',
674 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
675 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
676 'uploader': 'Olympic',
677 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
678 },
679 'params': {
680 'skip_download': 'requires avconv',
681 }
682 },
683 # Non-square pixels
684 {
685 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
686 'info_dict': {
687 'id': '_b-2C3KPAM0',
688 'ext': 'mp4',
689 'stretched_ratio': 16 / 9.,
690 'duration': 85,
691 'upload_date': '20110310',
692 'uploader_id': 'AllenMeow',
693 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
694 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
695 'uploader': '孫ᄋᄅ',
696 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
697 },
698 },
699 # url_encoded_fmt_stream_map is empty string
700 {
701 'url': 'qEJwOuvDf7I',
702 'info_dict': {
703 'id': 'qEJwOuvDf7I',
704 'ext': 'webm',
705 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
706 'description': '',
707 'upload_date': '20150404',
708 'uploader_id': 'spbelect',
709 'uploader': 'Наблюдатели Петербурга',
710 },
711 'params': {
712 'skip_download': 'requires avconv',
713 },
714 'skip': 'This live event has ended.',
715 },
716 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
717 {
718 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
719 'info_dict': {
720 'id': 'FIl7x6_3R5Y',
721 'ext': 'webm',
722 'title': 'md5:7b81415841e02ecd4313668cde88737a',
723 'description': 'md5:116377fd2963b81ec4ce64b542173306',
724 'duration': 220,
725 'upload_date': '20150625',
726 'uploader_id': 'dorappi2000',
727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
728 'uploader': 'dorappi2000',
729 'formats': 'mincount:31',
730 },
731 'skip': 'not actual anymore',
732 },
733 # DASH manifest with segment_list
734 {
735 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
736 'md5': '8ce563a1d667b599d21064e982ab9e31',
737 'info_dict': {
738 'id': 'CsmdDsKjzN8',
739 'ext': 'mp4',
740 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
741 'uploader': 'Airtek',
742 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
743 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
744 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
745 },
746 'params': {
747 'youtube_include_dash_manifest': True,
748 'format': '135', # bestvideo
749 },
750 'skip': 'This live event has ended.',
751 },
752 {
753 # Multifeed videos (multiple cameras), URL is for Main Camera
754 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
755 'info_dict': {
756 'id': 'jqWvoWXjCVs',
757 'title': 'teamPGP: Rocket League Noob Stream',
758 'description': 'md5:dc7872fb300e143831327f1bae3af010',
759 },
760 'playlist': [{
761 'info_dict': {
762 'id': 'jqWvoWXjCVs',
763 'ext': 'mp4',
764 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
765 'description': 'md5:dc7872fb300e143831327f1bae3af010',
766 'duration': 7335,
767 'upload_date': '20150721',
768 'uploader': 'Beer Games Beer',
769 'uploader_id': 'beergamesbeer',
770 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
771 'license': 'Standard YouTube License',
772 },
773 }, {
774 'info_dict': {
775 'id': '6h8e8xoXJzg',
776 'ext': 'mp4',
777 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
778 'description': 'md5:dc7872fb300e143831327f1bae3af010',
779 'duration': 7337,
780 'upload_date': '20150721',
781 'uploader': 'Beer Games Beer',
782 'uploader_id': 'beergamesbeer',
783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
784 'license': 'Standard YouTube License',
785 },
786 }, {
787 'info_dict': {
788 'id': 'PUOgX5z9xZw',
789 'ext': 'mp4',
790 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
791 'description': 'md5:dc7872fb300e143831327f1bae3af010',
792 'duration': 7337,
793 'upload_date': '20150721',
794 'uploader': 'Beer Games Beer',
795 'uploader_id': 'beergamesbeer',
796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
797 'license': 'Standard YouTube License',
798 },
799 }, {
800 'info_dict': {
801 'id': 'teuwxikvS5k',
802 'ext': 'mp4',
803 'title': 'teamPGP: Rocket League Noob Stream (zim)',
804 'description': 'md5:dc7872fb300e143831327f1bae3af010',
805 'duration': 7334,
806 'upload_date': '20150721',
807 'uploader': 'Beer Games Beer',
808 'uploader_id': 'beergamesbeer',
809 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
810 'license': 'Standard YouTube License',
811 },
812 }],
813 'params': {
814 'skip_download': True,
815 },
816 'skip': 'This video is not available.',
817 },
818 {
819 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
820 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
821 'info_dict': {
822 'id': 'gVfLd0zydlo',
823 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
824 },
825 'playlist_count': 2,
826 'skip': 'Not multifeed anymore',
827 },
828 {
829 'url': 'https://vid.plus/FlRa-iH7PGw',
830 'only_matching': True,
831 },
832 {
833 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
834 'only_matching': True,
835 },
836 {
837 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
838 # Also tests cut-off URL expansion in video description (see
839 # https://github.com/ytdl-org/youtube-dl/issues/1892,
840 # https://github.com/ytdl-org/youtube-dl/issues/8164)
841 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
842 'info_dict': {
843 'id': 'lsguqyKfVQg',
844 'ext': 'mp4',
845 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
846 'alt_title': 'Dark Walk - Position Music',
847 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
848 'duration': 133,
849 'upload_date': '20151119',
850 'uploader_id': 'IronSoulElf',
851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
852 'uploader': 'IronSoulElf',
853 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
854 'track': 'Dark Walk - Position Music',
855 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
856 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
857 },
858 'params': {
859 'skip_download': True,
860 },
861 },
862 {
863 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
864 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
865 'only_matching': True,
866 },
867 {
868 # Video with yt:stretch=17:0
869 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
870 'info_dict': {
871 'id': 'Q39EVAstoRM',
872 'ext': 'mp4',
873 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
874 'description': 'md5:ee18a25c350637c8faff806845bddee9',
875 'upload_date': '20151107',
876 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
877 'uploader': 'CH GAMER DROID',
878 },
879 'params': {
880 'skip_download': True,
881 },
882 'skip': 'This video does not exist.',
883 },
884 {
885 # Video licensed under Creative Commons
886 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
887 'info_dict': {
888 'id': 'M4gD1WSo5mA',
889 'ext': 'mp4',
890 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
891 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
892 'duration': 721,
893 'upload_date': '20150127',
894 'uploader_id': 'BerkmanCenter',
895 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
896 'uploader': 'The Berkman Klein Center for Internet & Society',
897 'license': 'Creative Commons Attribution license (reuse allowed)',
898 },
899 'params': {
900 'skip_download': True,
901 },
902 },
903 {
904 # Channel-like uploader_url
905 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
906 'info_dict': {
907 'id': 'eQcmzGIKrzg',
908 'ext': 'mp4',
909 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
910 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
911 'duration': 4060,
912 'upload_date': '20151119',
913 'uploader': 'Bernie Sanders',
914 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
916 'license': 'Creative Commons Attribution license (reuse allowed)',
917 },
918 'params': {
919 'skip_download': True,
920 },
921 },
922 {
923 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
924 'only_matching': True,
925 },
926 {
927 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
928 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
929 'only_matching': True,
930 },
931 {
932 # Rental video preview
933 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
934 'info_dict': {
935 'id': 'uGpuVWrhIzE',
936 'ext': 'mp4',
937 'title': 'Piku - Trailer',
938 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
939 'upload_date': '20150811',
940 'uploader': 'FlixMatrix',
941 'uploader_id': 'FlixMatrixKaravan',
942 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
943 'license': 'Standard YouTube License',
944 },
945 'params': {
946 'skip_download': True,
947 },
948 'skip': 'This video is not available.',
949 },
950 {
951 # YouTube Red video with episode data
952 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
953 'info_dict': {
954 'id': 'iqKdEhx-dD4',
955 'ext': 'mp4',
956 'title': 'Isolation - Mind Field (Ep 1)',
957 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
958 'duration': 2085,
959 'upload_date': '20170118',
960 'uploader': 'Vsauce',
961 'uploader_id': 'Vsauce',
962 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
963 'series': 'Mind Field',
964 'season_number': 1,
965 'episode_number': 1,
966 },
967 'params': {
968 'skip_download': True,
969 },
970 'expected_warnings': [
971 'Skipping DASH manifest',
972 ],
973 },
974 {
975 # The following content has been identified by the YouTube community
976 # as inappropriate or offensive to some audiences.
977 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
978 'info_dict': {
979 'id': '6SJNVb0GnPI',
980 'ext': 'mp4',
981 'title': 'Race Differences in Intelligence',
982 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
983 'duration': 965,
984 'upload_date': '20140124',
985 'uploader': 'New Century Foundation',
986 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
987 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
988 },
989 'params': {
990 'skip_download': True,
991 },
992 },
993 {
994 # itag 212
995 'url': '1t24XAntNCY',
996 'only_matching': True,
997 },
998 {
999 # geo restricted to JP
1000 'url': 'sJL6WA-aGkQ',
1001 'only_matching': True,
1002 },
1003 {
1004 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1005 'only_matching': True,
1006 },
1007 {
1008 # DRM protected
1009 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1010 'only_matching': True,
1011 },
1012 {
1013 # Video with unsupported adaptive stream type formats
1014 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1015 'info_dict': {
1016 'id': 'Z4Vy8R84T1U',
1017 'ext': 'mp4',
1018 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1019 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1020 'duration': 433,
1021 'upload_date': '20130923',
1022 'uploader': 'Amelia Putri Harwita',
1023 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1025 'formats': 'maxcount:10',
1026 },
1027 'params': {
1028 'skip_download': True,
1029 'youtube_include_dash_manifest': False,
1030 },
1031 'skip': 'not actual anymore',
1032 },
1033 {
1034 # Youtube Music Auto-generated description
1035 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1036 'info_dict': {
1037 'id': 'MgNrAu2pzNs',
1038 'ext': 'mp4',
1039 'title': 'Voyeur Girl',
1040 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1041 'upload_date': '20190312',
1042 'uploader': 'Stephen - Topic',
1043 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1044 'artist': 'Stephen',
1045 'track': 'Voyeur Girl',
1046 'album': 'it\'s too much love to know my dear',
1047 'release_date': '20190313',
1048 'release_year': 2019,
1049 },
1050 'params': {
1051 'skip_download': True,
1052 },
1053 },
1054 {
1055 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1056 'only_matching': True,
1057 },
1058 {
1059 # invalid -> valid video id redirection
1060 'url': 'DJztXj2GPfl',
1061 'info_dict': {
1062 'id': 'DJztXj2GPfk',
1063 'ext': 'mp4',
1064 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1065 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1066 'upload_date': '20090125',
1067 'uploader': 'Prochorowka',
1068 'uploader_id': 'Prochorowka',
1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1070 'artist': 'Panjabi MC',
1071 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1072 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1073 },
1074 'params': {
1075 'skip_download': True,
1076 },
1077 },
1078 {
1079 # empty description results in an empty string
1080 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1081 'info_dict': {
1082 'id': 'x41yOUIvK2k',
1083 'ext': 'mp4',
1084 'title': 'IMG 3456',
1085 'description': '',
1086 'upload_date': '20170613',
1087 'uploader_id': 'ElevageOrVert',
1088 'uploader': 'ElevageOrVert',
1089 },
1090 'params': {
1091 'skip_download': True,
1092 },
1093 },
1094 {
1095 # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
1096 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1097 'info_dict': {
1098 'id': 'CHqg6qOn4no',
1099 'ext': 'mp4',
1100 'title': 'Part 77 Sort a list of simple types in c#',
1101 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1102 'upload_date': '20130831',
1103 'uploader_id': 'kudvenkat',
1104 'uploader': 'kudvenkat',
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
1109 },
1110 ]
1111
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache of extracted signature-decryption functions,
        # keyed by (player_url, signature cache id) -- see _decrypt_signature().
        self._player_cache = {}
1115
1116 def report_video_info_webpage_download(self, video_id):
1117 """Report attempt to download video info webpage."""
1118 self.to_screen('%s: Downloading video info webpage' % video_id)
1119
1120 def report_information_extraction(self, video_id):
1121 """Report attempt to extract video information."""
1122 self.to_screen('%s: Extracting video information' % video_id)
1123
1124 def report_unavailable_format(self, video_id, format):
1125 """Report extracted video URL."""
1126 self.to_screen('%s: Format %s not available' % (video_id, format))
1127
1128 def report_rtmp_download(self):
1129 """Indicate the download will use the RTMP protocol."""
1130 self.to_screen('RTMP download detected')
1131
1132 def _signature_cache_id(self, example_sig):
1133 """ Return a string representation of a signature """
1134 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1135
1136 @classmethod
1137 def _extract_player_info(cls, player_url):
1138 for player_re in cls._PLAYER_INFO_RE:
1139 id_m = re.search(player_re, player_url)
1140 if id_m:
1141 break
1142 else:
1143 raise ExtractorError('Cannot identify player %r' % player_url)
1144 return id_m.group('ext'), id_m.group('id')
1145
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Download the player and extract its signature-decryption function.

        Returns a callable (str -> str). The derived character permutation is
        persisted in the filesystem cache so subsequent runs can skip the
        player download entirely.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache filename; it must not contain path parts
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices: decryption is a pure
            # character permutation, so it can be replayed without the player
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Run the extracted function on a probe string of unique characters
        # to record the permutation it applies, then cache that permutation
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1185
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function
        (used with the youtube_print_sig_code option)."""
        def gen_sig_code(idxs):
            # Compresses runs of consecutive indices (step +/-1) into slice
            # expressions, emitting single-element lookups otherwise.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or slice run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Record the permutation the function applies to a probe string
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1224
    def _parse_sig_js(self, jscode):
        """Locate the signature-decryption function inside the player JS and
        return a Python callable (str -> str) implementing it."""
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single argument
        return lambda s: initial_function([s])
1245
1246 def _parse_sig_swf(self, file_contents):
1247 swfi = SWFInterpreter(file_contents)
1248 TARGET_CLASSNAME = 'SignatureDecipher'
1249 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1250 initial_function = swfi.extract_function(searched_class, 'decipher')
1251 return lambda s: initial_function([s])
1252
1253 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1254 """Turn the encrypted s field into a working signature"""
1255
1256 if player_url is None:
1257 raise ExtractorError('Cannot decrypt signature without player_url')
1258
1259 if player_url.startswith('//'):
1260 player_url = 'https:' + player_url
1261 elif not re.match(r'https?://', player_url):
1262 player_url = compat_urlparse.urljoin(
1263 'https://www.youtube.com', player_url)
1264 try:
1265 player_id = (player_url, self._signature_cache_id(s))
1266 if player_id not in self._player_cache:
1267 func = self._extract_signature_function(
1268 video_id, player_url, s
1269 )
1270 self._player_cache[player_id] = func
1271 func = self._player_cache[player_id]
1272 if self._downloader.params.get('youtube_print_sig_code'):
1273 self._print_sig_code(func, s)
1274 return func(s)
1275 except Exception as e:
1276 tb = traceback.format_exc()
1277 raise ExtractorError(
1278 'Signature extraction failed: ' + tb, cause=e)
1279
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Return manually-created subtitle tracks for video_id.

        Queries the legacy timedtext listing endpoint and builds a dict
        mapping language code -> list of {'url', 'ext'} entries; a synthetic
        'live_chat' entry is added when a live chat replay is available.
        Returns an empty dict on failure.
        """
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track per language
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            # Pseudo-subtitle entry handled by the youtube_live_chat_replay
            # downloader protocol rather than a direct URL
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1319
1320 def _get_ytplayer_config(self, video_id, webpage):
1321 patterns = (
1322 # User data may contain arbitrary character sequences that may affect
1323 # JSON extraction with regex, e.g. when '};' is contained the second
1324 # regex won't capture the whole JSON. Yet working around by trying more
1325 # concrete regex first keeping in mind proper quoted string handling
1326 # to be implemented in future that will replace this workaround (see
1327 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1328 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1329 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1330 r';ytplayer\.config\s*=\s*({.+?});',
1331 )
1332 config = self._search_regex(
1333 patterns, webpage, 'ytplayer.config', default=None)
1334 if config:
1335 return self._parse_json(
1336 uppercase_escape(config), video_id, fatal=False)
1337
1338 def _get_music_metadata_from_yt_initial(self, yt_initial):
1339 music_metadata = []
1340 key_map = {
1341 'Album': 'album',
1342 'Artist': 'artist',
1343 'Song': 'track'
1344 }
1345 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1346 if type(contents) is list:
1347 for content in contents:
1348 music_track = {}
1349 if type(content) is not dict:
1350 continue
1351 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1352 if type(videoSecondaryInfoRenderer) is not dict:
1353 continue
1354 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1355 if type(rows) is not list:
1356 continue
1357 for row in rows:
1358 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1359 if type(metadataRowRenderer) is not dict:
1360 continue
1361 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1362 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1363 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1364 if type(key) is not str or type(value) is not str:
1365 continue
1366 if key in key_map:
1367 if key_map[key] in music_track:
1368 # we've started on a new track
1369 music_metadata.append(music_track)
1370 music_track = {}
1371 music_track[key_map[key]] = value
1372 if len(music_track.keys()):
1373 music_metadata.append(music_track)
1374 return music_metadata
1375
    def _get_automatic_captions(self, video_id, webpage):
        """Extract automatically-generated caption tracks for video_id.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns a dict mapping language code -> list of {'url', 'ext'}
        entries; an empty dict when no automatic captions are found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            # Legacy flow: ttsurl-based caption listing
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language caption URLs by rewriting the query
                # string of a single base caption URL
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1477
1478 def _mark_watched(self, video_id, video_info, player_response):
1479 playback_url = url_or_none(try_get(
1480 player_response,
1481 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1482 video_info, lambda x: x['videostats_playback_base_url'][0]))
1483 if not playback_url:
1484 return
1485 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1486 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1487
1488 # cpn generation algorithm is reverse engineered from base.js.
1489 # In fact it works even with dummy cpn.
1490 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1491 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1492
1493 qs.update({
1494 'ver': ['2'],
1495 'cpn': [cpn],
1496 })
1497 playback_url = compat_urlparse.urlunparse(
1498 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1499
1500 self._download_webpage(
1501 playback_url, video_id, 'Marking watched',
1502 'Unable to mark watched', fatal=False)
1503
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube embed URLs/ids found in webpage (may be empty)."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?:
                    <iframe[^>]+?src=|
                    data-video-url=|
                    <embed[^>]+?src=|
                    embedSWF\(?:\s*|
                    <object[^>]+data=|
                    new\s+SWFObject\(
                )
                (["\'])
                    (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                    (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
                \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1535
1536 @staticmethod
1537 def _extract_url(webpage):
1538 urls = YoutubeIE._extract_urls(webpage)
1539 return urls[0] if urls else None
1540
1541 @classmethod
1542 def extract_id(cls, url):
1543 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1544 if mobj is None:
1545 raise ExtractorError('Invalid URL: %s' % url)
1546 video_id = mobj.group(2)
1547 return video_id
1548
    def _extract_chapters_from_json(self, webpage, video_id, duration):
        """Build the chapter list from the ytInitialData player bar.

        Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
        when the page carries no chaptered player bar.
        """
        if not webpage:
            return
        data = self._extract_yt_initial_data(video_id, webpage)
        if not data or not isinstance(data, dict):
            return
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Start time is given in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # Each chapter ends where the next one starts; the last one ends
            # at the video duration
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1593
1594 @staticmethod
1595 def _extract_chapters_from_description(description, duration):
1596 if not description:
1597 return None
1598 chapter_lines = re.findall(
1599 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1600 description)
1601 if not chapter_lines:
1602 return None
1603 chapters = []
1604 for next_num, (chapter_line, time_point) in enumerate(
1605 chapter_lines, start=1):
1606 start_time = parse_duration(time_point)
1607 if start_time is None:
1608 continue
1609 if start_time > duration:
1610 break
1611 end_time = (duration if next_num == len(chapter_lines)
1612 else parse_duration(chapter_lines[next_num][1]))
1613 if end_time is None:
1614 continue
1615 if end_time > duration:
1616 end_time = duration
1617 if start_time > end_time:
1618 break
1619 chapter_title = re.sub(
1620 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1621 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1622 chapters.append({
1623 'start_time': start_time,
1624 'end_time': end_time,
1625 'title': chapter_title,
1626 })
1627 return chapters
1628
1629 def _extract_chapters(self, webpage, description, video_id, duration):
1630 return (self._extract_chapters_from_json(webpage, video_id, duration)
1631 or self._extract_chapters_from_description(description, duration))
1632
1633 def _real_extract(self, url):
1634 url, smuggled_data = unsmuggle_url(url, {})
1635
1636 proto = (
1637 'http' if self._downloader.params.get('prefer_insecure', False)
1638 else 'https')
1639
1640 start_time = None
1641 end_time = None
1642 parsed_url = compat_urllib_parse_urlparse(url)
1643 for component in [parsed_url.fragment, parsed_url.query]:
1644 query = compat_parse_qs(component)
1645 if start_time is None and 't' in query:
1646 start_time = parse_duration(query['t'][0])
1647 if start_time is None and 'start' in query:
1648 start_time = parse_duration(query['start'][0])
1649 if end_time is None and 'end' in query:
1650 end_time = parse_duration(query['end'][0])
1651
1652 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1653 mobj = re.search(self._NEXT_URL_RE, url)
1654 if mobj:
1655 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1656 video_id = self.extract_id(url)
1657
1658 # Get video webpage
1659 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1660 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1661
1662 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1663 video_id = qs.get('v', [None])[0] or video_id
1664
1665 # Attempt to extract SWF player URL
1666 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1667 if mobj is not None:
1668 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1669 else:
1670 player_url = None
1671
1672 dash_mpds = []
1673
1674 def add_dash_mpd(video_info):
1675 dash_mpd = video_info.get('dashmpd')
1676 if dash_mpd and dash_mpd[0] not in dash_mpds:
1677 dash_mpds.append(dash_mpd[0])
1678
1679 def add_dash_mpd_pr(pl_response):
1680 dash_mpd = url_or_none(try_get(
1681 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1682 compat_str))
1683 if dash_mpd and dash_mpd not in dash_mpds:
1684 dash_mpds.append(dash_mpd)
1685
1686 is_live = None
1687 view_count = None
1688
1689 def extract_view_count(v_info):
1690 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1691
1692 def extract_player_response(player_response, video_id):
1693 pl_response = str_or_none(player_response)
1694 if not pl_response:
1695 return
1696 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1697 if isinstance(pl_response, dict):
1698 add_dash_mpd_pr(pl_response)
1699 return pl_response
1700
1701 def extract_embedded_config(embed_webpage, video_id):
1702 embedded_config = self._search_regex(
1703 r'setConfig\(({.*})\);',
1704 embed_webpage, 'ytInitialData', default=None)
1705 if embedded_config:
1706 return embedded_config
1707
1708 player_response = {}
1709
1710 # Get video info
1711 video_info = {}
1712 embed_webpage = None
1713 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1714 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1715 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1716 age_gate = True
1717 # We simulate the access to the video from www.youtube.com/v/{video_id}
1718 # this can be viewed without login into Youtube
1719 url = proto + '://www.youtube.com/embed/%s' % video_id
1720 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1721 ext = extract_embedded_config(embed_webpage, video_id)
1722 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1723 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1724 if not playable_in_embed:
1725 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1726 playable_in_embed = ''
1727 else:
1728 playable_in_embed = playable_in_embed.group('playableinEmbed')
1729 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1730 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1731 if playable_in_embed == 'false':
1732 '''
1733 # TODO apply this patch when Support for Python 2.6(!) and above drops
1734 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1735 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1736 '''
1737 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1738 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1739 age_gate = False
1740 # Try looking directly into the video webpage
1741 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1742 if ytplayer_config:
1743 args = ytplayer_config.get("args")
1744 if args is not None:
1745 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1746 # Convert to the same format returned by compat_parse_qs
1747 video_info = dict((k, [v]) for k, v in args.items())
1748 add_dash_mpd(video_info)
1749 # Rental video is not rented but preview is available (e.g.
1750 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1751 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1752 if not video_info and args.get('ypc_vid'):
1753 return self.url_result(
1754 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1755 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1756 is_live = True
1757 if not player_response:
1758 player_response = extract_player_response(args.get('player_response'), video_id)
1759 elif not player_response:
1760 player_response = ytplayer_config
1761 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1762 add_dash_mpd_pr(player_response)
1763 else:
1764 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1765 else:
1766 data = compat_urllib_parse_urlencode({
1767 'video_id': video_id,
1768 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1769 'sts': self._search_regex(
1770 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1771 })
1772 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1773 try:
1774 video_info_webpage = self._download_webpage(
1775 video_info_url, video_id,
1776 note='Refetching age-gated info webpage',
1777 errnote='unable to download video info webpage')
1778 except ExtractorError:
1779 video_info_webpage = None
1780 if video_info_webpage:
1781 video_info = compat_parse_qs(video_info_webpage)
1782 pl_response = video_info.get('player_response', [None])[0]
1783 player_response = extract_player_response(pl_response, video_id)
1784 add_dash_mpd(video_info)
1785 view_count = extract_view_count(video_info)
1786 else:
1787 age_gate = False
1788 # Try looking directly into the video webpage
1789 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1790 if ytplayer_config:
1791 args = ytplayer_config.get('args', {})
1792 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1793 # Convert to the same format returned by compat_parse_qs
1794 video_info = dict((k, [v]) for k, v in args.items())
1795 add_dash_mpd(video_info)
1796 # Rental video is not rented but preview is available (e.g.
1797 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1798 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1799 if not video_info and args.get('ypc_vid'):
1800 return self.url_result(
1801 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1802 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1803 is_live = True
1804 if not player_response:
1805 player_response = extract_player_response(args.get('player_response'), video_id)
1806 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1807 add_dash_mpd_pr(player_response)
1808
1809 if not video_info and not player_response:
1810 player_response = extract_player_response(
1811 self._search_regex(
1812 r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
1813 'initial player response', default='{}'),
1814 video_id)
1815
1816 def extract_unavailable_message():
1817 messages = []
1818 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1819 msg = self._html_search_regex(
1820 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1821 video_webpage, 'unavailable %s' % kind, default=None)
1822 if msg:
1823 messages.append(msg)
1824 if messages:
1825 return '\n'.join(messages)
1826
1827 if not video_info and not player_response:
1828 unavailable_message = extract_unavailable_message()
1829 if not unavailable_message:
1830 unavailable_message = 'Unable to extract video data'
1831 raise ExtractorError(
1832 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1833
1834 if not isinstance(video_info, dict):
1835 video_info = {}
1836
1837 video_details = try_get(
1838 player_response, lambda x: x['videoDetails'], dict) or {}
1839
1840 microformat = try_get(
1841 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1842
1843 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1844 if not video_title:
1845 self._downloader.report_warning('Unable to extract video title')
1846 video_title = '_'
1847
1848 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1849 if video_description:
1850
1851 def replace_url(m):
1852 redir_url = compat_urlparse.urljoin(url, m.group(1))
1853 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1854 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1855 qs = compat_parse_qs(parsed_redir_url.query)
1856 q = qs.get('q')
1857 if q and q[0]:
1858 return q[0]
1859 return redir_url
1860
1861 description_original = video_description = re.sub(r'''(?x)
1862 <a\s+
1863 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1864 (?:title|href)="([^"]+)"\s+
1865 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1866 class="[^"]*"[^>]*>
1867 [^<]+\.{3}\s*
1868 </a>
1869 ''', replace_url, video_description)
1870 video_description = clean_html(video_description)
1871 else:
1872 video_description = video_details.get('shortDescription')
1873 if video_description is None:
1874 video_description = self._html_search_meta('description', video_webpage)
1875
1876 if not smuggled_data.get('force_singlefeed', False):
1877 if not self._downloader.params.get('noplaylist'):
1878 multifeed_metadata_list = try_get(
1879 player_response,
1880 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1881 compat_str) or try_get(
1882 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1883 if multifeed_metadata_list:
1884 entries = []
1885 feed_ids = []
1886 for feed in multifeed_metadata_list.split(','):
1887 # Unquote should take place before split on comma (,) since textual
1888 # fields may contain comma as well (see
1889 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1890 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1891
1892 def feed_entry(name):
1893 return try_get(feed_data, lambda x: x[name][0], compat_str)
1894
1895 feed_id = feed_entry('id')
1896 if not feed_id:
1897 continue
1898 feed_title = feed_entry('title')
1899 title = video_title
1900 if feed_title:
1901 title += ' (%s)' % feed_title
1902 entries.append({
1903 '_type': 'url_transparent',
1904 'ie_key': 'Youtube',
1905 'url': smuggle_url(
1906 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1907 {'force_singlefeed': True}),
1908 'title': title,
1909 })
1910 feed_ids.append(feed_id)
1911 self.to_screen(
1912 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1913 % (', '.join(feed_ids), video_id))
1914 return self.playlist_result(entries, video_id, video_title, video_description)
1915 else:
1916 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1917
1918 if view_count is None:
1919 view_count = extract_view_count(video_info)
1920 if view_count is None and video_details:
1921 view_count = int_or_none(video_details.get('viewCount'))
1922 if view_count is None and microformat:
1923 view_count = int_or_none(microformat.get('viewCount'))
1924
1925 if is_live is None:
1926 is_live = bool_or_none(video_details.get('isLive'))
1927
1928 has_live_chat_replay = False
1929 if not is_live:
1930 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1931 try:
1932 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1933 has_live_chat_replay = True
1934 except (KeyError, IndexError, TypeError):
1935 pass
1936
1937 # Check for "rental" videos
1938 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1939 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1940
1941 def _extract_filesize(media_url):
1942 return int_or_none(self._search_regex(
1943 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1944
1945 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1946 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1947
1948 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1949 self.report_rtmp_download()
1950 formats = [{
1951 'format_id': '_rtmp',
1952 'protocol': 'rtmp',
1953 'url': video_info['conn'][0],
1954 'player_url': player_url,
1955 }]
1956 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1957 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1958 if 'rtmpe%3Dyes' in encoded_url_map:
1959 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1960 formats = []
1961 formats_spec = {}
1962 fmt_list = video_info.get('fmt_list', [''])[0]
1963 if fmt_list:
1964 for fmt in fmt_list.split(','):
1965 spec = fmt.split('/')
1966 if len(spec) > 1:
1967 width_height = spec[1].split('x')
1968 if len(width_height) == 2:
1969 formats_spec[spec[0]] = {
1970 'resolution': spec[1],
1971 'width': int_or_none(width_height[0]),
1972 'height': int_or_none(width_height[1]),
1973 }
1974 for fmt in streaming_formats:
1975 itag = str_or_none(fmt.get('itag'))
1976 if not itag:
1977 continue
1978 quality = fmt.get('quality')
1979 quality_label = fmt.get('qualityLabel') or quality
1980 formats_spec[itag] = {
1981 'asr': int_or_none(fmt.get('audioSampleRate')),
1982 'filesize': int_or_none(fmt.get('contentLength')),
1983 'format_note': quality_label,
1984 'fps': int_or_none(fmt.get('fps')),
1985 'height': int_or_none(fmt.get('height')),
1986 # bitrate for itag 43 is always 2147483647
1987 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1988 'width': int_or_none(fmt.get('width')),
1989 }
1990
1991 for fmt in streaming_formats:
1992 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1993 continue
1994 url = url_or_none(fmt.get('url'))
1995
1996 if not url:
1997 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1998 if not cipher:
1999 continue
2000 url_data = compat_parse_qs(cipher)
2001 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2002 if not url:
2003 continue
2004 else:
2005 cipher = None
2006 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2007
2008 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2009 # Unsupported FORMAT_STREAM_TYPE_OTF
2010 if stream_type == 3:
2011 continue
2012
2013 format_id = fmt.get('itag') or url_data['itag'][0]
2014 if not format_id:
2015 continue
2016 format_id = compat_str(format_id)
2017
2018 if cipher:
2019 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2020 ASSETS_RE = (
2021 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2022 r'"jsUrl"\s*:\s*("[^"]+")',
2023 r'"assets":.+?"js":\s*("[^"]+")')
2024 jsplayer_url_json = self._search_regex(
2025 ASSETS_RE,
2026 embed_webpage if age_gate else video_webpage,
2027 'JS player URL (1)', default=None)
2028 if not jsplayer_url_json and not age_gate:
2029 # We need the embed website after all
2030 if embed_webpage is None:
2031 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2032 embed_webpage = self._download_webpage(
2033 embed_url, video_id, 'Downloading embed webpage')
2034 jsplayer_url_json = self._search_regex(
2035 ASSETS_RE, embed_webpage, 'JS player URL')
2036
2037 player_url = json.loads(jsplayer_url_json)
2038 if player_url is None:
2039 player_url_json = self._search_regex(
2040 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2041 video_webpage, 'age gate player URL')
2042 player_url = json.loads(player_url_json)
2043
2044 if 'sig' in url_data:
2045 url += '&signature=' + url_data['sig'][0]
2046 elif 's' in url_data:
2047 encrypted_sig = url_data['s'][0]
2048
2049 if self._downloader.params.get('verbose'):
2050 if player_url is None:
2051 player_desc = 'unknown'
2052 else:
2053 player_type, player_version = self._extract_player_info(player_url)
2054 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2055 parts_sizes = self._signature_cache_id(encrypted_sig)
2056 self.to_screen('{%s} signature length %s, %s' %
2057 (format_id, parts_sizes, player_desc))
2058
2059 signature = self._decrypt_signature(
2060 encrypted_sig, video_id, player_url, age_gate)
2061 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2062 url += '&%s=%s' % (sp, signature)
2063 if 'ratebypass' not in url:
2064 url += '&ratebypass=yes'
2065
2066 dct = {
2067 'format_id': format_id,
2068 'url': url,
2069 'player_url': player_url,
2070 }
2071 if format_id in self._formats:
2072 dct.update(self._formats[format_id])
2073 if format_id in formats_spec:
2074 dct.update(formats_spec[format_id])
2075
2076 # Some itags are not included in DASH manifest thus corresponding formats will
2077 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2078 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2079 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2080 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2081
2082 if width is None:
2083 width = int_or_none(fmt.get('width'))
2084 if height is None:
2085 height = int_or_none(fmt.get('height'))
2086
2087 filesize = int_or_none(url_data.get(
2088 'clen', [None])[0]) or _extract_filesize(url)
2089
2090 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2091 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2092
2093 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2094 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2095 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2096
2097 more_fields = {
2098 'filesize': filesize,
2099 'tbr': tbr,
2100 'width': width,
2101 'height': height,
2102 'fps': fps,
2103 'format_note': quality_label or quality,
2104 }
2105 for key, value in more_fields.items():
2106 if value:
2107 dct[key] = value
2108 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2109 if type_:
2110 type_split = type_.split(';')
2111 kind_ext = type_split[0].split('/')
2112 if len(kind_ext) == 2:
2113 kind, _ = kind_ext
2114 dct['ext'] = mimetype2ext(type_split[0])
2115 if kind in ('audio', 'video'):
2116 codecs = None
2117 for mobj in re.finditer(
2118 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2119 if mobj.group('key') == 'codecs':
2120 codecs = mobj.group('val')
2121 break
2122 if codecs:
2123 dct.update(parse_codecs(codecs))
2124 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2125 dct['downloader_options'] = {
2126 # Youtube throttles chunks >~10M
2127 'http_chunk_size': 10485760,
2128 }
2129 formats.append(dct)
2130 else:
2131 manifest_url = (
2132 url_or_none(try_get(
2133 player_response,
2134 lambda x: x['streamingData']['hlsManifestUrl'],
2135 compat_str))
2136 or url_or_none(try_get(
2137 video_info, lambda x: x['hlsvp'][0], compat_str)))
2138 if manifest_url:
2139 formats = []
2140 m3u8_formats = self._extract_m3u8_formats(
2141 manifest_url, video_id, 'mp4', fatal=False)
2142 for a_format in m3u8_formats:
2143 itag = self._search_regex(
2144 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2145 if itag:
2146 a_format['format_id'] = itag
2147 if itag in self._formats:
2148 dct = self._formats[itag].copy()
2149 dct.update(a_format)
2150 a_format = dct
2151 a_format['player_url'] = player_url
2152 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2153 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2154 if self._downloader.params.get('youtube_include_hls_manifest', True):
2155 formats.append(a_format)
2156 else:
2157 error_message = extract_unavailable_message()
2158 if not error_message:
2159 reason_list = try_get(
2160 player_response,
2161 lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
2162 list) or []
2163 for reason in reason_list:
2164 if not isinstance(reason, dict):
2165 continue
2166 reason_text = try_get(reason, lambda x: x['text'], compat_str)
2167 if reason_text:
2168 if not error_message:
2169 error_message = ''
2170 error_message += reason_text
2171 if error_message:
2172 error_message = clean_html(error_message)
2173 if not error_message:
2174 error_message = clean_html(try_get(
2175 player_response, lambda x: x['playabilityStatus']['reason'],
2176 compat_str))
2177 if not error_message:
2178 error_message = clean_html(
2179 try_get(video_info, lambda x: x['reason'][0], compat_str))
2180 if error_message:
2181 raise ExtractorError(error_message, expected=True)
2182 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2183
2184 # uploader
2185 video_uploader = try_get(
2186 video_info, lambda x: x['author'][0],
2187 compat_str) or str_or_none(video_details.get('author'))
2188 if video_uploader:
2189 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2190 else:
2191 self._downloader.report_warning('unable to extract uploader name')
2192
2193 # uploader_id
2194 video_uploader_id = None
2195 video_uploader_url = None
2196 mobj = re.search(
2197 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2198 video_webpage)
2199 if mobj is not None:
2200 video_uploader_id = mobj.group('uploader_id')
2201 video_uploader_url = mobj.group('uploader_url')
2202 else:
2203 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2204 if owner_profile_url:
2205 video_uploader_id = self._search_regex(
2206 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2207 default=None)
2208 video_uploader_url = owner_profile_url
2209
2210 channel_id = (
2211 str_or_none(video_details.get('channelId'))
2212 or self._html_search_meta(
2213 'channelId', video_webpage, 'channel id', default=None)
2214 or self._search_regex(
2215 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2216 video_webpage, 'channel id', default=None, group='id'))
2217 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2218
2219 thumbnails = []
2220 thumbnails_list = try_get(
2221 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2222 for t in thumbnails_list:
2223 if not isinstance(t, dict):
2224 continue
2225 thumbnail_url = url_or_none(t.get('url'))
2226 if not thumbnail_url:
2227 continue
2228 thumbnails.append({
2229 'url': thumbnail_url,
2230 'width': int_or_none(t.get('width')),
2231 'height': int_or_none(t.get('height')),
2232 })
2233
2234 if not thumbnails:
2235 video_thumbnail = None
2236 # We try first to get a high quality image:
2237 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2238 video_webpage, re.DOTALL)
2239 if m_thumb is not None:
2240 video_thumbnail = m_thumb.group(1)
2241 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2242 if thumbnail_url:
2243 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2244 if video_thumbnail:
2245 thumbnails.append({'url': video_thumbnail})
2246
2247 # upload date
2248 upload_date = self._html_search_meta(
2249 'datePublished', video_webpage, 'upload date', default=None)
2250 if not upload_date:
2251 upload_date = self._search_regex(
2252 [r'(?s)id="eow-date.*?>(.*?)</span>',
2253 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2254 video_webpage, 'upload date', default=None)
2255 if not upload_date:
2256 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2257 upload_date = unified_strdate(upload_date)
2258
2259 video_license = self._html_search_regex(
2260 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2261 video_webpage, 'license', default=None)
2262
2263 m_music = re.search(
2264 r'''(?x)
2265 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2266 <ul[^>]*>\s*
2267 <li>(?P<title>.+?)
2268 by (?P<creator>.+?)
2269 (?:
2270 \(.+?\)|
2271 <a[^>]*
2272 (?:
2273 \bhref=["\']/red[^>]*>| # drop possible
2274 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2275 )
2276 .*?
2277 )?</li
2278 ''',
2279 video_webpage)
2280 if m_music:
2281 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2282 video_creator = clean_html(m_music.group('creator'))
2283 else:
2284 video_alt_title = video_creator = None
2285
2286 def extract_meta(field):
2287 return self._html_search_regex(
2288 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2289 video_webpage, field, default=None)
2290
2291 track = extract_meta('Song')
2292 artist = extract_meta('Artist')
2293 album = extract_meta('Album')
2294
        # Youtube Music Auto-generated description
        # Parse track/artist/album/release info out of the auto-generated
        # "Provided to YouTube by ..." description; values found earlier
        # (via extract_meta) take precedence over these.
        release_date = release_year = None
        if video_description:
            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
            if mobj:
                if not track:
                    track = mobj.group('track').strip()
                if not artist:
                    artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
                if not album:
                    # NOTE(review): '.strip()' here is applied to the literal
                    # string 'album' (a no-op), not to the matched text;
                    # mobj.group('album').strip() was presumably intended,
                    # mirroring the 'track' handling above — confirm upstream.
                    album = mobj.group('album'.strip())
                release_year = mobj.group('release_year')
                release_date = mobj.group('release_date')
                if release_date:
                    # Normalize YYYY-MM-DD to YYYYMMDD; derive the year from
                    # it when the ℗ year was not matched.
                    release_date = release_date.replace('-', '')
                    if not release_year:
                        release_year = int(release_date[:4])
                if release_year:
                    release_year = int(release_year)
2314
2315 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2316 if yt_initial:
2317 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2318 if len(music_metadata):
2319 album = music_metadata[0].get('album')
2320 artist = music_metadata[0].get('artist')
2321 track = music_metadata[0].get('track')
2322
2323 m_episode = re.search(
2324 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2325 video_webpage)
2326 if m_episode:
2327 series = unescapeHTML(m_episode.group('series'))
2328 season_number = int(m_episode.group('season'))
2329 episode_number = int(m_episode.group('episode'))
2330 else:
2331 series = season_number = episode_number = None
2332
2333 m_cat_container = self._search_regex(
2334 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2335 video_webpage, 'categories', default=None)
2336 category = None
2337 if m_cat_container:
2338 category = self._html_search_regex(
2339 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2340 default=None)
2341 if not category:
2342 category = try_get(
2343 microformat, lambda x: x['category'], compat_str)
2344 video_categories = None if category is None else [category]
2345
2346 video_tags = [
2347 unescapeHTML(m.group('content'))
2348 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2349 if not video_tags:
2350 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2351
2352 def _extract_count(count_name):
2353 return str_to_int(self._search_regex(
2354 (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
2355 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
2356 video_webpage, count_name, default=None))
2357
2358 like_count = _extract_count('like')
2359 dislike_count = _extract_count('dislike')
2360
2361 if view_count is None:
2362 view_count = str_to_int(self._search_regex(
2363 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2364 'view count', default=None))
2365
2366 average_rating = (
2367 float_or_none(video_details.get('averageRating'))
2368 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2369
2370 # subtitles
2371 video_subtitles = self.extract_subtitles(
2372 video_id, video_webpage, has_live_chat_replay)
2373 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2374
2375 video_duration = try_get(
2376 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2377 if not video_duration:
2378 video_duration = int_or_none(video_details.get('lengthSeconds'))
2379 if not video_duration:
2380 video_duration = parse_duration(self._html_search_meta(
2381 'duration', video_webpage, 'video duration'))
2382
2383 # Get Subscriber Count of channel
2384 subscriber_count = parse_count(self._search_regex(
2385 r'"text":"([\d\.]+\w?) subscribers"',
2386 video_webpage,
2387 'subscriber count',
2388 default=None
2389 ))
2390
2391 # annotations
2392 video_annotations = None
2393 if self._downloader.params.get('writeannotations', False):
2394 xsrf_token = self._search_regex(
2395 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2396 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2397 invideo_url = try_get(
2398 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2399 if xsrf_token and invideo_url:
2400 xsrf_field_name = self._search_regex(
2401 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2402 video_webpage, 'xsrf field name',
2403 group='xsrf_field_name', default='session_token')
2404 video_annotations = self._download_webpage(
2405 self._proto_relative_url(invideo_url),
2406 video_id, note='Downloading annotations',
2407 errnote='Unable to download video annotations', fatal=False,
2408 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2409
2410 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2411
2412 # Look for the DASH manifest
2413 if self._downloader.params.get('youtube_include_dash_manifest', True):
2414 dash_mpd_fatal = True
2415 for mpd_url in dash_mpds:
2416 dash_formats = {}
2417 try:
2418 def decrypt_sig(mobj):
2419 s = mobj.group(1)
2420 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2421 return '/signature/%s' % dec_s
2422
2423 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2424
2425 for df in self._extract_mpd_formats(
2426 mpd_url, video_id, fatal=dash_mpd_fatal,
2427 formats_dict=self._formats):
2428 if not df.get('filesize'):
2429 df['filesize'] = _extract_filesize(df['url'])
2430 # Do not overwrite DASH format found in some previous DASH manifest
2431 if df['format_id'] not in dash_formats:
2432 dash_formats[df['format_id']] = df
2433 # Additional DASH manifests may end up in HTTP Error 403 therefore
2434 # allow them to fail without bug report message if we already have
2435 # some DASH manifest succeeded. This is temporary workaround to reduce
2436 # burst of bug reports until we figure out the reason and whether it
2437 # can be fixed at all.
2438 dash_mpd_fatal = False
2439 except (ExtractorError, KeyError) as e:
2440 self.report_warning(
2441 'Skipping DASH manifest: %r' % e, video_id)
2442 if dash_formats:
2443 # Remove the formats we found through non-DASH, they
2444 # contain less info and it can be wrong, because we use
2445 # fixed values (for example the resolution). See
2446 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2447 # example.
2448 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2449 formats.extend(dash_formats.values())
2450
2451 # Check for malformed aspect ratio
2452 stretched_m = re.search(
2453 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2454 video_webpage)
2455 if stretched_m:
2456 w = float(stretched_m.group('w'))
2457 h = float(stretched_m.group('h'))
2458 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2459 # We will only process correct ratios.
2460 if w > 0 and h > 0:
2461 ratio = w / h
2462 for f in formats:
2463 if f.get('vcodec') != 'none':
2464 f['stretched_ratio'] = ratio
2465
2466 if not formats:
2467 if 'reason' in video_info:
2468 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2469 regions_allowed = self._html_search_meta(
2470 'regionsAllowed', video_webpage, default=None)
2471 countries = regions_allowed.split(',') if regions_allowed else None
2472 self.raise_geo_restricted(
2473 msg=video_info['reason'][0], countries=countries)
2474 reason = video_info['reason'][0]
2475 if 'Invalid parameters' in reason:
2476 unavailable_message = extract_unavailable_message()
2477 if unavailable_message:
2478 reason = unavailable_message
2479 raise ExtractorError(
2480 'YouTube said: %s' % reason,
2481 expected=True, video_id=video_id)
2482 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2483 raise ExtractorError('This video is DRM protected.', expected=True)
2484
2485 self._sort_formats(formats)
2486
2487 self.mark_watched(video_id, video_info, player_response)
2488
2489 return {
2490 'id': video_id,
2491 'uploader': video_uploader,
2492 'uploader_id': video_uploader_id,
2493 'uploader_url': video_uploader_url,
2494 'channel_id': channel_id,
2495 'channel_url': channel_url,
2496 'upload_date': upload_date,
2497 'license': video_license,
2498 'creator': video_creator or artist,
2499 'title': video_title,
2500 'alt_title': video_alt_title or track,
2501 'thumbnails': thumbnails,
2502 'description': video_description,
2503 'categories': video_categories,
2504 'tags': video_tags,
2505 'subtitles': video_subtitles,
2506 'automatic_captions': automatic_captions,
2507 'duration': video_duration,
2508 'age_limit': 18 if age_gate else 0,
2509 'annotations': video_annotations,
2510 'chapters': chapters,
2511 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2512 'view_count': view_count,
2513 'like_count': like_count,
2514 'dislike_count': dislike_count,
2515 'average_rating': average_rating,
2516 'formats': formats,
2517 'is_live': is_live,
2518 'start_time': start_time,
2519 'end_time': end_time,
2520 'series': series,
2521 'season_number': season_number,
2522 'episode_number': episode_number,
2523 'track': track,
2524 'artist': artist,
2525 'album': album,
2526 'release_date': release_date,
2527 'release_year': release_year,
2528 'subscriber_count': subscriber_count,
2529 }
2530
2531
2532class YoutubeTabIE(YoutubeBaseInfoExtractor):
2533 IE_DESC = 'YouTube.com tab'
2534 _VALID_URL = r'''(?x)
2535 https?://
2536 (?:\w+\.)?
2537 (?:
2538 youtube(?:kids)?\.com|
2539 invidio\.us
2540 )/
2541 (?:
2542 (?:channel|c|user)/|
2543 (?P<not_channel>
2544 (?:playlist|watch)\?.*?\blist=
2545 )|
2546 (?!(%s)([/#?]|$)) # Direct URLs
2547 )
2548 (?P<id>[^/?\#&]+)
2549 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
2550 IE_NAME = 'youtube:tab'
2551
2552 _TESTS = [{
2553 # playlists, multipage
2554 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2555 'playlist_mincount': 94,
2556 'info_dict': {
2557 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2558 'title': 'Игорь Клейнер - Playlists',
2559 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2560 },
2561 }, {
2562 # playlists, multipage, different order
2563 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2564 'playlist_mincount': 94,
2565 'info_dict': {
2566 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2567 'title': 'Игорь Клейнер - Playlists',
2568 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2569 },
2570 }, {
2571 # playlists, singlepage
2572 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2573 'playlist_mincount': 4,
2574 'info_dict': {
2575 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2576 'title': 'ThirstForScience - Playlists',
2577 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2578 }
2579 }, {
2580 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2581 'only_matching': True,
2582 }, {
2583 # basic, single video playlist
2584 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2585 'info_dict': {
2586 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2587 'uploader': 'Sergey M.',
2588 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2589 'title': 'youtube-dl public playlist',
2590 },
2591 'playlist_count': 1,
2592 }, {
2593 # empty playlist
2594 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2595 'info_dict': {
2596 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2597 'uploader': 'Sergey M.',
2598 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2599 'title': 'youtube-dl empty playlist',
2600 },
2601 'playlist_count': 0,
2602 }, {
2603 # Home tab
2604 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2605 'info_dict': {
2606 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2607 'title': 'lex will - Home',
2608 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2609 },
2610 'playlist_mincount': 2,
2611 }, {
2612 # Videos tab
2613 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2614 'info_dict': {
2615 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2616 'title': 'lex will - Videos',
2617 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2618 },
2619 'playlist_mincount': 975,
2620 }, {
2621 # Videos tab, sorted by popular
2622 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2623 'info_dict': {
2624 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2625 'title': 'lex will - Videos',
2626 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2627 },
2628 'playlist_mincount': 199,
2629 }, {
2630 # Playlists tab
2631 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2632 'info_dict': {
2633 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2634 'title': 'lex will - Playlists',
2635 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2636 },
2637 'playlist_mincount': 17,
2638 }, {
2639 # Community tab
2640 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2641 'info_dict': {
2642 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2643 'title': 'lex will - Community',
2644 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2645 },
2646 'playlist_mincount': 18,
2647 }, {
2648 # Channels tab
2649 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2650 'info_dict': {
2651 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2652 'title': 'lex will - Channels',
2653 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2654 },
2655 'playlist_mincount': 138,
2656 }, {
2657 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2658 'only_matching': True,
2659 }, {
2660 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2661 'only_matching': True,
2662 }, {
2663 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
2664 'only_matching': True,
2665 }, {
2666 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2667 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2668 'info_dict': {
2669 'title': '29C3: Not my department',
2670 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2671 'uploader': 'Christiaan008',
2672 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2673 },
2674 'playlist_count': 96,
2675 }, {
2676 'note': 'Large playlist',
2677 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2678 'info_dict': {
2679 'title': 'Uploads from Cauchemar',
2680 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2681 'uploader': 'Cauchemar',
2682 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2683 },
2684 'playlist_mincount': 1123,
2685 }, {
2686 # even larger playlist, 8832 videos
2687 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2688 'only_matching': True,
2689 }, {
2690 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2691 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2692 'info_dict': {
2693 'title': 'Uploads from Interstellar Movie',
2694 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2695 'uploader': 'Interstellar Movie',
2696 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2697 },
2698 'playlist_mincount': 21,
2699 }, {
2700 # https://github.com/ytdl-org/youtube-dl/issues/21844
2701 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2702 'info_dict': {
2703 'title': 'Data Analysis with Dr Mike Pound',
2704 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2705 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2706 'uploader': 'Computerphile',
2707 },
2708 'playlist_mincount': 11,
2709 }, {
2710 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2711 'only_matching': True,
2712 }, {
2713 # Playlist URL that does not actually serve a playlist
2714 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2715 'info_dict': {
2716 'id': 'FqZTN594JQw',
2717 'ext': 'webm',
2718 'title': "Smiley's People 01 detective, Adventure Series, Action",
2719 'uploader': 'STREEM',
2720 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2721 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2722 'upload_date': '20150526',
2723 'license': 'Standard YouTube License',
2724 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2725 'categories': ['People & Blogs'],
2726 'tags': list,
2727 'view_count': int,
2728 'like_count': int,
2729 'dislike_count': int,
2730 },
2731 'params': {
2732 'skip_download': True,
2733 },
2734 'skip': 'This video is not available.',
2735 'add_ie': [YoutubeIE.ie_key()],
2736 }, {
2737 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2738 'only_matching': True,
2739 }, {
2740 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2741 'only_matching': True,
2742 }, {
2743 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2744 'info_dict': {
2745 'id': '9Auq9mYxFEE',
2746 'ext': 'mp4',
2747 'title': 'Watch Sky News live',
2748 'uploader': 'Sky News',
2749 'uploader_id': 'skynews',
2750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2751 'upload_date': '20191102',
2752 'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
2753 'categories': ['News & Politics'],
2754 'tags': list,
2755 'like_count': int,
2756 'dislike_count': int,
2757 },
2758 'params': {
2759 'skip_download': True,
2760 },
2761 }, {
2762 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2763 'info_dict': {
2764 'id': 'a48o2S1cPoo',
2765 'ext': 'mp4',
2766 'title': 'The Young Turks - Live Main Show',
2767 'uploader': 'The Young Turks',
2768 'uploader_id': 'TheYoungTurks',
2769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2770 'upload_date': '20150715',
2771 'license': 'Standard YouTube License',
2772 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2773 'categories': ['News & Politics'],
2774 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2775 'like_count': int,
2776 'dislike_count': int,
2777 },
2778 'params': {
2779 'skip_download': True,
2780 },
2781 'only_matching': True,
2782 }, {
2783 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2784 'only_matching': True,
2785 }, {
2786 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2787 'only_matching': True,
2788 },
2789 # TODO
2790 # {
2791 # 'url': 'https://www.youtube.com/TheYoungTurks/live',
2792 # 'only_matching': True,
2793 # }
2794 ]
2795
2796 def _extract_channel_id(self, webpage):
2797 channel_id = self._html_search_meta(
2798 'channelId', webpage, 'channel id', default=None)
2799 if channel_id:
2800 return channel_id
2801 channel_url = self._html_search_meta(
2802 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2803 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2804 'twitter:app:url:googleplay'), webpage, 'channel url')
2805 return self._search_regex(
2806 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2807 channel_url, 'channel id')
2808
2809 @staticmethod
2810 def _extract_grid_item_renderer(item):
2811 for item_kind in ('Playlist', 'Video', 'Channel'):
2812 renderer = item.get('grid%sRenderer' % item_kind)
2813 if renderer:
2814 return renderer
2815
2816 def _extract_video(self, renderer):
2817 video_id = renderer.get('videoId')
2818 title = try_get(
2819 renderer,
2820 (lambda x: x['title']['runs'][0]['text'],
2821 lambda x: x['title']['simpleText']), compat_str)
2822 description = try_get(
2823 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2824 compat_str)
2825 duration = parse_duration(try_get(
2826 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2827 view_count_text = try_get(
2828 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2829 view_count = str_to_int(self._search_regex(
2830 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2831 'view count', default=None))
2832 uploader = try_get(
2833 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2834 return {
2835 '_type': 'url_transparent',
2836 'ie_key': YoutubeIE.ie_key(),
2837 'id': video_id,
2838 'url': video_id,
2839 'title': title,
2840 'description': description,
2841 'duration': duration,
2842 'view_count': view_count,
2843 'uploader': uploader,
2844 }
2845
2846 def _grid_entries(self, grid_renderer):
2847 for item in grid_renderer['items']:
2848 if not isinstance(item, dict):
2849 continue
2850 renderer = self._extract_grid_item_renderer(item)
2851 if not isinstance(renderer, dict):
2852 continue
2853 title = try_get(
2854 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2855 # playlist
2856 playlist_id = renderer.get('playlistId')
2857 if playlist_id:
2858 yield self.url_result(
2859 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2860 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2861 video_title=title)
2862 # video
2863 video_id = renderer.get('videoId')
2864 if video_id:
2865 yield self._extract_video(renderer)
2866 # channel
2867 channel_id = renderer.get('channelId')
2868 if channel_id:
2869 title = try_get(
2870 renderer, lambda x: x['title']['simpleText'], compat_str)
2871 yield self.url_result(
2872 'https://www.youtube.com/channel/%s' % channel_id,
2873 ie=YoutubeTabIE.ie_key(), video_title=title)
2874
2875 def _shelf_entries_trimmed(self, shelf_renderer):
2876 renderer = try_get(
2877 shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
2878 if not renderer:
2879 return
2880 # TODO: add support for nested playlists so each shelf is processed
2881 # as separate playlist
2882 # TODO: this includes only first N items
2883 for entry in self._grid_entries(renderer):
2884 yield entry
2885
2886 def _shelf_entries(self, shelf_renderer):
2887 ep = try_get(
2888 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2889 compat_str)
2890 shelf_url = urljoin('https://www.youtube.com', ep)
2891 if not shelf_url:
2892 return
2893 title = try_get(
2894 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2895 yield self.url_result(shelf_url, video_title=title)
2896
2897 def _playlist_entries(self, video_list_renderer):
2898 for content in video_list_renderer['contents']:
2899 if not isinstance(content, dict):
2900 continue
2901 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2902 if not isinstance(renderer, dict):
2903 continue
2904 video_id = renderer.get('videoId')
2905 if not video_id:
2906 continue
2907 yield self._extract_video(renderer)
2908
2909 def _itemSection_entries(self, item_sect_renderer):
2910 for content in item_sect_renderer['contents']:
2911 if not isinstance(content, dict):
2912 continue
2913 renderer = content.get('videoRenderer', {})
2914 if not isinstance(renderer, dict):
2915 continue
2916 video_id = renderer.get('videoId')
2917 if not video_id:
2918 continue
2919 yield self._extract_video(renderer)
2920
2921 def _rich_entries(self, rich_grid_renderer):
2922 renderer = try_get(
2923 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
2924 video_id = renderer.get('videoId')
2925 if not video_id:
2926 return
2927 yield self._extract_video(renderer)
2928
2929 def _video_entry(self, video_renderer):
2930 video_id = video_renderer.get('videoId')
2931 if video_id:
2932 return self._extract_video(video_renderer)
2933
2934 def _post_thread_entries(self, post_thread_renderer):
2935 post_renderer = try_get(
2936 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2937 if not post_renderer:
2938 return
2939 # video attachment
2940 video_renderer = try_get(
2941 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2942 video_id = None
2943 if video_renderer:
2944 entry = self._video_entry(video_renderer)
2945 if entry:
2946 yield entry
2947 # inline video links
2948 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2949 for run in runs:
2950 if not isinstance(run, dict):
2951 continue
2952 ep_url = try_get(
2953 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2954 if not ep_url:
2955 continue
2956 if not YoutubeIE.suitable(ep_url):
2957 continue
2958 ep_video_id = YoutubeIE._match_id(ep_url)
2959 if video_id == ep_video_id:
2960 continue
2961 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2962
2963 def _post_thread_continuation_entries(self, post_thread_continuation):
2964 contents = post_thread_continuation.get('contents')
2965 if not isinstance(contents, list):
2966 return
2967 for content in contents:
2968 renderer = content.get('backstagePostThreadRenderer')
2969 if not isinstance(renderer, dict):
2970 continue
2971 for entry in self._post_thread_entries(renderer):
2972 yield entry
2973
2974 @staticmethod
2975 def _extract_next_continuation_data(renderer):
2976 next_continuation = try_get(
2977 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2978 if not next_continuation:
2979 return
2980 continuation = next_continuation.get('continuation')
2981 if not continuation:
2982 return
2983 ctp = next_continuation.get('clickTrackingParams')
2984 return {
2985 'ctoken': continuation,
2986 'continuation': continuation,
2987 'itct': ctp,
2988 }
2989
2990 @classmethod
2991 def _extract_continuation(cls, renderer):
2992 next_continuation = cls._extract_next_continuation_data(renderer)
2993 if next_continuation:
2994 return next_continuation
2995 contents = renderer.get('contents')
2996 if not isinstance(contents, list):
2997 return
2998 for content in contents:
2999 if not isinstance(content, dict):
3000 continue
3001 continuation_ep = try_get(
3002 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
3003 dict)
3004 if not continuation_ep:
3005 continue
3006 continuation = try_get(
3007 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
3008 if not continuation:
3009 continue
3010 ctp = continuation_ep.get('clickTrackingParams')
3011 if not ctp:
3012 continue
3013 return {
3014 'ctoken': continuation,
3015 'continuation': continuation,
3016 'itct': ctp,
3017 }
3018
    def _entries(self, tab, identity_token):
        """Yield all entries of a tab, transparently following continuation
        ("infinite scroll") pages until exhausted.

        ``tab`` is the selected tab's content dict; ``identity_token`` (if
        any) is sent as x-youtube-identity-token so logged-in feeds work.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Yields entries from one renderer page and stores the next
            # continuation (if any) in continuation_list[0] as a side effect.
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Rich grid layout (e.g. home feed) has no item section.
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        for entry in self._shelf_entries(renderer):
                            yield entry
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a mutable cell: Python 2 does not support
        # the nonlocal statement, so the generator writes through this list.
        continuation_list = [None]
        parent_renderer = (
            try_get(tab, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # Follow continuations until no renderer yields a next token.
        for page_num in itertools.count(1):
            if not continuation:
                break
            browse = self._download_json(
                'https://www.youtube.com/browse_ajax', None,
                'Downloading page %d' % page_num,
                headers=headers, query=continuation, fatal=False)
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Older API shape: results arrive under continuationContents,
            # keyed by the kind of list being continued.
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # Newer API shape: results arrive as appendContinuationItemsAction.
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
            break
3144
3145 @staticmethod
3146 def _extract_selected_tab(tabs):
3147 for tab in tabs:
3148 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3149 return tab['tabRenderer']
3150 else:
3151 raise ExtractorError('Unable to find selected tab')
3152
3153 @staticmethod
3154 def _extract_uploader(data):
3155 uploader = {}
3156 sidebar_renderer = try_get(
3157 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3158 if sidebar_renderer:
3159 for item in sidebar_renderer:
3160 if not isinstance(item, dict):
3161 continue
3162 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3163 if not isinstance(renderer, dict):
3164 continue
3165 owner = try_get(
3166 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3167 if owner:
3168 uploader['uploader'] = owner.get('text')
3169 uploader['uploader_id'] = try_get(
3170 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3171 uploader['uploader_url'] = urljoin(
3172 'https://www.youtube.com/',
3173 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3174 return uploader
3175
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build a playlist result for the selected tab of a channel/playlist
        page.

        Title/description/id come from channelMetadataRenderer, but a
        playlistMetadataRenderer (present on playlist pages) deliberately
        overrides them afterwards.
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = title = description = None
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            # e.g. "lex will - Videos"
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        # Playlist metadata (if present) takes precedence over channel metadata.
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        # Fallbacks so the result always has an id and a title.
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            title = "Youtube " + playlist_id.title()
        playlist = self.playlist_result(
            self._entries(selected_tab['content'], identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist
3205
3206 def _extract_from_playlist(self, item_id, data, playlist):
3207 title = playlist.get('title') or try_get(
3208 data, lambda x: x['titleText']['simpleText'], compat_str)
3209 playlist_id = playlist.get('playlistId') or item_id
3210 return self.playlist_result(
3211 self._playlist_entries(playlist), playlist_id=playlist_id,
3212 playlist_title=title)
3213
    def _real_extract(self, url):
        """Dispatch a tab/channel/playlist/watch URL.

        Normalizes the host, rewrites bare channel URLs to their /videos
        tab, then extracts from tabs, from an inline watch-page playlist,
        or finally falls back to single-video extraction.
        """
        item_id = self._match_id(url)
        # Force the www.youtube.com host so subdomain/mirror URLs behave
        # identically downstream.
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # A "home" URL is a matched URL with nothing but an optional '/'
        # (and fragment/query) after it, i.e. a bare channel/user page.
        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
            self._downloader.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/home" to the URL')
            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')

        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            # --no-playlist short-circuits to the single video.
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized.
        # First check for the current video then try the v attribute of URL query.
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3256
3257
class YoutubePlaylistIE(InfoExtractor):
    """Thin wrapper for playlist URLs and bare playlist IDs.

    Performs no extraction itself: it normalizes the input into a
    canonical https://www.youtube.com/playlist URL and delegates to
    YoutubeTabIE.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us|
                                youtu\.be
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for anything it claims, so tab/watch URLs
        # that merely contain a list= parameter are not swallowed here.
        return False if YoutubeTabIE.suitable(url) else super(
            YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        # Bare playlist IDs (e.g. 'PL...') carry no query string; synthesize
        # the list= parameter so the canonical URL round-trips the id.
        if not qs:
            qs = {'list': playlist_id}
        return self.url_result(
            update_url_query('https://www.youtube.com/playlist', qs),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3356
3357
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the 'ytuser:<name>' shorthand to a channel page handled by YoutubeTabIE."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
3370
3371
class YoutubeFavouritesIE(InfoExtractor):
    """Resolve ':ytfav' (and variants) to the liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist; hand it to the
        # generic tab extractor.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3389
3390
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n url_transparent results for query via the youtubei search API.

        Pages through continuation tokens until n results have been yielded
        or no further continuation is returned.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page nests results under twoColumnSearchResultsRenderer;
            # continuation pages use appendContinuationItemsAction instead.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Match digits together with thousands separators and let
                # str_to_int strip them; matching only r'^(\d+)' would stop
                # at the first comma of e.g. '1,234,567 views' and report 1.
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3479
3480
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant ordered by upload date (newest first)."""
    _SEARCH_KEY = 'ytsearchdate'
    # Encoded search filter selecting date ordering (URL-escaped 'CAI=').
    _SEARCH_PARAMS = 'CAI%3D'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
3486
3487
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle real /results search URLs rather than the 'ytsearch:' scheme."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the URL pattern directly instead of the ytsearch: scheme.
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        # Carry the URL's 'sp' filter into _entries by setting it on the
        # instance before running the search.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3513
3514
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are per-account, so credentials are mandatory.
    _LOGIN_REQUIRED = True
    # _MAX_PAGES = 5
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived name, e.g. 'youtube:history' for _FEED_NAME == 'history'.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _shelf_entries(self, shelf_renderer):
        # Feed pages wrap their grid in a shelf; unwrap it and delegate to
        # the base class's grid handling (defined on YoutubeTabIE).
        renderer = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
        if not renderer:
            return
        for entry in self._grid_entries(renderer):
            yield entry

    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        # Overrides the YoutubeTabIE hook; signature must match the base even
        # though item_id/webpage/data are unused here.
        selected_tab = self._extract_selected_tab(tabs)
        return self.playlist_result(
            self._entries(selected_tab['content'], identity_token),
            playlist_title=self._PLAYLIST_TITLE)

    def _real_extract(self, url):
        # The incoming url is ignored; the feed page is derived from
        # _FEED_NAME so aliases like ':ythistory' work uniformly.
        item_id = self._FEED_NAME
        url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        webpage = self._download_webpage(url, item_id)
        # Identity token authenticates follow-up continuation requests;
        # absence is tolerated (default=None).
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        # Failed to recognize
        raise ExtractorError('Unable to recognize feed page')
3558
3559
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the watch-later feed (':ytwatchlater', 'WL') to the WL playlist."""
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater|WL'

    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _TESTS = [{
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is the special 'WL' playlist.
        wl_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(wl_url, ie=YoutubeTabIE.ie_key())
3576
3577
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the recommended-videos page (':ytrec')."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
3583
3584
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the subscriptions page (':ytsubs')."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
3590
3591
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the watch-history page (':ythistory')."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
3597
3598
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose query string lost the video ID (usually an
    unquoted '&' in the shell) and fail with a helpful message."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fix: the original message concatenation produced a double space
        # between the example URL and 'or simply'.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply youtube-dl BaW_jenozKc .',
            expected=True)
3646
3647
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is shorter than the full 11 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)
3663
3664
# Do Youtube show urls even exist anymore? I couldn't find any
# NOTE: the definition below is deliberately disabled by wrapping it in a
# raw-string expression statement, so it is never executed or registered.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''