]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and...
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_kwargs,
20 compat_parse_qs,
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
23 compat_urllib_parse_urlencode,
24 compat_urllib_parse_urlparse,
25 compat_urlparse,
26 compat_str,
27)
28from ..utils import (
29 bool_or_none,
30 clean_html,
31 error_to_compat_str,
32 ExtractorError,
33 float_or_none,
34 get_element_by_id,
35 int_or_none,
36 mimetype2ext,
37 parse_codecs,
38 parse_count,
39 parse_duration,
40 remove_quotes,
41 remove_start,
42 smuggle_url,
43 str_or_none,
44 str_to_int,
45 try_get,
46 unescapeHTML,
47 unified_strdate,
48 unsmuggle_url,
49 update_url_query,
50 uppercase_escape,
51 url_or_none,
52 urlencode_postdata,
53 urljoin,
54)
55
56
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in pages (HTML).
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # JSON endpoints of the web sign-in flow: account lookup, password
    # challenge, and two-factor challenge ('{0}' is filled with the "TL"
    # token extracted from the password-challenge response).
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex fragment of youtube.com path components that are site features,
    # not channel/user names; intended for use inside other URL patterns.
    _RESERVED_NAMES = (
        r'course|embed|watch|w|results|storefront|'
        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment matching known playlist ID shapes: a two-letter (or
    # 'OLAK5uy_') prefix followed by >=10 ID chars, plus the special
    # mix/liked/watch-later pseudo-IDs 'RDMM', 'LL' and 'WL'.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|LL|WL)'

    # Headers identifying the desktop web client to YouTube endpoints.
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie to force an English-language UI, so that
        regex-based scraping sees stable English page text."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Map an iterable of video IDs to url_result dicts dispatched to
        the 'Youtube' extractor."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        NOTE(review): the request/response payloads below are Google's
        undocumented nested-JSON arrays; the positional indices used with
        try_get() are reverse engineered and may break without notice.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # NOTE(review): 'and False' deliberately disables this reminder
            # for now (see inline TODO) — the branch is dead code.
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        # _download_webpage returns False (not None) on a non-fatal failure
        if login_page is False:
            return

        # Hidden <input> fields carry CSRF/session tokens that must be
        # echoed back on every subsequent sign-in request.
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow; f_req is the nested-array
            # payload Google expects in the 'f.req' form field.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Responses are prefixed with an anti-XSSI garbage string;
                # strip everything before the first '[' to get valid JSON.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Step 1: look up the account for this username.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Opaque account identifier echoed back in later requests.
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Step 2: submit the password.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        # NOTE(review): returns None here rather than False as the docstring
        # promises; callers treating any falsy value as failure still work.
        if challenge_results is False:
            return

        # A populated entry at [0][5] signals a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # A nested challenge entry means additional verification is required.
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token required by the TFA endpoint URL.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users sometimes paste codes with the SMS 'G-' prefix.
                tfa_code = remove_start(tfa_code, 'G-')

                # Step 3 (conditional): submit the TFA code.
                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                # Same error-slot convention as the password challenge above.
                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved non-interactively; map the
                # known codes to human-readable explanations.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Final step: fetching this URL materializes the session cookies.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A logged-in session is redirected through myaccount.google.com.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Thin wrapper over InfoExtractor._download_webpage_handle.

        Copies the 'query' dict before passing it on so the caller's dict is
        never mutated downstream.
        """
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON blob from a watch page.

        Non-fatal variant: returns None when the blob is absent or does not
        parse. NOTE(review): overlaps with _extract_yt_initial_data below,
        which is the fatal variant — candidates for consolidation.
        """
        config = self._search_regex(
            # (?<=}) anchors the lazy match so it ends at a closing brace,
            # keeping '.*?' from stopping at a '};' inside a string.
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        if self._downloader is None:
            return
        self._set_language()
        # NOTE(review): the guard below is a no-op — nothing follows the
        # conditional return, so the _login() result is effectively ignored.
        if not self._login():
            return

    # Minimal InnerTube request context ('WEB' desktop client); _call_api
    # merges caller-supplied fields on top of a copy of this.
    _DEFAULT_API_DATA = {
        'context': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20201021.03.00',
            }
        },
    }

    def _call_api(self, ep, query, video_id):
        """POST to the youtubei/v1/<ep> InnerTube endpoint and return the
        decoded JSON response.

        ep       -- endpoint name appended to the base URL (e.g. 'player')
        query    -- dict merged into _DEFAULT_API_DATA as the JSON body
        video_id -- video ID used for logging/error reporting only
        """
        # copy() is shallow; update() only replaces top-level keys, so the
        # shared nested 'context' dict is read but not mutated here.
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)

        response = self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'),
            headers={'content-type': 'application/json'},
            # Public web-client API key, same for all users.
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})

        return response

    def _extract_yt_initial_data(self, video_id, webpage):
        """Extract and parse ytInitialData from a page; raises (fatal) when
        the blob cannot be found or parsed, unlike _get_yt_initial_data."""
        return self._parse_json(
            self._search_regex(
                r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
                webpage, 'yt initial data'),
            video_id)
328
329
330class YoutubeIE(YoutubeBaseInfoExtractor):
331 IE_DESC = 'YouTube.com'
332 _VALID_URL = r"""(?x)^
333 (
334 (?:https?://|//) # http(s):// or protocol-independent URL
335 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
336 (?:www\.)?deturl\.com/www\.youtube\.com/|
337 (?:www\.)?pwnyoutube\.com/|
338 (?:www\.)?hooktube\.com/|
339 (?:www\.)?yourepeat\.com/|
340 tube\.majestyc\.net/|
341 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
342 (?:(?:www|dev)\.)?invidio\.us/|
343 (?:(?:www|no)\.)?invidiou\.sh/|
344 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
345 (?:www\.)?invidious\.kabi\.tk/|
346 (?:www\.)?invidious\.13ad\.de/|
347 (?:www\.)?invidious\.mastodon\.host/|
348 (?:www\.)?invidious\.nixnet\.xyz/|
349 (?:www\.)?invidious\.drycat\.fr/|
350 (?:www\.)?tube\.poal\.co/|
351 (?:www\.)?vid\.wxzm\.sx/|
352 (?:www\.)?yewtu\.be/|
353 (?:www\.)?yt\.elukerio\.org/|
354 (?:www\.)?yt\.lelux\.fi/|
355 (?:www\.)?invidious\.ggc-project\.de/|
356 (?:www\.)?yt\.maisputain\.ovh/|
357 (?:www\.)?invidious\.13ad\.de/|
358 (?:www\.)?invidious\.toot\.koeln/|
359 (?:www\.)?invidious\.fdn\.fr/|
360 (?:www\.)?watch\.nettohikari\.com/|
361 (?:www\.)?kgg2m7yk5aybusll\.onion/|
362 (?:www\.)?qklhadlycap4cnod\.onion/|
363 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
364 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
365 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
366 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
367 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
368 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
369 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
370 (?:.*?\#/)? # handle anchor (#/) redirect urls
371 (?: # the various things that can precede the ID:
372 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
373 |(?: # or the v= param in all its forms
374 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
375 (?:\?|\#!?) # the params delimiter ? or # or #!
376 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
377 v=
378 )
379 ))
380 |(?:
381 youtu\.be| # just youtu.be/xxxx
382 vid\.plus| # or vid.plus/xxxx
383 zwearz\.com/watch| # or zwearz.com/watch/xxxx
384 )/
385 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
386 )
387 )? # all until now is optional -> you can pass the naked ID
388 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
389 (?!.*?\blist=
390 (?:
391 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
392 WL # WL are handled by the watch later IE
393 )
394 )
395 (?(1).+)? # if we found the ID, everything can follow
396 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
397 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
398 _PLAYER_INFO_RE = (
399 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
400 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
401 )
402 _formats = {
403 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
404 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
405 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
406 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
407 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
408 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
409 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
410 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
411 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
412 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
413 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
414 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
415 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
416 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
417 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
418 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
419 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
420 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
421
422
423 # 3D videos
424 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
425 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
426 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
427 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
428 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
429 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
430 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
431
432 # Apple HTTP Live Streaming
433 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
434 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
435 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
436 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
437 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
438 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
439 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
440 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
441
442 # DASH mp4 video
443 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
444 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
445 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
446 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
447 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
448 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
449 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
450 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
451 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
452 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
453 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
454 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
455
456 # Dash mp4 audio
457 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
458 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
459 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
460 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
461 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
462 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
463 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
464
465 # Dash webm
466 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
467 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
468 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
469 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
470 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
471 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
472 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
473 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
474 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
475 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
476 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
477 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
478 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
479 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
480 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
481 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
482 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
483 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
484 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
485 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
486 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
487 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
488
489 # Dash webm audio
490 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
491 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
492
493 # Dash webm audio with opus inside
494 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
495 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
496 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
497
498 # RTMP (unnamed)
499 '_rtmp': {'protocol': 'rtmp'},
500
501 # av01 video only formats sometimes served with "unknown" codecs
502 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
503 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
504 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
505 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
506 }
507 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
508
509 _GEO_BYPASS = False
510
511 IE_NAME = 'youtube'
512 _TESTS = [
513 {
514 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
515 'info_dict': {
516 'id': 'BaW_jenozKc',
517 'ext': 'mp4',
518 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
519 'uploader': 'Philipp Hagemeister',
520 'uploader_id': 'phihag',
521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
522 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
523 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
524 'upload_date': '20121002',
525 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
526 'categories': ['Science & Technology'],
527 'tags': ['youtube-dl'],
528 'duration': 10,
529 'view_count': int,
530 'like_count': int,
531 'dislike_count': int,
532 'start_time': 1,
533 'end_time': 9,
534 }
535 },
536 {
537 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
538 'note': 'Embed-only video (#1746)',
539 'info_dict': {
540 'id': 'yZIXLfi8CZQ',
541 'ext': 'mp4',
542 'upload_date': '20120608',
543 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
544 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
545 'uploader': 'SET India',
546 'uploader_id': 'setindia',
547 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
548 'age_limit': 18,
549 }
550 },
551 {
552 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
553 'note': 'Use the first video ID in the URL',
554 'info_dict': {
555 'id': 'BaW_jenozKc',
556 'ext': 'mp4',
557 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
558 'uploader': 'Philipp Hagemeister',
559 'uploader_id': 'phihag',
560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
561 'upload_date': '20121002',
562 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
563 'categories': ['Science & Technology'],
564 'tags': ['youtube-dl'],
565 'duration': 10,
566 'view_count': int,
567 'like_count': int,
568 'dislike_count': int,
569 },
570 'params': {
571 'skip_download': True,
572 },
573 },
574 {
575 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
576 'note': '256k DASH audio (format 141) via DASH manifest',
577 'info_dict': {
578 'id': 'a9LDPn-MO4I',
579 'ext': 'm4a',
580 'upload_date': '20121002',
581 'uploader_id': '8KVIDEO',
582 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
583 'description': '',
584 'uploader': '8KVIDEO',
585 'title': 'UHDTV TEST 8K VIDEO.mp4'
586 },
587 'params': {
588 'youtube_include_dash_manifest': True,
589 'format': '141',
590 },
591 'skip': 'format 141 not served anymore',
592 },
593 # DASH manifest with encrypted signature
594 {
595 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
596 'info_dict': {
597 'id': 'IB3lcPjvWLA',
598 'ext': 'm4a',
599 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
600 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
601 'duration': 244,
602 'uploader': 'AfrojackVEVO',
603 'uploader_id': 'AfrojackVEVO',
604 'upload_date': '20131011',
605 },
606 'params': {
607 'youtube_include_dash_manifest': True,
608 'format': '141/bestaudio[ext=m4a]',
609 },
610 },
611 # Controversy video
612 {
613 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
614 'info_dict': {
615 'id': 'T4XJQO3qol8',
616 'ext': 'mp4',
617 'duration': 219,
618 'upload_date': '20100909',
619 'uploader': 'Amazing Atheist',
620 'uploader_id': 'TheAmazingAtheist',
621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
622 'title': 'Burning Everyone\'s Koran',
623 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
624 }
625 },
626 # Normal age-gate video (embed allowed)
627 {
628 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
629 'info_dict': {
630 'id': 'HtVdAasjOgU',
631 'ext': 'mp4',
632 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
633 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
634 'duration': 142,
635 'uploader': 'The Witcher',
636 'uploader_id': 'WitcherGame',
637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
638 'upload_date': '20140605',
639 'age_limit': 18,
640 },
641 },
642 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
643 # YouTube Red ad is not captured for creator
644 {
645 'url': '__2ABJjxzNo',
646 'info_dict': {
647 'id': '__2ABJjxzNo',
648 'ext': 'mp4',
649 'duration': 266,
650 'upload_date': '20100430',
651 'uploader_id': 'deadmau5',
652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
653 'creator': 'Dada Life, deadmau5',
654 'description': 'md5:12c56784b8032162bb936a5f76d55360',
655 'uploader': 'deadmau5',
656 'title': 'Deadmau5 - Some Chords (HD)',
657 'alt_title': 'This Machine Kills Some Chords',
658 },
659 'expected_warnings': [
660 'DASH manifest missing',
661 ]
662 },
663 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
664 {
665 'url': 'lqQg6PlCWgI',
666 'info_dict': {
667 'id': 'lqQg6PlCWgI',
668 'ext': 'mp4',
669 'duration': 6085,
670 'upload_date': '20150827',
671 'uploader_id': 'olympic',
672 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
673 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
674 'uploader': 'Olympic',
675 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
676 },
677 'params': {
678 'skip_download': 'requires avconv',
679 }
680 },
681 # Non-square pixels
682 {
683 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
684 'info_dict': {
685 'id': '_b-2C3KPAM0',
686 'ext': 'mp4',
687 'stretched_ratio': 16 / 9.,
688 'duration': 85,
689 'upload_date': '20110310',
690 'uploader_id': 'AllenMeow',
691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
692 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
693 'uploader': '孫ᄋᄅ',
694 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
695 },
696 },
697 # url_encoded_fmt_stream_map is empty string
698 {
699 'url': 'qEJwOuvDf7I',
700 'info_dict': {
701 'id': 'qEJwOuvDf7I',
702 'ext': 'webm',
703 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
704 'description': '',
705 'upload_date': '20150404',
706 'uploader_id': 'spbelect',
707 'uploader': 'Наблюдатели Петербурга',
708 },
709 'params': {
710 'skip_download': 'requires avconv',
711 },
712 'skip': 'This live event has ended.',
713 },
714 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
715 {
716 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
717 'info_dict': {
718 'id': 'FIl7x6_3R5Y',
719 'ext': 'webm',
720 'title': 'md5:7b81415841e02ecd4313668cde88737a',
721 'description': 'md5:116377fd2963b81ec4ce64b542173306',
722 'duration': 220,
723 'upload_date': '20150625',
724 'uploader_id': 'dorappi2000',
725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
726 'uploader': 'dorappi2000',
727 'formats': 'mincount:31',
728 },
729 'skip': 'not actual anymore',
730 },
731 # DASH manifest with segment_list
732 {
733 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
734 'md5': '8ce563a1d667b599d21064e982ab9e31',
735 'info_dict': {
736 'id': 'CsmdDsKjzN8',
737 'ext': 'mp4',
738 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
739 'uploader': 'Airtek',
740 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
741 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
742 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
743 },
744 'params': {
745 'youtube_include_dash_manifest': True,
746 'format': '135', # bestvideo
747 },
748 'skip': 'This live event has ended.',
749 },
750 {
751 # Multifeed videos (multiple cameras), URL is for Main Camera
752 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
753 'info_dict': {
754 'id': 'jqWvoWXjCVs',
755 'title': 'teamPGP: Rocket League Noob Stream',
756 'description': 'md5:dc7872fb300e143831327f1bae3af010',
757 },
758 'playlist': [{
759 'info_dict': {
760 'id': 'jqWvoWXjCVs',
761 'ext': 'mp4',
762 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
763 'description': 'md5:dc7872fb300e143831327f1bae3af010',
764 'duration': 7335,
765 'upload_date': '20150721',
766 'uploader': 'Beer Games Beer',
767 'uploader_id': 'beergamesbeer',
768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
769 'license': 'Standard YouTube License',
770 },
771 }, {
772 'info_dict': {
773 'id': '6h8e8xoXJzg',
774 'ext': 'mp4',
775 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
776 'description': 'md5:dc7872fb300e143831327f1bae3af010',
777 'duration': 7337,
778 'upload_date': '20150721',
779 'uploader': 'Beer Games Beer',
780 'uploader_id': 'beergamesbeer',
781 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
782 'license': 'Standard YouTube License',
783 },
784 }, {
785 'info_dict': {
786 'id': 'PUOgX5z9xZw',
787 'ext': 'mp4',
788 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
789 'description': 'md5:dc7872fb300e143831327f1bae3af010',
790 'duration': 7337,
791 'upload_date': '20150721',
792 'uploader': 'Beer Games Beer',
793 'uploader_id': 'beergamesbeer',
794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
795 'license': 'Standard YouTube License',
796 },
797 }, {
798 'info_dict': {
799 'id': 'teuwxikvS5k',
800 'ext': 'mp4',
801 'title': 'teamPGP: Rocket League Noob Stream (zim)',
802 'description': 'md5:dc7872fb300e143831327f1bae3af010',
803 'duration': 7334,
804 'upload_date': '20150721',
805 'uploader': 'Beer Games Beer',
806 'uploader_id': 'beergamesbeer',
807 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
808 'license': 'Standard YouTube License',
809 },
810 }],
811 'params': {
812 'skip_download': True,
813 },
814 'skip': 'This video is not available.',
815 },
816 {
817 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
818 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
819 'info_dict': {
820 'id': 'gVfLd0zydlo',
821 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
822 },
823 'playlist_count': 2,
824 'skip': 'Not multifeed anymore',
825 },
826 {
827 'url': 'https://vid.plus/FlRa-iH7PGw',
828 'only_matching': True,
829 },
830 {
831 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
832 'only_matching': True,
833 },
834 {
835 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
836 # Also tests cut-off URL expansion in video description (see
837 # https://github.com/ytdl-org/youtube-dl/issues/1892,
838 # https://github.com/ytdl-org/youtube-dl/issues/8164)
839 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
840 'info_dict': {
841 'id': 'lsguqyKfVQg',
842 'ext': 'mp4',
843 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
844 'alt_title': 'Dark Walk - Position Music',
845 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
846 'duration': 133,
847 'upload_date': '20151119',
848 'uploader_id': 'IronSoulElf',
849 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
850 'uploader': 'IronSoulElf',
851 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
852 'track': 'Dark Walk - Position Music',
853 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
854 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
855 },
856 'params': {
857 'skip_download': True,
858 },
859 },
860 {
861 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
862 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
863 'only_matching': True,
864 },
865 {
866 # Video with yt:stretch=17:0
867 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
868 'info_dict': {
869 'id': 'Q39EVAstoRM',
870 'ext': 'mp4',
871 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
872 'description': 'md5:ee18a25c350637c8faff806845bddee9',
873 'upload_date': '20151107',
874 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
875 'uploader': 'CH GAMER DROID',
876 },
877 'params': {
878 'skip_download': True,
879 },
880 'skip': 'This video does not exist.',
881 },
882 {
883 # Video licensed under Creative Commons
884 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
885 'info_dict': {
886 'id': 'M4gD1WSo5mA',
887 'ext': 'mp4',
888 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
889 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
890 'duration': 721,
891 'upload_date': '20150127',
892 'uploader_id': 'BerkmanCenter',
893 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
894 'uploader': 'The Berkman Klein Center for Internet & Society',
895 'license': 'Creative Commons Attribution license (reuse allowed)',
896 },
897 'params': {
898 'skip_download': True,
899 },
900 },
901 {
902 # Channel-like uploader_url
903 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
904 'info_dict': {
905 'id': 'eQcmzGIKrzg',
906 'ext': 'mp4',
907 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
908 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
909 'duration': 4060,
910 'upload_date': '20151119',
911 'uploader': 'Bernie Sanders',
912 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
914 'license': 'Creative Commons Attribution license (reuse allowed)',
915 },
916 'params': {
917 'skip_download': True,
918 },
919 },
920 {
921 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
922 'only_matching': True,
923 },
924 {
925 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
926 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
927 'only_matching': True,
928 },
929 {
930 # Rental video preview
931 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
932 'info_dict': {
933 'id': 'uGpuVWrhIzE',
934 'ext': 'mp4',
935 'title': 'Piku - Trailer',
936 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
937 'upload_date': '20150811',
938 'uploader': 'FlixMatrix',
939 'uploader_id': 'FlixMatrixKaravan',
940 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
941 'license': 'Standard YouTube License',
942 },
943 'params': {
944 'skip_download': True,
945 },
946 'skip': 'This video is not available.',
947 },
948 {
949 # YouTube Red video with episode data
950 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
951 'info_dict': {
952 'id': 'iqKdEhx-dD4',
953 'ext': 'mp4',
954 'title': 'Isolation - Mind Field (Ep 1)',
955 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
956 'duration': 2085,
957 'upload_date': '20170118',
958 'uploader': 'Vsauce',
959 'uploader_id': 'Vsauce',
960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
961 'series': 'Mind Field',
962 'season_number': 1,
963 'episode_number': 1,
964 },
965 'params': {
966 'skip_download': True,
967 },
968 'expected_warnings': [
969 'Skipping DASH manifest',
970 ],
971 },
972 {
973 # The following content has been identified by the YouTube community
974 # as inappropriate or offensive to some audiences.
975 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
976 'info_dict': {
977 'id': '6SJNVb0GnPI',
978 'ext': 'mp4',
979 'title': 'Race Differences in Intelligence',
980 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
981 'duration': 965,
982 'upload_date': '20140124',
983 'uploader': 'New Century Foundation',
984 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
985 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
986 },
987 'params': {
988 'skip_download': True,
989 },
990 },
991 {
992 # itag 212
993 'url': '1t24XAntNCY',
994 'only_matching': True,
995 },
996 {
997 # geo restricted to JP
998 'url': 'sJL6WA-aGkQ',
999 'only_matching': True,
1000 },
1001 {
1002 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1003 'only_matching': True,
1004 },
1005 {
1006 # DRM protected
1007 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1008 'only_matching': True,
1009 },
1010 {
1011 # Video with unsupported adaptive stream type formats
1012 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1013 'info_dict': {
1014 'id': 'Z4Vy8R84T1U',
1015 'ext': 'mp4',
1016 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1017 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1018 'duration': 433,
1019 'upload_date': '20130923',
1020 'uploader': 'Amelia Putri Harwita',
1021 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1023 'formats': 'maxcount:10',
1024 },
1025 'params': {
1026 'skip_download': True,
1027 'youtube_include_dash_manifest': False,
1028 },
1029 'skip': 'not actual anymore',
1030 },
1031 {
1032 # Youtube Music Auto-generated description
1033 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1034 'info_dict': {
1035 'id': 'MgNrAu2pzNs',
1036 'ext': 'mp4',
1037 'title': 'Voyeur Girl',
1038 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1039 'upload_date': '20190312',
1040 'uploader': 'Stephen - Topic',
1041 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1042 'artist': 'Stephen',
1043 'track': 'Voyeur Girl',
1044 'album': 'it\'s too much love to know my dear',
1045 'release_date': '20190313',
1046 'release_year': 2019,
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
1051 },
1052 {
1053 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1054 'only_matching': True,
1055 },
1056 {
1057 # invalid -> valid video id redirection
1058 'url': 'DJztXj2GPfl',
1059 'info_dict': {
1060 'id': 'DJztXj2GPfk',
1061 'ext': 'mp4',
1062 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1063 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1064 'upload_date': '20090125',
1065 'uploader': 'Prochorowka',
1066 'uploader_id': 'Prochorowka',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1068 'artist': 'Panjabi MC',
1069 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1070 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1071 },
1072 'params': {
1073 'skip_download': True,
1074 },
1075 },
1076 {
1077 # empty description results in an empty string
1078 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1079 'info_dict': {
1080 'id': 'x41yOUIvK2k',
1081 'ext': 'mp4',
1082 'title': 'IMG 3456',
1083 'description': '',
1084 'upload_date': '20170613',
1085 'uploader_id': 'ElevageOrVert',
1086 'uploader': 'ElevageOrVert',
1087 },
1088 'params': {
1089 'skip_download': True,
1090 },
1091 },
1092 ]
1093
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and the per-instance signature cache."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature cache id) -> decipher callable;
        # populated lazily by _decrypt_signature
        self._player_cache = {}
1097
1098 def report_video_info_webpage_download(self, video_id):
1099 """Report attempt to download video info webpage."""
1100 self.to_screen('%s: Downloading video info webpage' % video_id)
1101
1102 def report_information_extraction(self, video_id):
1103 """Report attempt to extract video information."""
1104 self.to_screen('%s: Extracting video information' % video_id)
1105
1106 def report_unavailable_format(self, video_id, format):
1107 """Report extracted video URL."""
1108 self.to_screen('%s: Format %s not available' % (video_id, format))
1109
1110 def report_rtmp_download(self):
1111 """Indicate the download will use the RTMP protocol."""
1112 self.to_screen('RTMP download detected')
1113
1114 def _signature_cache_id(self, example_sig):
1115 """ Return a string representation of a signature """
1116 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1117
1118 @classmethod
1119 def _extract_player_info(cls, player_url):
1120 for player_re in cls._PLAYER_INFO_RE:
1121 id_m = re.search(player_re, player_url)
1122 if id_m:
1123 break
1124 else:
1125 raise ExtractorError('Cannot identify player %r' % player_url)
1126 return id_m.group('ext'), id_m.group('id')
1127
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (and cache) a callable that deciphers signatures for the
        given player.

        The returned callable maps an encrypted signature string to its
        deciphered form.  Deciphering is a pure index permutation, so the
        result is cached on disk as a list of character indices keyed by
        player type/id and the "shape" of example_sig.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id becomes part of a cache filename; it must not contain
        # path separators
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cache hit: apply the stored index permutation directly
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Derive the index permutation by running the function on a string
        # of unique characters and recording where each one ends up
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1167
    def _print_sig_code(self, func, example_sig):
        """Print Python source code equivalent to the deciphering function
        *func*, expressed as concatenated index/slice operations on the
        input string (used with --youtube-print-sig-code)."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting the parts that
                # are redundant in Python slice syntax
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, collapsing runs with step
            # +1/-1 into single slice expressions
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit the final element, or close the still-open slice
            # NOTE(review): relies on `i` from the loop above, i.e. assumes
            # len(idxs) >= 2 — holds for real signatures
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Recover the index permutation the same way
        # _extract_signature_function does
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1206
    def _parse_sig_js(self, jscode):
        """Locate the signature-decipher function in the player JavaScript
        and return a callable wrapping the interpreted function.

        The regexes are ordered from current to obsolete player code
        patterns; the first one that matches wins.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes a single argument (the signature string)
        return lambda s: initial_function([s])
1227
1228 def _parse_sig_swf(self, file_contents):
1229 swfi = SWFInterpreter(file_contents)
1230 TARGET_CLASSNAME = 'SignatureDecipher'
1231 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1232 initial_function = swfi.extract_function(searched_class, 'decipher')
1233 return lambda s: initial_function([s])
1234
1235 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1236 """Turn the encrypted s field into a working signature"""
1237
1238 if player_url is None:
1239 raise ExtractorError('Cannot decrypt signature without player_url')
1240
1241 if player_url.startswith('//'):
1242 player_url = 'https:' + player_url
1243 elif not re.match(r'https?://', player_url):
1244 player_url = compat_urlparse.urljoin(
1245 'https://www.youtube.com', player_url)
1246 try:
1247 player_id = (player_url, self._signature_cache_id(s))
1248 if player_id not in self._player_cache:
1249 func = self._extract_signature_function(
1250 video_id, player_url, s
1251 )
1252 self._player_cache[player_id] = func
1253 func = self._player_cache[player_id]
1254 if self._downloader.params.get('youtube_print_sig_code'):
1255 self._print_sig_code(func, s)
1256 return func(s)
1257 except Exception as e:
1258 tb = traceback.format_exc()
1259 raise ExtractorError(
1260 'Signature extraction failed: ' + tb, cause=e)
1261
1262 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1263 try:
1264 subs_doc = self._download_xml(
1265 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1266 video_id, note=False)
1267 except ExtractorError as err:
1268 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1269 return {}
1270
1271 sub_lang_list = {}
1272 for track in subs_doc.findall('track'):
1273 lang = track.attrib['lang_code']
1274 if lang in sub_lang_list:
1275 continue
1276 sub_formats = []
1277 for ext in self._SUBTITLE_FORMATS:
1278 params = compat_urllib_parse_urlencode({
1279 'lang': lang,
1280 'v': video_id,
1281 'fmt': ext,
1282 'name': track.attrib['name'].encode('utf-8'),
1283 })
1284 sub_formats.append({
1285 'url': 'https://www.youtube.com/api/timedtext?' + params,
1286 'ext': ext,
1287 })
1288 sub_lang_list[lang] = sub_formats
1289 if has_live_chat_replay:
1290 sub_lang_list['live_chat'] = [
1291 {
1292 'video_id': video_id,
1293 'ext': 'json',
1294 'protocol': 'youtube_live_chat_replay',
1295 },
1296 ]
1297 if not sub_lang_list:
1298 self._downloader.report_warning('video doesn\'t have subtitles')
1299 return {}
1300 return sub_lang_list
1301
    def _get_ytplayer_config(self, video_id, webpage):
        """Extract and parse the ytplayer.config JSON from the watch page.

        Returns the parsed dict, or None when no config is found or the
        JSON fails to parse.
        """
        patterns = (
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
            r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'  # Needed???
        )
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
        if config:
            # uppercase_escape undoes \UXXXX-style escaping before parsing
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
1320
1321 def _get_music_metadata_from_yt_initial(self, yt_initial):
1322 music_metadata = []
1323 key_map = {
1324 'Album': 'album',
1325 'Artist': 'artist',
1326 'Song': 'track'
1327 }
1328 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1329 if type(contents) is list:
1330 for content in contents:
1331 music_track = {}
1332 if type(content) is not dict:
1333 continue
1334 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1335 if type(videoSecondaryInfoRenderer) is not dict:
1336 continue
1337 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1338 if type(rows) is not list:
1339 continue
1340 for row in rows:
1341 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1342 if type(metadataRowRenderer) is not dict:
1343 continue
1344 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1345 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1346 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1347 if type(key) is not str or type(value) is not str:
1348 continue
1349 if key in key_map:
1350 if key_map[key] in music_track:
1351 # we've started on a new track
1352 music_metadata.append(music_track)
1353 music_track = {}
1354 music_track[key_map[key]] = value
1355 if len(music_track.keys()):
1356 music_metadata.append(music_track)
1357 return music_metadata
1358
    def _get_automatic_captions(self, video_id, webpage):
        """Extract automatic (ASR) caption tracks.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.  Returns a dict mapping language
        code -> list of subtitle format dicts, or {} on failure.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            # Oldest flavour: dedicated ttsurl endpoint
            if caption_url:
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the query
                # of the base caption URL for every (language, format) pair
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    base_url = renderer['captionTracks'][0]['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raised by the download process if there
        # are no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1460
1461 def _mark_watched(self, video_id, video_info, player_response):
1462 playback_url = url_or_none(try_get(
1463 player_response,
1464 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1465 video_info, lambda x: x['videostats_playback_base_url'][0]))
1466 if not playback_url:
1467 return
1468 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1469 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1470
1471 # cpn generation algorithm is reverse engineered from base.js.
1472 # In fact it works even with dummy cpn.
1473 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1474 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1475
1476 qs.update({
1477 'ver': ['2'],
1478 'cpn': [cpn],
1479 })
1480 playback_url = compat_urlparse.urlunparse(
1481 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1482
1483 self._download_webpage(
1484 playback_url, video_id, 'Marking watched',
1485 'Unable to mark watched', fatal=False)
1486
1487 @staticmethod
1488 def _extract_urls(webpage):
1489 # Embedded YouTube player
1490 entries = [
1491 unescapeHTML(mobj.group('url'))
1492 for mobj in re.finditer(r'''(?x)
1493 (?:
1494 <iframe[^>]+?src=|
1495 data-video-url=|
1496 <embed[^>]+?src=|
1497 embedSWF\(?:\s*|
1498 <object[^>]+data=|
1499 new\s+SWFObject\(
1500 )
1501 (["\'])
1502 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1503 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1504 \1''', webpage)]
1505
1506 # lazyYT YouTube embed
1507 entries.extend(list(map(
1508 unescapeHTML,
1509 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1510
1511 # Wordpress "YouTube Video Importer" plugin
1512 matches = re.findall(r'''(?x)<div[^>]+
1513 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1514 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1515 entries.extend(m[-1] for m in matches)
1516
1517 return entries
1518
1519 @staticmethod
1520 def _extract_url(webpage):
1521 urls = YoutubeIE._extract_urls(webpage)
1522 return urls[0] if urls else None
1523
1524 @classmethod
1525 def extract_id(cls, url):
1526 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1527 if mobj is None:
1528 raise ExtractorError('Invalid URL: %s' % url)
1529 video_id = mobj.group(2)
1530 return video_id
1531
1532 def _extract_chapters_from_json(self, webpage, video_id, duration):
1533 if not webpage:
1534 return
1535 data = self._extract_yt_initial_data(video_id, webpage)
1536 if not data or not isinstance(data, dict):
1537 return
1538 chapters_list = try_get(
1539 data,
1540 lambda x: x['playerOverlays']
1541 ['playerOverlayRenderer']
1542 ['decoratedPlayerBarRenderer']
1543 ['decoratedPlayerBarRenderer']
1544 ['playerBar']
1545 ['chapteredPlayerBarRenderer']
1546 ['chapters'],
1547 list)
1548 if not chapters_list:
1549 return
1550
1551 def chapter_time(chapter):
1552 return float_or_none(
1553 try_get(
1554 chapter,
1555 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1556 int),
1557 scale=1000)
1558 chapters = []
1559 for next_num, chapter in enumerate(chapters_list, start=1):
1560 start_time = chapter_time(chapter)
1561 if start_time is None:
1562 continue
1563 end_time = (chapter_time(chapters_list[next_num])
1564 if next_num < len(chapters_list) else duration)
1565 if end_time is None:
1566 continue
1567 title = try_get(
1568 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1569 compat_str)
1570 chapters.append({
1571 'start_time': start_time,
1572 'end_time': end_time,
1573 'title': title,
1574 })
1575 return chapters
1576
1577 @staticmethod
1578 def _extract_chapters_from_description(description, duration):
1579 if not description:
1580 return None
1581 chapter_lines = re.findall(
1582 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1583 description)
1584 if not chapter_lines:
1585 return None
1586 chapters = []
1587 for next_num, (chapter_line, time_point) in enumerate(
1588 chapter_lines, start=1):
1589 start_time = parse_duration(time_point)
1590 if start_time is None:
1591 continue
1592 if start_time > duration:
1593 break
1594 end_time = (duration if next_num == len(chapter_lines)
1595 else parse_duration(chapter_lines[next_num][1]))
1596 if end_time is None:
1597 continue
1598 if end_time > duration:
1599 end_time = duration
1600 if start_time > end_time:
1601 break
1602 chapter_title = re.sub(
1603 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1604 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1605 chapters.append({
1606 'start_time': start_time,
1607 'end_time': end_time,
1608 'title': chapter_title,
1609 })
1610 return chapters
1611
1612 def _extract_chapters(self, webpage, description, video_id, duration):
1613 return (self._extract_chapters_from_json(webpage, video_id, duration)
1614 or self._extract_chapters_from_description(description, duration))
1615
1616 def _real_extract(self, url):
1617 url, smuggled_data = unsmuggle_url(url, {})
1618
1619 proto = (
1620 'http' if self._downloader.params.get('prefer_insecure', False)
1621 else 'https')
1622
1623 start_time = None
1624 end_time = None
1625 parsed_url = compat_urllib_parse_urlparse(url)
1626 for component in [parsed_url.fragment, parsed_url.query]:
1627 query = compat_parse_qs(component)
1628 if start_time is None and 't' in query:
1629 start_time = parse_duration(query['t'][0])
1630 if start_time is None and 'start' in query:
1631 start_time = parse_duration(query['start'][0])
1632 if end_time is None and 'end' in query:
1633 end_time = parse_duration(query['end'][0])
1634
1635 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1636 mobj = re.search(self._NEXT_URL_RE, url)
1637 if mobj:
1638 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1639 video_id = self.extract_id(url)
1640
1641 # Get video webpage
1642 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1643 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1644
1645 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1646 video_id = qs.get('v', [None])[0] or video_id
1647
1648 # Attempt to extract SWF player URL
1649 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1650 if mobj is not None:
1651 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1652 else:
1653 player_url = None
1654
1655 dash_mpds = []
1656
1657 def add_dash_mpd(video_info):
1658 dash_mpd = video_info.get('dashmpd')
1659 if dash_mpd and dash_mpd[0] not in dash_mpds:
1660 dash_mpds.append(dash_mpd[0])
1661
1662 def add_dash_mpd_pr(pl_response):
1663 dash_mpd = url_or_none(try_get(
1664 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1665 compat_str))
1666 if dash_mpd and dash_mpd not in dash_mpds:
1667 dash_mpds.append(dash_mpd)
1668
1669 is_live = None
1670 view_count = None
1671
1672 def extract_view_count(v_info):
1673 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1674
1675 def extract_player_response(player_response, video_id):
1676 pl_response = str_or_none(player_response)
1677 if not pl_response:
1678 return
1679 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1680 if isinstance(pl_response, dict):
1681 add_dash_mpd_pr(pl_response)
1682 return pl_response
1683
1684 def extract_embedded_config(embed_webpage, video_id):
1685 embedded_config = self._search_regex(
1686 r'setConfig\(({.*})\);',
1687 embed_webpage, 'ytInitialData', default=None)
1688 if embedded_config:
1689 return embedded_config
1690
1691 player_response = {}
1692
1693 # Get video info
1694 video_info = {}
1695 embed_webpage = None
1696 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1697 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1698 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1699 age_gate = True
1700 # We simulate the access to the video from www.youtube.com/v/{video_id}
1701 # this can be viewed without login into Youtube
1702 url = proto + '://www.youtube.com/embed/%s' % video_id
1703 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1704 ext = extract_embedded_config(embed_webpage, video_id)
1705 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1706 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1707 if not playable_in_embed:
1708 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1709 playable_in_embed = ''
1710 else:
1711 playable_in_embed = playable_in_embed.group('playableinEmbed')
1712 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1713 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1714 if playable_in_embed == 'false':
1715 '''
1716 # TODO apply this patch when Support for Python 2.6(!) and above drops
1717 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1718 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1719 '''
1720 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1721 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1722 age_gate = False
1723 # Try looking directly into the video webpage
1724 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1725 if ytplayer_config:
1726 args = ytplayer_config.get("args")
1727 if args is not None:
1728 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1729 # Convert to the same format returned by compat_parse_qs
1730 video_info = dict((k, [v]) for k, v in args.items())
1731 add_dash_mpd(video_info)
1732 # Rental video is not rented but preview is available (e.g.
1733 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1734 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1735 if not video_info and args.get('ypc_vid'):
1736 return self.url_result(
1737 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1738 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1739 is_live = True
1740 if not player_response:
1741 player_response = extract_player_response(args.get('player_response'), video_id)
1742 elif not player_response:
1743 player_response = ytplayer_config
1744 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1745 add_dash_mpd_pr(player_response)
1746 else:
1747 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1748 else:
1749 data = compat_urllib_parse_urlencode({
1750 'video_id': video_id,
1751 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1752 'sts': self._search_regex(
1753 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1754 })
1755 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1756 try:
1757 video_info_webpage = self._download_webpage(
1758 video_info_url, video_id,
1759 note='Refetching age-gated info webpage',
1760 errnote='unable to download video info webpage')
1761 except ExtractorError:
1762 video_info_webpage = None
1763 if video_info_webpage:
1764 video_info = compat_parse_qs(video_info_webpage)
1765 pl_response = video_info.get('player_response', [None])[0]
1766 player_response = extract_player_response(pl_response, video_id)
1767 add_dash_mpd(video_info)
1768 view_count = extract_view_count(video_info)
1769 else:
1770 age_gate = False
1771 # Try looking directly into the video webpage
1772 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1773 if ytplayer_config:
1774 args = ytplayer_config.get('args', {})
1775 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1776 # Convert to the same format returned by compat_parse_qs
1777 video_info = dict((k, [v]) for k, v in args.items())
1778 add_dash_mpd(video_info)
1779 # Rental video is not rented but preview is available (e.g.
1780 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1781 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1782 if not video_info and args.get('ypc_vid'):
1783 return self.url_result(
1784 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1785 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1786 is_live = True
1787 if not player_response:
1788 player_response = extract_player_response(args.get('player_response'), video_id)
1789 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1790 add_dash_mpd_pr(player_response)
1791
1792 if not video_info and not player_response:
1793 player_response = extract_player_response(
1794 self._search_regex(
1795 r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
1796 'initial player response', default='{}'),
1797 video_id)
1798
1799 def extract_unavailable_message():
1800 messages = []
1801 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1802 msg = self._html_search_regex(
1803 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1804 video_webpage, 'unavailable %s' % kind, default=None)
1805 if msg:
1806 messages.append(msg)
1807 if messages:
1808 return '\n'.join(messages)
1809
1810 if not video_info and not player_response:
1811 unavailable_message = extract_unavailable_message()
1812 if not unavailable_message:
1813 unavailable_message = 'Unable to extract video data'
1814 raise ExtractorError(
1815 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1816
1817 if not isinstance(video_info, dict):
1818 video_info = {}
1819
1820 video_details = try_get(
1821 player_response, lambda x: x['videoDetails'], dict) or {}
1822
1823 microformat = try_get(
1824 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1825
1826 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1827 if not video_title:
1828 self._downloader.report_warning('Unable to extract video title')
1829 video_title = '_'
1830
1831 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1832 if video_description:
1833
1834 def replace_url(m):
1835 redir_url = compat_urlparse.urljoin(url, m.group(1))
1836 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1837 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1838 qs = compat_parse_qs(parsed_redir_url.query)
1839 q = qs.get('q')
1840 if q and q[0]:
1841 return q[0]
1842 return redir_url
1843
1844 description_original = video_description = re.sub(r'''(?x)
1845 <a\s+
1846 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1847 (?:title|href)="([^"]+)"\s+
1848 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1849 class="[^"]*"[^>]*>
1850 [^<]+\.{3}\s*
1851 </a>
1852 ''', replace_url, video_description)
1853 video_description = clean_html(video_description)
1854 else:
1855 video_description = video_details.get('shortDescription')
1856 if video_description is None:
1857 video_description = self._html_search_meta('description', video_webpage)
1858
1859 if not smuggled_data.get('force_singlefeed', False):
1860 if not self._downloader.params.get('noplaylist'):
1861 multifeed_metadata_list = try_get(
1862 player_response,
1863 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1864 compat_str) or try_get(
1865 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1866 if multifeed_metadata_list:
1867 entries = []
1868 feed_ids = []
1869 for feed in multifeed_metadata_list.split(','):
1870 # Unquote should take place before split on comma (,) since textual
1871 # fields may contain comma as well (see
1872 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1873 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1874
1875 def feed_entry(name):
1876 return try_get(feed_data, lambda x: x[name][0], compat_str)
1877
1878 feed_id = feed_entry('id')
1879 if not feed_id:
1880 continue
1881 feed_title = feed_entry('title')
1882 title = video_title
1883 if feed_title:
1884 title += ' (%s)' % feed_title
1885 entries.append({
1886 '_type': 'url_transparent',
1887 'ie_key': 'Youtube',
1888 'url': smuggle_url(
1889 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1890 {'force_singlefeed': True}),
1891 'title': title,
1892 })
1893 feed_ids.append(feed_id)
1894 self.to_screen(
1895 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1896 % (', '.join(feed_ids), video_id))
1897 return self.playlist_result(entries, video_id, video_title, video_description)
1898 else:
1899 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1900
1901 if view_count is None:
1902 view_count = extract_view_count(video_info)
1903 if view_count is None and video_details:
1904 view_count = int_or_none(video_details.get('viewCount'))
1905 if view_count is None and microformat:
1906 view_count = int_or_none(microformat.get('viewCount'))
1907
1908 if is_live is None:
1909 is_live = bool_or_none(video_details.get('isLive'))
1910
1911 has_live_chat_replay = False
1912 if not is_live:
1913 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
1914 try:
1915 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
1916 has_live_chat_replay = True
1917 except (KeyError, IndexError, TypeError):
1918 pass
1919
1920 # Check for "rental" videos
1921 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
1922 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
1923
1924 def _extract_filesize(media_url):
1925 return int_or_none(self._search_regex(
1926 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1927
1928 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1929 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1930
1931 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1932 self.report_rtmp_download()
1933 formats = [{
1934 'format_id': '_rtmp',
1935 'protocol': 'rtmp',
1936 'url': video_info['conn'][0],
1937 'player_url': player_url,
1938 }]
1939 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
1940 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
1941 if 'rtmpe%3Dyes' in encoded_url_map:
1942 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
1943 formats = []
1944 formats_spec = {}
1945 fmt_list = video_info.get('fmt_list', [''])[0]
1946 if fmt_list:
1947 for fmt in fmt_list.split(','):
1948 spec = fmt.split('/')
1949 if len(spec) > 1:
1950 width_height = spec[1].split('x')
1951 if len(width_height) == 2:
1952 formats_spec[spec[0]] = {
1953 'resolution': spec[1],
1954 'width': int_or_none(width_height[0]),
1955 'height': int_or_none(width_height[1]),
1956 }
1957 for fmt in streaming_formats:
1958 itag = str_or_none(fmt.get('itag'))
1959 if not itag:
1960 continue
1961 quality = fmt.get('quality')
1962 quality_label = fmt.get('qualityLabel') or quality
1963 formats_spec[itag] = {
1964 'asr': int_or_none(fmt.get('audioSampleRate')),
1965 'filesize': int_or_none(fmt.get('contentLength')),
1966 'format_note': quality_label,
1967 'fps': int_or_none(fmt.get('fps')),
1968 'height': int_or_none(fmt.get('height')),
1969 # bitrate for itag 43 is always 2147483647
1970 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1971 'width': int_or_none(fmt.get('width')),
1972 }
1973
1974 for fmt in streaming_formats:
1975 if fmt.get('drmFamilies') or fmt.get('drm_families'):
1976 continue
1977 url = url_or_none(fmt.get('url'))
1978
1979 if not url:
1980 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
1981 if not cipher:
1982 continue
1983 url_data = compat_parse_qs(cipher)
1984 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1985 if not url:
1986 continue
1987 else:
1988 cipher = None
1989 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1990
1991 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1992 # Unsupported FORMAT_STREAM_TYPE_OTF
1993 if stream_type == 3:
1994 continue
1995
1996 format_id = fmt.get('itag') or url_data['itag'][0]
1997 if not format_id:
1998 continue
1999 format_id = compat_str(format_id)
2000
2001 if cipher:
2002 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2003 ASSETS_RE = (
2004 r'<script[^>]+\bsrc=("[^"]+")[^>]+\bname=["\']player_ias/base',
2005 r'"jsUrl"\s*:\s*("[^"]+")',
2006 r'"assets":.+?"js":\s*("[^"]+")')
2007 jsplayer_url_json = self._search_regex(
2008 ASSETS_RE,
2009 embed_webpage if age_gate else video_webpage,
2010 'JS player URL (1)', default=None)
2011 if not jsplayer_url_json and not age_gate:
2012 # We need the embed website after all
2013 if embed_webpage is None:
2014 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2015 embed_webpage = self._download_webpage(
2016 embed_url, video_id, 'Downloading embed webpage')
2017 jsplayer_url_json = self._search_regex(
2018 ASSETS_RE, embed_webpage, 'JS player URL')
2019
2020 player_url = json.loads(jsplayer_url_json)
2021 if player_url is None:
2022 player_url_json = self._search_regex(
2023 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2024 video_webpage, 'age gate player URL')
2025 player_url = json.loads(player_url_json)
2026
2027 if 'sig' in url_data:
2028 url += '&signature=' + url_data['sig'][0]
2029 elif 's' in url_data:
2030 encrypted_sig = url_data['s'][0]
2031
2032 if self._downloader.params.get('verbose'):
2033 if player_url is None:
2034 player_desc = 'unknown'
2035 else:
2036 player_type, player_version = self._extract_player_info(player_url)
2037 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2038 parts_sizes = self._signature_cache_id(encrypted_sig)
2039 self.to_screen('{%s} signature length %s, %s' %
2040 (format_id, parts_sizes, player_desc))
2041
2042 signature = self._decrypt_signature(
2043 encrypted_sig, video_id, player_url, age_gate)
2044 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2045 url += '&%s=%s' % (sp, signature)
2046 if 'ratebypass' not in url:
2047 url += '&ratebypass=yes'
2048
2049 dct = {
2050 'format_id': format_id,
2051 'url': url,
2052 'player_url': player_url,
2053 }
2054 if format_id in self._formats:
2055 dct.update(self._formats[format_id])
2056 if format_id in formats_spec:
2057 dct.update(formats_spec[format_id])
2058
2059 # Some itags are not included in DASH manifest thus corresponding formats will
2060 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2061 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2062 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2063 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2064
2065 if width is None:
2066 width = int_or_none(fmt.get('width'))
2067 if height is None:
2068 height = int_or_none(fmt.get('height'))
2069
2070 filesize = int_or_none(url_data.get(
2071 'clen', [None])[0]) or _extract_filesize(url)
2072
2073 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2074 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2075
2076 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2077 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2078 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2079
2080 more_fields = {
2081 'filesize': filesize,
2082 'tbr': tbr,
2083 'width': width,
2084 'height': height,
2085 'fps': fps,
2086 'format_note': quality_label or quality,
2087 }
2088 for key, value in more_fields.items():
2089 if value:
2090 dct[key] = value
2091 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2092 if type_:
2093 type_split = type_.split(';')
2094 kind_ext = type_split[0].split('/')
2095 if len(kind_ext) == 2:
2096 kind, _ = kind_ext
2097 dct['ext'] = mimetype2ext(type_split[0])
2098 if kind in ('audio', 'video'):
2099 codecs = None
2100 for mobj in re.finditer(
2101 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2102 if mobj.group('key') == 'codecs':
2103 codecs = mobj.group('val')
2104 break
2105 if codecs:
2106 dct.update(parse_codecs(codecs))
2107 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2108 dct['downloader_options'] = {
2109 # Youtube throttles chunks >~10M
2110 'http_chunk_size': 10485760,
2111 }
2112 formats.append(dct)
2113 else:
2114 manifest_url = (
2115 url_or_none(try_get(
2116 player_response,
2117 lambda x: x['streamingData']['hlsManifestUrl'],
2118 compat_str))
2119 or url_or_none(try_get(
2120 video_info, lambda x: x['hlsvp'][0], compat_str)))
2121 if manifest_url:
2122 formats = []
2123 m3u8_formats = self._extract_m3u8_formats(
2124 manifest_url, video_id, 'mp4', fatal=False)
2125 for a_format in m3u8_formats:
2126 itag = self._search_regex(
2127 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2128 if itag:
2129 a_format['format_id'] = itag
2130 if itag in self._formats:
2131 dct = self._formats[itag].copy()
2132 dct.update(a_format)
2133 a_format = dct
2134 a_format['player_url'] = player_url
2135 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2136 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2137 if self._downloader.params.get('youtube_include_hls_manifest', True):
2138 formats.append(a_format)
2139 else:
2140 error_message = extract_unavailable_message()
2141 if not error_message:
2142 error_message = clean_html(try_get(
2143 player_response, lambda x: x['playabilityStatus']['reason'],
2144 compat_str))
2145 if not error_message:
2146 error_message = clean_html(
2147 try_get(video_info, lambda x: x['reason'][0], compat_str))
2148 if error_message:
2149 raise ExtractorError(error_message, expected=True)
2150 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2151
2152 # uploader
2153 video_uploader = try_get(
2154 video_info, lambda x: x['author'][0],
2155 compat_str) or str_or_none(video_details.get('author'))
2156 if video_uploader:
2157 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2158 else:
2159 self._downloader.report_warning('unable to extract uploader name')
2160
2161 # uploader_id
2162 video_uploader_id = None
2163 video_uploader_url = None
2164 mobj = re.search(
2165 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2166 video_webpage)
2167 if mobj is not None:
2168 video_uploader_id = mobj.group('uploader_id')
2169 video_uploader_url = mobj.group('uploader_url')
2170 else:
2171 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2172 if owner_profile_url:
2173 video_uploader_id = self._search_regex(
2174 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2175 default=None)
2176 video_uploader_url = owner_profile_url
2177
2178 channel_id = (
2179 str_or_none(video_details.get('channelId'))
2180 or self._html_search_meta(
2181 'channelId', video_webpage, 'channel id', default=None)
2182 or self._search_regex(
2183 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2184 video_webpage, 'channel id', default=None, group='id'))
2185 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2186
2187 thumbnails = []
2188 thumbnails_list = try_get(
2189 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2190 for t in thumbnails_list:
2191 if not isinstance(t, dict):
2192 continue
2193 thumbnail_url = url_or_none(t.get('url'))
2194 if not thumbnail_url:
2195 continue
2196 thumbnails.append({
2197 'url': thumbnail_url,
2198 'width': int_or_none(t.get('width')),
2199 'height': int_or_none(t.get('height')),
2200 })
2201
2202 if not thumbnails:
2203 video_thumbnail = None
2204 # We try first to get a high quality image:
2205 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2206 video_webpage, re.DOTALL)
2207 if m_thumb is not None:
2208 video_thumbnail = m_thumb.group(1)
2209 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2210 if thumbnail_url:
2211 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2212 if video_thumbnail:
2213 thumbnails.append({'url': video_thumbnail})
2214
2215 # upload date
2216 upload_date = self._html_search_meta(
2217 'datePublished', video_webpage, 'upload date', default=None)
2218 if not upload_date:
2219 upload_date = self._search_regex(
2220 [r'(?s)id="eow-date.*?>(.*?)</span>',
2221 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2222 video_webpage, 'upload date', default=None)
2223 if not upload_date:
2224 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2225 upload_date = unified_strdate(upload_date)
2226
2227 video_license = self._html_search_regex(
2228 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2229 video_webpage, 'license', default=None)
2230
2231 m_music = re.search(
2232 r'''(?x)
2233 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2234 <ul[^>]*>\s*
2235 <li>(?P<title>.+?)
2236 by (?P<creator>.+?)
2237 (?:
2238 \(.+?\)|
2239 <a[^>]*
2240 (?:
2241 \bhref=["\']/red[^>]*>| # drop possible
2242 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2243 )
2244 .*?
2245 )?</li
2246 ''',
2247 video_webpage)
2248 if m_music:
2249 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2250 video_creator = clean_html(m_music.group('creator'))
2251 else:
2252 video_alt_title = video_creator = None
2253
2254 def extract_meta(field):
2255 return self._html_search_regex(
2256 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2257 video_webpage, field, default=None)
2258
2259 track = extract_meta('Song')
2260 artist = extract_meta('Artist')
2261 album = extract_meta('Album')
2262
2263 # Youtube Music Auto-generated description
2264 release_date = release_year = None
2265 if video_description:
2266 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2267 if mobj:
2268 if not track:
2269 track = mobj.group('track').strip()
2270 if not artist:
2271 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2272 if not album:
2273 album = mobj.group('album'.strip())
2274 release_year = mobj.group('release_year')
2275 release_date = mobj.group('release_date')
2276 if release_date:
2277 release_date = release_date.replace('-', '')
2278 if not release_year:
2279 release_year = int(release_date[:4])
2280 if release_year:
2281 release_year = int(release_year)
2282
2283 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2284 if yt_initial:
2285 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2286 if len(music_metadata):
2287 album = music_metadata[0].get('album')
2288 artist = music_metadata[0].get('artist')
2289 track = music_metadata[0].get('track')
2290
2291 m_episode = re.search(
2292 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2293 video_webpage)
2294 if m_episode:
2295 series = unescapeHTML(m_episode.group('series'))
2296 season_number = int(m_episode.group('season'))
2297 episode_number = int(m_episode.group('episode'))
2298 else:
2299 series = season_number = episode_number = None
2300
2301 m_cat_container = self._search_regex(
2302 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2303 video_webpage, 'categories', default=None)
2304 category = None
2305 if m_cat_container:
2306 category = self._html_search_regex(
2307 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2308 default=None)
2309 if not category:
2310 category = try_get(
2311 microformat, lambda x: x['category'], compat_str)
2312 video_categories = None if category is None else [category]
2313
2314 video_tags = [
2315 unescapeHTML(m.group('content'))
2316 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2317 if not video_tags:
2318 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2319
2320 def _extract_count(count_name):
2321 return str_to_int(self._search_regex(
2322 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2323 % re.escape(count_name),
2324 video_webpage, count_name, default=None))
2325
2326 like_count = _extract_count('like')
2327 dislike_count = _extract_count('dislike')
2328
2329 if view_count is None:
2330 view_count = str_to_int(self._search_regex(
2331 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2332 'view count', default=None))
2333
2334 average_rating = (
2335 float_or_none(video_details.get('averageRating'))
2336 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2337
2338 # subtitles
2339 video_subtitles = self.extract_subtitles(
2340 video_id, video_webpage, has_live_chat_replay)
2341 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2342
2343 video_duration = try_get(
2344 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2345 if not video_duration:
2346 video_duration = int_or_none(video_details.get('lengthSeconds'))
2347 if not video_duration:
2348 video_duration = parse_duration(self._html_search_meta(
2349 'duration', video_webpage, 'video duration'))
2350
2351 # Get Subscriber Count of channel
2352 subscriber_count = parse_count(self._search_regex(
2353 r'"text":"([\d\.]+\w?) subscribers"',
2354 video_webpage,
2355 'subscriber count',
2356 default=None
2357 ))
2358
2359 # annotations
2360 video_annotations = None
2361 if self._downloader.params.get('writeannotations', False):
2362 xsrf_token = self._search_regex(
2363 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2364 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2365 invideo_url = try_get(
2366 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2367 if xsrf_token and invideo_url:
2368 xsrf_field_name = self._search_regex(
2369 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2370 video_webpage, 'xsrf field name',
2371 group='xsrf_field_name', default='session_token')
2372 video_annotations = self._download_webpage(
2373 self._proto_relative_url(invideo_url),
2374 video_id, note='Downloading annotations',
2375 errnote='Unable to download video annotations', fatal=False,
2376 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2377
2378 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2379
2380 # Look for the DASH manifest
2381 if self._downloader.params.get('youtube_include_dash_manifest', True):
2382 dash_mpd_fatal = True
2383 for mpd_url in dash_mpds:
2384 dash_formats = {}
2385 try:
2386 def decrypt_sig(mobj):
2387 s = mobj.group(1)
2388 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2389 return '/signature/%s' % dec_s
2390
2391 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2392
2393 for df in self._extract_mpd_formats(
2394 mpd_url, video_id, fatal=dash_mpd_fatal,
2395 formats_dict=self._formats):
2396 if not df.get('filesize'):
2397 df['filesize'] = _extract_filesize(df['url'])
2398 # Do not overwrite DASH format found in some previous DASH manifest
2399 if df['format_id'] not in dash_formats:
2400 dash_formats[df['format_id']] = df
2401 # Additional DASH manifests may end up in HTTP Error 403 therefore
2402 # allow them to fail without bug report message if we already have
2403 # some DASH manifest succeeded. This is temporary workaround to reduce
2404 # burst of bug reports until we figure out the reason and whether it
2405 # can be fixed at all.
2406 dash_mpd_fatal = False
2407 except (ExtractorError, KeyError) as e:
2408 self.report_warning(
2409 'Skipping DASH manifest: %r' % e, video_id)
2410 if dash_formats:
2411 # Remove the formats we found through non-DASH, they
2412 # contain less info and it can be wrong, because we use
2413 # fixed values (for example the resolution). See
2414 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2415 # example.
2416 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2417 formats.extend(dash_formats.values())
2418
2419 # Check for malformed aspect ratio
2420 stretched_m = re.search(
2421 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2422 video_webpage)
2423 if stretched_m:
2424 w = float(stretched_m.group('w'))
2425 h = float(stretched_m.group('h'))
2426 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2427 # We will only process correct ratios.
2428 if w > 0 and h > 0:
2429 ratio = w / h
2430 for f in formats:
2431 if f.get('vcodec') != 'none':
2432 f['stretched_ratio'] = ratio
2433
2434 if not formats:
2435 if 'reason' in video_info:
2436 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2437 regions_allowed = self._html_search_meta(
2438 'regionsAllowed', video_webpage, default=None)
2439 countries = regions_allowed.split(',') if regions_allowed else None
2440 self.raise_geo_restricted(
2441 msg=video_info['reason'][0], countries=countries)
2442 reason = video_info['reason'][0]
2443 if 'Invalid parameters' in reason:
2444 unavailable_message = extract_unavailable_message()
2445 if unavailable_message:
2446 reason = unavailable_message
2447 raise ExtractorError(
2448 'YouTube said: %s' % reason,
2449 expected=True, video_id=video_id)
2450 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2451 raise ExtractorError('This video is DRM protected.', expected=True)
2452
2453 self._sort_formats(formats)
2454
2455 self.mark_watched(video_id, video_info, player_response)
2456
2457 return {
2458 'id': video_id,
2459 'uploader': video_uploader,
2460 'uploader_id': video_uploader_id,
2461 'uploader_url': video_uploader_url,
2462 'channel_id': channel_id,
2463 'channel_url': channel_url,
2464 'upload_date': upload_date,
2465 'license': video_license,
2466 'creator': video_creator or artist,
2467 'title': video_title,
2468 'alt_title': video_alt_title or track,
2469 'thumbnails': thumbnails,
2470 'description': video_description,
2471 'categories': video_categories,
2472 'tags': video_tags,
2473 'subtitles': video_subtitles,
2474 'automatic_captions': automatic_captions,
2475 'duration': video_duration,
2476 'age_limit': 18 if age_gate else 0,
2477 'annotations': video_annotations,
2478 'chapters': chapters,
2479 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2480 'view_count': view_count,
2481 'like_count': like_count,
2482 'dislike_count': dislike_count,
2483 'average_rating': average_rating,
2484 'formats': formats,
2485 'is_live': is_live,
2486 'start_time': start_time,
2487 'end_time': end_time,
2488 'series': series,
2489 'season_number': season_number,
2490 'episode_number': episode_number,
2491 'track': track,
2492 'artist': artist,
2493 'album': album,
2494 'release_date': release_date,
2495 'release_year': release_year,
2496 'subscriber_count': subscriber_count,
2497 }
2498
2499
class YoutubeTabIE(YoutubeBaseInfoExtractor):
    """Extractor for YouTube channel/user/playlist "tab" pages (Home, Videos,
    Playlists, Community, Channels) and for watch URLs carrying a list= param."""
    IE_DESC = 'YouTube.com tab'
    # (?x)^ will cause warning in LiveIE. So I cant split this into multiple lines using '''
    # Matches channel/c/user/playlist/watch?list= URLs while excluding the
    # reserved top-level paths enumerated in _RESERVED_NAMES (watch, results,
    # feed/..., etc.), which belong to other extractors.
    _VALID_URL = (
        r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/'
        r'(?:(?!(%s)([/#?]|$))|'
        r'(?:channel|c|user)/|'
        r'(?:playlist|watch)\?.*?\blist=)'
        r'(?P<id>[^/?#&]+)') % YoutubeBaseInfoExtractor._RESERVED_NAMES
    IE_NAME = 'youtube:tab'
2510
2511 _TESTS = [{
2512 # playlists, multipage
2513 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2514 'playlist_mincount': 94,
2515 'info_dict': {
2516 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2517 'title': 'Игорь Клейнер - Playlists',
2518 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2519 },
2520 }, {
2521 # playlists, multipage, different order
2522 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2523 'playlist_mincount': 94,
2524 'info_dict': {
2525 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2526 'title': 'Игорь Клейнер - Playlists',
2527 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
2528 },
2529 }, {
2530 # playlists, singlepage
2531 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2532 'playlist_mincount': 4,
2533 'info_dict': {
2534 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2535 'title': 'ThirstForScience - Playlists',
2536 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
2537 }
2538 }, {
2539 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2540 'only_matching': True,
2541 }, {
2542 # basic, single video playlist
2543 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2544 'info_dict': {
2545 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2546 'uploader': 'Sergey M.',
2547 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2548 'title': 'youtube-dl public playlist',
2549 },
2550 'playlist_count': 1,
2551 }, {
2552 # empty playlist
2553 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2554 'info_dict': {
2555 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2556 'uploader': 'Sergey M.',
2557 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2558 'title': 'youtube-dl empty playlist',
2559 },
2560 'playlist_count': 0,
2561 }, {
2562 # Home tab
2563 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
2564 'info_dict': {
2565 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2566 'title': 'lex will - Home',
2567 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2568 },
2569 'playlist_mincount': 2,
2570 }, {
2571 # Videos tab
2572 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
2573 'info_dict': {
2574 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2575 'title': 'lex will - Videos',
2576 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2577 },
2578 'playlist_mincount': 975,
2579 }, {
2580 # Videos tab, sorted by popular
2581 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
2582 'info_dict': {
2583 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2584 'title': 'lex will - Videos',
2585 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2586 },
2587 'playlist_mincount': 199,
2588 }, {
2589 # Playlists tab
2590 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
2591 'info_dict': {
2592 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2593 'title': 'lex will - Playlists',
2594 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2595 },
2596 'playlist_mincount': 17,
2597 }, {
2598 # Community tab
2599 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
2600 'info_dict': {
2601 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2602 'title': 'lex will - Community',
2603 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2604 },
2605 'playlist_mincount': 18,
2606 }, {
2607 # Channels tab
2608 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
2609 'info_dict': {
2610 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2611 'title': 'lex will - Channels',
2612 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
2613 },
2614 'playlist_mincount': 138,
2615 }, {
2616 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2617 'only_matching': True,
2618 }, {
2619 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2620 'only_matching': True,
2621 }, {
2622 'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ',
2623 'only_matching': True,
2624 }, {
2625 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2626 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2627 'info_dict': {
2628 'title': '29C3: Not my department',
2629 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2630 'uploader': 'Christiaan008',
2631 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
2632 },
2633 'playlist_count': 96,
2634 }, {
2635 'note': 'Large playlist',
2636 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2637 'info_dict': {
2638 'title': 'Uploads from Cauchemar',
2639 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2640 'uploader': 'Cauchemar',
2641 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
2642 },
2643 'playlist_mincount': 1123,
2644 }, {
2645 # even larger playlist, 8832 videos
2646 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2647 'only_matching': True,
2648 }, {
2649 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2650 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2651 'info_dict': {
2652 'title': 'Uploads from Interstellar Movie',
2653 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2654 'uploader': 'Interstellar Movie',
2655 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
2656 },
2657 'playlist_mincount': 21,
2658 }, {
2659 # https://github.com/ytdl-org/youtube-dl/issues/21844
2660 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2661 'info_dict': {
2662 'title': 'Data Analysis with Dr Mike Pound',
2663 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2664 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2665 'uploader': 'Computerphile',
2666 },
2667 'playlist_mincount': 11,
2668 }, {
2669 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2670 'only_matching': True,
2671 }, {
2672 # Playlist URL that does not actually serve a playlist
2673 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2674 'info_dict': {
2675 'id': 'FqZTN594JQw',
2676 'ext': 'webm',
2677 'title': "Smiley's People 01 detective, Adventure Series, Action",
2678 'uploader': 'STREEM',
2679 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2680 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2681 'upload_date': '20150526',
2682 'license': 'Standard YouTube License',
2683 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2684 'categories': ['People & Blogs'],
2685 'tags': list,
2686 'view_count': int,
2687 'like_count': int,
2688 'dislike_count': int,
2689 },
2690 'params': {
2691 'skip_download': True,
2692 },
2693 'skip': 'This video is not available.',
2694 'add_ie': [YoutubeIE.ie_key()],
2695 }, {
2696 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2697 'only_matching': True,
2698 }, {
2699 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
2700 'only_matching': True,
2701 }]
2702
2703 @classmethod
2704 def suitable(cls, url):
2705 IGNORE = (YoutubeLiveIE,)
2706 return (
2707 False if any(ie.suitable(url) for ie in IGNORE)
2708 else super(YoutubeTabIE, cls).suitable(url))
2709
2710 def _extract_channel_id(self, webpage):
2711 channel_id = self._html_search_meta(
2712 'channelId', webpage, 'channel id', default=None)
2713 if channel_id:
2714 return channel_id
2715 channel_url = self._html_search_meta(
2716 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2717 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2718 'twitter:app:url:googleplay'), webpage, 'channel url')
2719 return self._search_regex(
2720 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2721 channel_url, 'channel id')
2722
2723 @staticmethod
2724 def _extract_grid_item_renderer(item):
2725 for item_kind in ('Playlist', 'Video', 'Channel'):
2726 renderer = item.get('grid%sRenderer' % item_kind)
2727 if renderer:
2728 return renderer
2729
2730 def _extract_video(self, renderer):
2731 video_id = renderer.get('videoId')
2732 title = try_get(
2733 renderer,
2734 (lambda x: x['title']['runs'][0]['text'],
2735 lambda x: x['title']['simpleText']), compat_str)
2736 description = try_get(
2737 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
2738 compat_str)
2739 duration = parse_duration(try_get(
2740 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
2741 view_count_text = try_get(
2742 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
2743 view_count = str_to_int(self._search_regex(
2744 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
2745 'view count', default=None))
2746 uploader = try_get(
2747 renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
2748 return {
2749 '_type': 'url_transparent',
2750 'ie_key': YoutubeIE.ie_key(),
2751 'id': video_id,
2752 'url': video_id,
2753 'title': title,
2754 'description': description,
2755 'duration': duration,
2756 'view_count': view_count,
2757 'uploader': uploader,
2758 }
2759
2760 def _grid_entries(self, grid_renderer):
2761 for item in grid_renderer['items']:
2762 if not isinstance(item, dict):
2763 continue
2764 renderer = self._extract_grid_item_renderer(item)
2765 if not isinstance(renderer, dict):
2766 continue
2767 title = try_get(
2768 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2769 # playlist
2770 playlist_id = renderer.get('playlistId')
2771 if playlist_id:
2772 yield self.url_result(
2773 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2774 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2775 video_title=title)
2776 # video
2777 video_id = renderer.get('videoId')
2778 if video_id:
2779 yield self._extract_video(renderer)
2780 # channel
2781 channel_id = renderer.get('channelId')
2782 if channel_id:
2783 title = try_get(
2784 renderer, lambda x: x['title']['simpleText'], compat_str)
2785 yield self.url_result(
2786 'https://www.youtube.com/channel/%s' % channel_id,
2787 ie=YoutubeTabIE.ie_key(), video_title=title)
2788
2789 def _shelf_entries_trimmed(self, shelf_renderer):
2790 renderer = try_get(
2791 shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
2792 if not renderer:
2793 return
2794 # TODO: add support for nested playlists so each shelf is processed
2795 # as separate playlist
2796 # TODO: this includes only first N items
2797 for entry in self._grid_entries(renderer):
2798 yield entry
2799
2800 def _shelf_entries(self, shelf_renderer):
2801 ep = try_get(
2802 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2803 compat_str)
2804 shelf_url = urljoin('https://www.youtube.com', ep)
2805 if not shelf_url:
2806 return
2807 title = try_get(
2808 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2809 yield self.url_result(shelf_url, video_title=title)
2810
2811 def _playlist_entries(self, video_list_renderer):
2812 for content in video_list_renderer['contents']:
2813 if not isinstance(content, dict):
2814 continue
2815 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2816 if not isinstance(renderer, dict):
2817 continue
2818 video_id = renderer.get('videoId')
2819 if not video_id:
2820 continue
2821 yield self._extract_video(renderer)
2822
2823 def _itemSection_entries(self, item_sect_renderer):
2824 for content in item_sect_renderer['contents']:
2825 if not isinstance(content, dict):
2826 continue
2827 renderer = content.get('videoRenderer', {})
2828 if not isinstance(renderer, dict):
2829 continue
2830 video_id = renderer.get('videoId')
2831 if not video_id:
2832 continue
2833 yield self._extract_video(renderer)
2834
2835 def _rich_entries(self, rich_grid_renderer):
2836 renderer = try_get(
2837 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
2838 video_id = renderer.get('videoId')
2839 if not video_id:
2840 return
2841 yield self._extract_video(renderer)
2842
2843 def _video_entry(self, video_renderer):
2844 video_id = video_renderer.get('videoId')
2845 if video_id:
2846 return self._extract_video(video_renderer)
2847
2848 def _post_thread_entries(self, post_thread_renderer):
2849 post_renderer = try_get(
2850 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2851 if not post_renderer:
2852 return
2853 # video attachment
2854 video_renderer = try_get(
2855 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2856 video_id = None
2857 if video_renderer:
2858 entry = self._video_entry(video_renderer)
2859 if entry:
2860 yield entry
2861 # inline video links
2862 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2863 for run in runs:
2864 if not isinstance(run, dict):
2865 continue
2866 ep_url = try_get(
2867 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2868 if not ep_url:
2869 continue
2870 if not YoutubeIE.suitable(ep_url):
2871 continue
2872 ep_video_id = YoutubeIE._match_id(ep_url)
2873 if video_id == ep_video_id:
2874 continue
2875 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
2876
2877 def _post_thread_continuation_entries(self, post_thread_continuation):
2878 contents = post_thread_continuation.get('contents')
2879 if not isinstance(contents, list):
2880 return
2881 for content in contents:
2882 renderer = content.get('backstagePostThreadRenderer')
2883 if not isinstance(renderer, dict):
2884 continue
2885 for entry in self._post_thread_entries(renderer):
2886 yield entry
2887
2888 @staticmethod
2889 def _extract_next_continuation_data(renderer):
2890 next_continuation = try_get(
2891 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2892 if not next_continuation:
2893 return
2894 continuation = next_continuation.get('continuation')
2895 if not continuation:
2896 return
2897 ctp = next_continuation.get('clickTrackingParams')
2898 return {
2899 'ctoken': continuation,
2900 'continuation': continuation,
2901 'itct': ctp,
2902 }
2903
2904 @classmethod
2905 def _extract_continuation(cls, renderer):
2906 next_continuation = cls._extract_next_continuation_data(renderer)
2907 if next_continuation:
2908 return next_continuation
2909 contents = renderer.get('contents')
2910 if not isinstance(contents, list):
2911 return
2912 for content in contents:
2913 if not isinstance(content, dict):
2914 continue
2915 continuation_ep = try_get(
2916 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
2917 dict)
2918 if not continuation_ep:
2919 continue
2920 continuation = try_get(
2921 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
2922 if not continuation:
2923 continue
2924 ctp = continuation_ep.get('clickTrackingParams')
2925 if not ctp:
2926 continue
2927 return {
2928 'ctoken': continuation,
2929 'continuation': continuation,
2930 'itct': ctp,
2931 }
2932
    def _entries(self, tab, identity_token):
        """Yield all entries of the selected tab, following continuations.

        *tab* is the selected tab's 'content' dict; *identity_token*, when
        set, is sent as x-youtube-identity-token so logged-in feeds work.
        Pagination uses the browse_ajax endpoint; the next continuation is
        communicated out of extract_entries via continuation_list[0].
        """

        def extract_entries(parent_renderer):
            # Walks the contents of a sectionListRenderer/richGridRenderer,
            # yields entries and records the next continuation (if any) in
            # continuation_list[0] for the pagination loop below.
            slr_contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for slr_content in slr_contents:
                if not isinstance(slr_content, dict):
                    continue
                is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Rich-grid items (e.g. channel Videos tab) appear at this level.
                    renderer = slr_content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue
                    renderer = isr_content.get('playlistVideoListRenderer')
                    if renderer:
                        for entry in self._playlist_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('gridRenderer')
                    if renderer:
                        for entry in self._grid_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('shelfRenderer')
                    if renderer:
                        for entry in self._shelf_entries(renderer):
                            yield entry
                        # NOTE: shelves take their continuation from the parent,
                        # not from the shelf renderer itself.
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                        continue
                    renderer = isr_content.get('backstagePostThreadRenderer')
                    if renderer:
                        for entry in self._post_thread_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        continue
                    renderer = isr_content.get('videoRenderer')
                    if renderer:
                        entry = self._video_entry(renderer)
                        if entry:
                            yield entry
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        parent_renderer = (
            try_get(tab, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
        if parent_renderer:
            for entry in extract_entries(parent_renderer):
                yield entry

        continuation = continuation_list[0]

        headers = {
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20201112.04.01',
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token

        # Pagination: keep requesting continuations until none is returned.
        for page_num in itertools.count(1):
            if not continuation:
                break
            # _MAX_PAGES is optionally set by subclasses to cap pagination.
            if hasattr(self, '_MAX_PAGES') and page_num > self._MAX_PAGES:
                break
            browse = self._download_json(
                'https://www.youtube.com/browse_ajax', None,
                'Downloading page %d' % page_num,
                headers=headers, query=continuation, fatal=False)
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
            if not response:
                break

            # Legacy continuation format: response['continuationContents'].
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict)
            if continuation_contents:
                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
                if continuation_renderer:
                    for entry in self._playlist_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('gridContinuation')
                if continuation_renderer:
                    for entry in self._grid_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('itemSectionContinuation')
                if continuation_renderer:
                    for entry in self._post_thread_continuation_entries(continuation_renderer):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    continue
                continuation_renderer = continuation_contents.get('sectionListContinuation')
                if continuation_renderer:
                    continuation_list = [None]
                    for entry in extract_entries(continuation_renderer):
                        yield entry
                    continuation = continuation_list[0]
                    continue

            # Newer continuation format: onResponseReceivedActions.
            continuation_items = try_get(
                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
            if continuation_items:
                continuation_item = continuation_items[0]
                if not isinstance(continuation_item, dict):
                    continue
                renderer = continuation_item.get('playlistVideoRenderer')
                if renderer:
                    video_list_renderer = {'contents': continuation_items}
                    for entry in self._playlist_entries(video_list_renderer):
                        yield entry
                    continuation = self._extract_continuation(video_list_renderer)
                    continue
                renderer = continuation_item.get('itemSectionRenderer')
                if renderer:
                    for entry in self._itemSection_entries(renderer):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    continue
            break
3067
3068 @staticmethod
3069 def _extract_selected_tab(tabs):
3070 for tab in tabs:
3071 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3072 return tab['tabRenderer']
3073 else:
3074 raise ExtractorError('Unable to find selected tab')
3075
3076 @staticmethod
3077 def _extract_uploader(data):
3078 uploader = {}
3079 sidebar_renderer = try_get(
3080 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3081 if sidebar_renderer:
3082 for item in sidebar_renderer:
3083 if not isinstance(item, dict):
3084 continue
3085 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3086 if not isinstance(renderer, dict):
3087 continue
3088 owner = try_get(
3089 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3090 if owner:
3091 uploader['uploader'] = owner.get('text')
3092 uploader['uploader_id'] = try_get(
3093 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3094 uploader['uploader_url'] = urljoin(
3095 'https://www.youtube.com/',
3096 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3097 return uploader
3098
    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        """Build a playlist result for the currently selected channel/playlist tab.

        Returns None when neither channel nor playlist metadata yields a
        playlist id. Title is "<channel> - <tab>" for channels, or the
        playlist's own title for playlists.
        """
        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        playlist_id = None
        if renderer:
            channel_title = renderer.get('title') or item_id
            tab_title = selected_tab.get('title')
            title = channel_title or item_id
            if tab_title:
                title += ' - %s' % tab_title
            description = renderer.get('description')
            playlist_id = renderer.get('externalId')
        # playlistMetadataRenderer takes precedence over channel metadata.
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
        if renderer:
            title = renderer.get('title')
            description = None
            playlist_id = item_id
        # NOTE: title/description are only bound inside the branches above,
        # but playlist_id is None exactly when neither branch ran, so this
        # early return keeps them from being read unbound.
        if playlist_id is None:
            return None
        playlist = self.playlist_result(
            self._entries(selected_tab['content'], identity_token),
            playlist_id=playlist_id, playlist_title=title,
            playlist_description=description)
        playlist.update(self._extract_uploader(data))
        return playlist
3126
3127 def _extract_from_playlist(self, item_id, data, playlist):
3128 title = playlist.get('title') or try_get(
3129 data, lambda x: x['titleText']['simpleText'], compat_str)
3130 playlist_id = playlist.get('playlistId') or item_id
3131 return self.playlist_result(
3132 self._playlist_entries(playlist), playlist_id=playlist_id,
3133 playlist_title=title)
3134
    def _real_extract(self, url):
        """Dispatch a tab/playlist/watch URL to the appropriate extraction path."""
        item_id = self._match_id(url)
        # Normalize the host (music./kids./invidious mirrors) to www.youtube.com.
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]
        if video_id and playlist_id:
            # --no-playlist short-circuits to single-video extraction.
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        webpage = self._download_webpage(url, item_id)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(item_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, data, playlist)
        # Fallback to video extraction if no playlist alike page is recognized
        if video_id:
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        # Failed to recognize
        raise ExtractorError('Unable to recognize tab page')
3166
3167
class YoutubePlaylistIE(InfoExtractor):
    """Thin extractor that normalizes playlist URLs (including bare playlist
    ids) and delegates actual extraction to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    # Accepts full playlist URLs as well as bare playlist ids (see _TESTS).
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us|
                                youtu\.be
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
3184 _TESTS = [{
3185 'note': 'issue #673',
3186 'url': 'PLBB231211A4F62143',
3187 'info_dict': {
3188 'title': '[OLD]Team Fortress 2 (Class-based LP)',
3189 'id': 'PLBB231211A4F62143',
3190 'uploader': 'Wickydoo',
3191 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
3192 },
3193 'playlist_mincount': 29,
3194 }, {
3195 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3196 'info_dict': {
3197 'title': 'YDL_safe_search',
3198 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
3199 },
3200 'playlist_count': 2,
3201 'skip': 'This playlist is private',
3202 }, {
3203 'note': 'embedded',
3204 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3205 'playlist_count': 4,
3206 'info_dict': {
3207 'title': 'JODA15',
3208 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
3209 'uploader': 'milan',
3210 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
3211 }
3212 }, {
3213 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3214 'playlist_mincount': 982,
3215 'info_dict': {
3216 'title': '2018 Chinese New Singles (11/6 updated)',
3217 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
3218 'uploader': 'LBK',
3219 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
3220 }
3221 }, {
3222 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
3223 'info_dict': {
3224 'id': 'yeWKywCrFtk',
3225 'ext': 'mp4',
3226 'title': 'Small Scale Baler and Braiding Rugs',
3227 'uploader': 'Backus-Page House Museum',
3228 'uploader_id': 'backuspagemuseum',
3229 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
3230 'upload_date': '20161008',
3231 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
3232 'categories': ['Nonprofits & Activism'],
3233 'tags': list,
3234 'like_count': int,
3235 'dislike_count': int,
3236 },
3237 'params': {
3238 'noplaylist': True,
3239 'skip_download': True,
3240 },
3241 }, {
3242 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
3243 'only_matching': True,
3244 }, {
3245 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
3246 'only_matching': True,
3247 }, {
3248 # music album playlist
3249 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
3250 'only_matching': True,
3251 }]
3252
3253 @classmethod
3254 def suitable(cls, url):
3255 return False if YoutubeTabIE.suitable(url) else super(
3256 YoutubePlaylistIE, cls).suitable(url)
3257
3258 def _real_extract(self, url):
3259 playlist_id = self._match_id(url)
3260 qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
3261 if not qs:
3262 qs = {'list': playlist_id}
3263 return self.url_result(
3264 update_url_query('https://www.youtube.com/playlist', qs),
3265 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3266
3267
class YoutubeYtUserIE(InfoExtractor):
    """Handle the internal ytuser:<name> scheme by delegating to the tab extractor."""
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        username = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % username
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=username)
3280
3281
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for /live URLs: resolves a channel's live stream to its
    video id when one is broadcasting, otherwise falls back to the channel."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve /live to the current live video, or fall back to the base URL."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        # Best-effort download: on failure we still fall back to base_url.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if webpage:
            page_type = self._og_search_property(
                'type', webpage, 'page type', default='')
            video_id = self._html_search_meta(
                'videoId', webpage, 'video id', default=None)
            # Only accept a syntactically valid 11-char video id on a video page.
            if page_type.startswith('video') and video_id and re.match(
                    r'^[0-9A-Za-z_-]{11}$', video_id):
                return self.url_result(video_id, YoutubeIE.ie_key())
        # No live stream found: let the tab extractor handle the channel page.
        return self.url_result(base_url)
3332
3333
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor driven by the ytsearchN: pseudo-URL scheme."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional extra filter value sent as 'params' in the API payload;
    # overridden by subclasses (see YoutubeSearchDateIE).
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent result dicts for *query*,
        paginating through the InnerTube search API."""
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first page nests results under sectionListRenderer;
            # continuation pages return them via onResponseReceivedCommands.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Non-video items (shelves, ads, channels) have no
                # videoRenderer and are skipped.
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                # '>=' rather than '==': guards against total ever stepping
                # past n (e.g. n == 0), in which case an equality test would
                # never fire and pagination would continue indefinitely.
                if total >= n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3422
3423
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same machinery as YoutubeSearchIE; only the 'sp' filter payload is
    # preset so results come back newest-first.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # 'CAI%3D' is the percent-encoded form of 'CAI=' — presumably the
    # base64 sort-by-upload-date filter blob; TODO confirm the API accepts
    # the still-encoded value.
    _SEARCH_PARAMS = 'CAI%3D'
3429
3430
class YoutubeSearchURLIE(InfoExtractor):
    """Extractor for youtube.com/results?... search URLs.

    Parses the query (and the optional 'sp' filter parameter, which may
    appear before or after the query) out of the URL and delegates the
    actual search to YoutubeSearchIE.
    """
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    # NOTE(review): _PARAM_REGEX is unused within this class; kept in case
    # something external references it — confirm and drop.
    _PARAM_REGEX = r''
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results/?(?:\?|\?[^#]*?&)(?:sp=(?P<param1>[^&#]+)&(?:[^#]*&)?)?(?:q|search_query)=(?P<query>[^#&]+)(?:[^#]*?&sp=(?P<param2>[^#&]+))?'
    _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        IE = YoutubeSearchIE(self._downloader)
        # 'sp' can precede (param1) or follow (param2) the query parameter.
        IE._SEARCH_PARAMS = mobj.group('param1') or mobj.group('param2')
        # Removed a leftover debug to_screen() call that printed the raw
        # search parameters — or 'None' — to the user's console.
        IE._MAX_RESULTS = self._MAX_RESULTS
        return IE._get_n_results(query, self._MAX_RESULTS)
3456
3457
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Common machinery for the authenticated YouTube feed extractors.

    Concrete subclasses must provide _FEED_NAME and _PLAYLIST_TITLE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    # _MAX_PAGES = 5

    @property
    def IE_NAME(self):
        # Derived per-subclass, e.g. 'youtube:history'.
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so authenticate up front.
        self._login()

    def _shelf_entries(self, shelf_renderer):
        # Feed shelves only carry grid contents; anything else yields nothing.
        grid = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
        if grid:
            for item in self._grid_entries(grid):
                yield item

    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
        tab = self._extract_selected_tab(tabs)
        entries = self._entries(tab['content'], identity_token)
        return self.playlist_result(entries, playlist_title=self._PLAYLIST_TITLE)

    def _real_extract(self, url):
        # The incoming URL is ignored; the feed is addressed by _FEED_NAME.
        feed_id = self._FEED_NAME
        feed_url = 'https://www.youtube.com/feed/%s' % feed_id
        webpage = self._download_webpage(feed_url, feed_id)
        identity_token = self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
        data = self._extract_yt_initial_data(feed_id, webpage)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if not tabs:
            # Page layout not understood.
            raise ExtractorError('Unable to recognize feed page')
        return self._extract_from_tabs(feed_id, webpage, data, tabs, identity_token)
3502
3503
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    # Handles both the web URL and the ':ytwatchlater' shorthand.
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is exposed as the special 'WL' playlist, so this
        # bypasses the inherited feed scraping and defers to the playlist
        # extractor instead.
        return self.url_result('WL', ie=YoutubePlaylistIE.ie_key())
3519
3520
class YoutubeFavouritesIE(YoutubeFeedsInfoExtractor):
    # Only reachable via the shorthand (no web URL form is matched).
    IE_DESC = 'YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)s?'
    _FEED_NAME = 'favourites'

    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist, so defer to the
        # playlist extractor instead of the inherited feed scraping.
        return self.url_result('LL', ie=YoutubePlaylistIE.ie_key())
3533
3534
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Attribute-only subclass: all extraction logic lives in
    # YoutubeFeedsInfoExtractor, which fetches /feed/<_FEED_NAME>.
    # Also matches the bare youtube.com front page.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3540
3541
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Attribute-only subclass: all extraction logic lives in
    # YoutubeFeedsInfoExtractor, which fetches /feed/<_FEED_NAME>.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3547
3548
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Attribute-only subclass: all extraction logic lives in
    # YoutubeFeedsInfoExtractor, which fetches /feed/<_FEED_NAME>.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
3554
3555
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch/attribution URLs that carry only auxiliary parameters and
    # no video id — typically the result of an unquoted '&' in the shell
    # cutting off the 'v=' part.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing to extract — fail with a hint that the shell most likely
        # split the command line at an unquoted '&'.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3603
3604
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the canonical
    11 characters (e.g. a truncated paste) and fail with a clear message."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)
3620
3621
# Do Youtube show urls even exist anymore? I couldn't find any
# NOTE(review): the extractor below is deliberately disabled by wrapping it
# in a raw-string literal; remove it — or re-enable it — once the status of
# youtube.com/show/ pages is confirmed.
r'''
class YoutubeShowIE(YoutubeTabIE):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
'''