]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Merge branch 'legraphista-fix/google-drive-cookie-issue'
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28)
29from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 mimetype2ext,
40 orderedSet,
41 parse_codecs,
42 parse_duration,
43 remove_quotes,
44 remove_start,
45 smuggle_url,
46 str_or_none,
47 str_to_int,
48 try_get,
49 unescapeHTML,
50 unified_strdate,
51 unsmuggle_url,
52 uppercase_escape,
53 url_or_none,
54 urlencode_postdata,
55)
56
57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints of Google's "GlifWebSignIn" JSON sign-in flow used by _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist ids by their known prefixes (PL, UU, OLAK5uy_, ...)
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        # Force an English interface via the PREF cookie so that scraped
        # strings and URLs match the patterns the extractors expect
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap bare video ids into url_result dicts that YoutubeIE will handle
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # NOTE: returns None (falsy) rather than False here
            return

        # Hidden <input> fields of the login form are carried over into
        # every subsequent request of the flow
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST one step of the sign-in flow. The response is JSON with a
            # junk prefix (anti-XSSI), which transform_source strips before
            # parsing; returns False on (non-fatal) download failure.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Positional request/response structures below were reverse-engineered
        # from the web sign-in flow; the meaning of most slots is unknown,
        # so do not reorder or "clean up" these literals.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # Opaque account identifier echoed back in later requests
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # NOTE: returns None (falsy) rather than False here
            return

        # A non-empty slot [0][5] signals a login error (e.g. bad password)
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # If present, Google wants an additional verification step before
        # completing the login
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token identifying this challenge session; interpolated
                # into _TFA_URL below
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes are sometimes prefixed "G-" in Google's UI
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Challenges that cannot be solved automatically; the user
                # must resolve them in a browser
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Visiting the CheckCookie URL finalizes the session cookies
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through myaccount.google.com
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Force disable_polymer=true on every request so YouTube serves the
        # legacy HTML layout these extractors were written to parse
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        # Called once before extraction: pin the UI language and try to log in
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
285
286
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Shared logic for pages that paginate via a "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from *page*, following "Load more" continuations."""
        widget_html = chunk_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(chunk_html):
                yield entry

            # The widget HTML carries the href of the next continuation; its
            # absence means we reached the last page
            load_more = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                break

            retries = 3
            attempt = 0
            while True:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://www.youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s'
                        % (page_num, ' (retry #%d)' % attempt if attempt else ''),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                    break
                except ExtractorError as e:
                    retriable = isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503)
                    if retriable:
                        attempt += 1
                        if attempt <= retries:
                            continue
                    raise

            chunk_html = more['content_html']
            if not chunk_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
325
326
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        # Turn each (id, title) pair into a url_result handled by YoutubeIE
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Scan *page* with *video_re*, appending new ids/titles to the given lists."""
        for match in re.finditer(video_re, page):
            groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in groups and match.group('id') == '0':
                continue
            video_id = match.group('id')
            title = unescapeHTML(match.group('title')) if 'title' in groups else None
            if title:
                title = title.strip()
            if title == '► Play all':
                # The "Play all" pseudo-link carries no real title
                title = None
            if video_id in ids_in_page:
                # Already collected: only fill in a previously missing title
                pos = ids_in_page.index(video_id)
                if title and not titles_in_page[pos]:
                    titles_in_page[pos] = title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(title)

    def extract_videos_from_page(self, page):
        ids_in_page, titles_in_page = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)
358
359
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        # Collect playlist ids from lockup-title anchors, deduplicated while
        # preserving their order of appearance
        playlist_ids = orderedSet(re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content))
        for playlist_id in playlist_ids:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page and return a playlist of the playlists it links to."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        return self.playlist_result(
            self._entries(webpage, playlist_id), playlist_id,
            self._og_search_title(webpage, fatal=False))
373
374
375class YoutubeIE(YoutubeBaseInfoExtractor):
376 IE_DESC = 'YouTube.com'
377 _VALID_URL = r"""(?x)^
378 (
379 (?:https?://|//) # http(s):// or protocol-independent URL
380 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
381 (?:www\.)?deturl\.com/www\.youtube\.com/|
382 (?:www\.)?pwnyoutube\.com/|
383 (?:www\.)?hooktube\.com/|
384 (?:www\.)?yourepeat\.com/|
385 tube\.majestyc\.net/|
386 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
387 (?:(?:www|dev)\.)?invidio\.us/|
388 (?:(?:www|no)\.)?invidiou\.sh/|
389 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
390 (?:www\.)?invidious\.kabi\.tk/|
391 (?:www\.)?invidious\.13ad\.de/|
392 (?:www\.)?invidious\.mastodon\.host/|
393 (?:www\.)?invidious\.nixnet\.xyz/|
394 (?:www\.)?invidious\.drycat\.fr/|
395 (?:www\.)?tube\.poal\.co/|
396 (?:www\.)?vid\.wxzm\.sx/|
397 (?:www\.)?yewtu\.be/|
398 (?:www\.)?yt\.elukerio\.org/|
399 (?:www\.)?yt\.lelux\.fi/|
400 (?:www\.)?invidious\.ggc-project\.de/|
401 (?:www\.)?yt\.maisputain\.ovh/|
402 (?:www\.)?invidious\.13ad\.de/|
403 (?:www\.)?invidious\.toot\.koeln/|
404 (?:www\.)?invidious\.fdn\.fr/|
405 (?:www\.)?watch\.nettohikari\.com/|
406 (?:www\.)?kgg2m7yk5aybusll\.onion/|
407 (?:www\.)?qklhadlycap4cnod\.onion/|
408 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
409 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
410 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
411 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
412 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
413 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
414 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
415 (?:.*?\#/)? # handle anchor (#/) redirect urls
416 (?: # the various things that can precede the ID:
417 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
418 |(?: # or the v= param in all its forms
419 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
420 (?:\?|\#!?) # the params delimiter ? or # or #!
421 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
422 v=
423 )
424 ))
425 |(?:
426 youtu\.be| # just youtu.be/xxxx
427 vid\.plus| # or vid.plus/xxxx
428 zwearz\.com/watch| # or zwearz.com/watch/xxxx
429 )/
430 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
431 )
432 )? # all until now is optional -> you can pass the naked ID
433 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
434 (?!.*?\blist=
435 (?:
436 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
437 WL # WL are handled by the watch later IE
438 )
439 )
440 (?(1).+)? # if we found the ID, everything can follow
441 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
442 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
443 _PLAYER_INFO_RE = (
444 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
445 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
446 )
447 _formats = {
448 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
449 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
450 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
451 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
452 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
453 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
454 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
455 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
456 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
457 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
458 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
459 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
460 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
461 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
462 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
463 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
464 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
465 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
466
467
468 # 3D videos
469 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
470 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
471 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
472 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
473 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
474 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
475 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
476
477 # Apple HTTP Live Streaming
478 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
479 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
480 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
481 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
482 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
483 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
484 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
485 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
486
487 # DASH mp4 video
488 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
489 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
490 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
491 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
492 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
493 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
494 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
495 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
496 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
497 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
498 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
499 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
500
501 # Dash mp4 audio
502 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
503 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
504 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
505 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
506 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
507 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
508 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
509
510 # Dash webm
511 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
512 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
513 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
514 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
515 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
516 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
517 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
518 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
519 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
520 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
521 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
522 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
523 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
524 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
525 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
526 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
527 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
528 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
529 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
530 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
531 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
533
534 # Dash webm audio
535 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
536 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
537
538 # Dash webm audio with opus inside
539 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
540 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
541 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
542
543 # RTMP (unnamed)
544 '_rtmp': {'protocol': 'rtmp'},
545
546 # av01 video only formats sometimes served with "unknown" codecs
547 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
548 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
549 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
550 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
551 }
552 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt', 'json3')
553
554 _GEO_BYPASS = False
555
556 IE_NAME = 'youtube'
557 _TESTS = [
558 {
559 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
560 'info_dict': {
561 'id': 'BaW_jenozKc',
562 'ext': 'mp4',
563 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
564 'uploader': 'Philipp Hagemeister',
565 'uploader_id': 'phihag',
566 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
567 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
568 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
569 'upload_date': '20121002',
570 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
571 'categories': ['Science & Technology'],
572 'tags': ['youtube-dl'],
573 'duration': 10,
574 'view_count': int,
575 'like_count': int,
576 'dislike_count': int,
577 'start_time': 1,
578 'end_time': 9,
579 }
580 },
581 {
582 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
583 'note': 'Test generic use_cipher_signature video (#897)',
584 'info_dict': {
585 'id': 'UxxajLWwzqY',
586 'ext': 'mp4',
587 'upload_date': '20120506',
588 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
589 'alt_title': 'I Love It (feat. Charli XCX)',
590 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
591 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
592 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
593 'iconic ep', 'iconic', 'love', 'it'],
594 'duration': 180,
595 'uploader': 'Icona Pop',
596 'uploader_id': 'IconaPop',
597 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
598 'creator': 'Icona Pop',
599 'track': 'I Love It (feat. Charli XCX)',
600 'artist': 'Icona Pop',
601 }
602 },
603 {
604 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
605 'note': 'Test VEVO video with age protection (#956)',
606 'info_dict': {
607 'id': '07FYdnEawAQ',
608 'ext': 'mp4',
609 'upload_date': '20130703',
610 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
611 'alt_title': 'Tunnel Vision',
612 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
613 'duration': 419,
614 'uploader': 'justintimberlakeVEVO',
615 'uploader_id': 'justintimberlakeVEVO',
616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
617 'creator': 'Justin Timberlake',
618 'track': 'Tunnel Vision',
619 'artist': 'Justin Timberlake',
620 'age_limit': 18,
621 }
622 },
623 {
624 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
625 'note': 'Embed-only video (#1746)',
626 'info_dict': {
627 'id': 'yZIXLfi8CZQ',
628 'ext': 'mp4',
629 'upload_date': '20120608',
630 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
631 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
632 'uploader': 'SET India',
633 'uploader_id': 'setindia',
634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
635 'age_limit': 18,
636 }
637 },
638 {
639 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
640 'note': 'Use the first video ID in the URL',
641 'info_dict': {
642 'id': 'BaW_jenozKc',
643 'ext': 'mp4',
644 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
645 'uploader': 'Philipp Hagemeister',
646 'uploader_id': 'phihag',
647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
648 'upload_date': '20121002',
649 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
650 'categories': ['Science & Technology'],
651 'tags': ['youtube-dl'],
652 'duration': 10,
653 'view_count': int,
654 'like_count': int,
655 'dislike_count': int,
656 },
657 'params': {
658 'skip_download': True,
659 },
660 },
661 {
662 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
663 'note': '256k DASH audio (format 141) via DASH manifest',
664 'info_dict': {
665 'id': 'a9LDPn-MO4I',
666 'ext': 'm4a',
667 'upload_date': '20121002',
668 'uploader_id': '8KVIDEO',
669 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
670 'description': '',
671 'uploader': '8KVIDEO',
672 'title': 'UHDTV TEST 8K VIDEO.mp4'
673 },
674 'params': {
675 'youtube_include_dash_manifest': True,
676 'format': '141',
677 },
678 'skip': 'format 141 not served anymore',
679 },
680 # DASH manifest with encrypted signature
681 {
682 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
683 'info_dict': {
684 'id': 'IB3lcPjvWLA',
685 'ext': 'm4a',
686 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
687 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
688 'duration': 244,
689 'uploader': 'AfrojackVEVO',
690 'uploader_id': 'AfrojackVEVO',
691 'upload_date': '20131011',
692 },
693 'params': {
694 'youtube_include_dash_manifest': True,
695 'format': '141/bestaudio[ext=m4a]',
696 },
697 },
698 # JS player signature function name containing $
699 {
700 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
701 'info_dict': {
702 'id': 'nfWlot6h_JM',
703 'ext': 'm4a',
704 'title': 'Taylor Swift - Shake It Off',
705 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
706 'duration': 242,
707 'uploader': 'TaylorSwiftVEVO',
708 'uploader_id': 'TaylorSwiftVEVO',
709 'upload_date': '20140818',
710 },
711 'params': {
712 'youtube_include_dash_manifest': True,
713 'format': '141/bestaudio[ext=m4a]',
714 },
715 },
716 # Controversy video
717 {
718 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
719 'info_dict': {
720 'id': 'T4XJQO3qol8',
721 'ext': 'mp4',
722 'duration': 219,
723 'upload_date': '20100909',
724 'uploader': 'Amazing Atheist',
725 'uploader_id': 'TheAmazingAtheist',
726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
727 'title': 'Burning Everyone\'s Koran',
728 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
729 }
730 },
731 # Normal age-gate video (No vevo, embed allowed)
732 {
733 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
734 'info_dict': {
735 'id': 'HtVdAasjOgU',
736 'ext': 'mp4',
737 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
738 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
739 'duration': 142,
740 'uploader': 'The Witcher',
741 'uploader_id': 'WitcherGame',
742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
743 'upload_date': '20140605',
744 'age_limit': 18,
745 },
746 },
747 # Age-gate video with encrypted signature
748 {
749 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
750 'info_dict': {
751 'id': '6kLq3WMV1nU',
752 'ext': 'mp4',
753 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
754 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
755 'duration': 246,
756 'uploader': 'LloydVEVO',
757 'uploader_id': 'LloydVEVO',
758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
759 'upload_date': '20110629',
760 'age_limit': 18,
761 },
762 },
763 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
764 # YouTube Red ad is not captured for creator
765 {
766 'url': '__2ABJjxzNo',
767 'info_dict': {
768 'id': '__2ABJjxzNo',
769 'ext': 'mp4',
770 'duration': 266,
771 'upload_date': '20100430',
772 'uploader_id': 'deadmau5',
773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
774 'creator': 'Dada Life, deadmau5',
775 'description': 'md5:12c56784b8032162bb936a5f76d55360',
776 'uploader': 'deadmau5',
777 'title': 'Deadmau5 - Some Chords (HD)',
778 'alt_title': 'This Machine Kills Some Chords',
779 },
780 'expected_warnings': [
781 'DASH manifest missing',
782 ]
783 },
784 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
785 {
786 'url': 'lqQg6PlCWgI',
787 'info_dict': {
788 'id': 'lqQg6PlCWgI',
789 'ext': 'mp4',
790 'duration': 6085,
791 'upload_date': '20150827',
792 'uploader_id': 'olympic',
793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
794 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
795 'uploader': 'Olympic',
796 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
797 },
798 'params': {
799 'skip_download': 'requires avconv',
800 }
801 },
802 # Non-square pixels
803 {
804 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
805 'info_dict': {
806 'id': '_b-2C3KPAM0',
807 'ext': 'mp4',
808 'stretched_ratio': 16 / 9.,
809 'duration': 85,
810 'upload_date': '20110310',
811 'uploader_id': 'AllenMeow',
812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
813 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
814 'uploader': '孫ᄋᄅ',
815 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
816 },
817 },
818 # url_encoded_fmt_stream_map is empty string
819 {
820 'url': 'qEJwOuvDf7I',
821 'info_dict': {
822 'id': 'qEJwOuvDf7I',
823 'ext': 'webm',
824 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
825 'description': '',
826 'upload_date': '20150404',
827 'uploader_id': 'spbelect',
828 'uploader': 'Наблюдатели Петербурга',
829 },
830 'params': {
831 'skip_download': 'requires avconv',
832 },
833 'skip': 'This live event has ended.',
834 },
835 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
836 {
837 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
838 'info_dict': {
839 'id': 'FIl7x6_3R5Y',
840 'ext': 'webm',
841 'title': 'md5:7b81415841e02ecd4313668cde88737a',
842 'description': 'md5:116377fd2963b81ec4ce64b542173306',
843 'duration': 220,
844 'upload_date': '20150625',
845 'uploader_id': 'dorappi2000',
846 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
847 'uploader': 'dorappi2000',
848 'formats': 'mincount:31',
849 },
850 'skip': 'not actual anymore',
851 },
852 # DASH manifest with segment_list
853 {
854 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
855 'md5': '8ce563a1d667b599d21064e982ab9e31',
856 'info_dict': {
857 'id': 'CsmdDsKjzN8',
858 'ext': 'mp4',
859 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
860 'uploader': 'Airtek',
861 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
862 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
863 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
864 },
865 'params': {
866 'youtube_include_dash_manifest': True,
867 'format': '135', # bestvideo
868 },
869 'skip': 'This live event has ended.',
870 },
871 {
872 # Multifeed videos (multiple cameras), URL is for Main Camera
873 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
874 'info_dict': {
875 'id': 'jqWvoWXjCVs',
876 'title': 'teamPGP: Rocket League Noob Stream',
877 'description': 'md5:dc7872fb300e143831327f1bae3af010',
878 },
879 'playlist': [{
880 'info_dict': {
881 'id': 'jqWvoWXjCVs',
882 'ext': 'mp4',
883 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
884 'description': 'md5:dc7872fb300e143831327f1bae3af010',
885 'duration': 7335,
886 'upload_date': '20150721',
887 'uploader': 'Beer Games Beer',
888 'uploader_id': 'beergamesbeer',
889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
890 'license': 'Standard YouTube License',
891 },
892 }, {
893 'info_dict': {
894 'id': '6h8e8xoXJzg',
895 'ext': 'mp4',
896 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
897 'description': 'md5:dc7872fb300e143831327f1bae3af010',
898 'duration': 7337,
899 'upload_date': '20150721',
900 'uploader': 'Beer Games Beer',
901 'uploader_id': 'beergamesbeer',
902 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
903 'license': 'Standard YouTube License',
904 },
905 }, {
906 'info_dict': {
907 'id': 'PUOgX5z9xZw',
908 'ext': 'mp4',
909 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
910 'description': 'md5:dc7872fb300e143831327f1bae3af010',
911 'duration': 7337,
912 'upload_date': '20150721',
913 'uploader': 'Beer Games Beer',
914 'uploader_id': 'beergamesbeer',
915 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
916 'license': 'Standard YouTube License',
917 },
918 }, {
919 'info_dict': {
920 'id': 'teuwxikvS5k',
921 'ext': 'mp4',
922 'title': 'teamPGP: Rocket League Noob Stream (zim)',
923 'description': 'md5:dc7872fb300e143831327f1bae3af010',
924 'duration': 7334,
925 'upload_date': '20150721',
926 'uploader': 'Beer Games Beer',
927 'uploader_id': 'beergamesbeer',
928 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
929 'license': 'Standard YouTube License',
930 },
931 }],
932 'params': {
933 'skip_download': True,
934 },
935 'skip': 'This video is not available.',
936 },
937 {
938 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
939 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
940 'info_dict': {
941 'id': 'gVfLd0zydlo',
942 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
943 },
944 'playlist_count': 2,
945 'skip': 'Not multifeed anymore',
946 },
947 {
948 'url': 'https://vid.plus/FlRa-iH7PGw',
949 'only_matching': True,
950 },
951 {
952 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
953 'only_matching': True,
954 },
955 {
956 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
957 # Also tests cut-off URL expansion in video description (see
958 # https://github.com/ytdl-org/youtube-dl/issues/1892,
959 # https://github.com/ytdl-org/youtube-dl/issues/8164)
960 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
961 'info_dict': {
962 'id': 'lsguqyKfVQg',
963 'ext': 'mp4',
964 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
965 'alt_title': 'Dark Walk - Position Music',
966 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
967 'duration': 133,
968 'upload_date': '20151119',
969 'uploader_id': 'IronSoulElf',
970 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
971 'uploader': 'IronSoulElf',
972 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
973 'track': 'Dark Walk - Position Music',
974 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
975 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976 },
977 'params': {
978 'skip_download': True,
979 },
980 },
981 {
982 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
983 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
984 'only_matching': True,
985 },
986 {
987 # Video with yt:stretch=17:0
988 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
989 'info_dict': {
990 'id': 'Q39EVAstoRM',
991 'ext': 'mp4',
992 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
993 'description': 'md5:ee18a25c350637c8faff806845bddee9',
994 'upload_date': '20151107',
995 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
996 'uploader': 'CH GAMER DROID',
997 },
998 'params': {
999 'skip_download': True,
1000 },
1001 'skip': 'This video does not exist.',
1002 },
1003 {
1004 # Video licensed under Creative Commons
1005 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1006 'info_dict': {
1007 'id': 'M4gD1WSo5mA',
1008 'ext': 'mp4',
1009 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1010 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1011 'duration': 721,
1012 'upload_date': '20150127',
1013 'uploader_id': 'BerkmanCenter',
1014 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1015 'uploader': 'The Berkman Klein Center for Internet & Society',
1016 'license': 'Creative Commons Attribution license (reuse allowed)',
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
1021 },
1022 {
1023 # Channel-like uploader_url
1024 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1025 'info_dict': {
1026 'id': 'eQcmzGIKrzg',
1027 'ext': 'mp4',
1028 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1029 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
1030 'duration': 4060,
1031 'upload_date': '20151119',
1032 'uploader': 'Bernie Sanders',
1033 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1034 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1035 'license': 'Creative Commons Attribution license (reuse allowed)',
1036 },
1037 'params': {
1038 'skip_download': True,
1039 },
1040 },
1041 {
1042 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1043 'only_matching': True,
1044 },
1045 {
1046 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1047 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1048 'only_matching': True,
1049 },
1050 {
1051 # Rental video preview
1052 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1053 'info_dict': {
1054 'id': 'uGpuVWrhIzE',
1055 'ext': 'mp4',
1056 'title': 'Piku - Trailer',
1057 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1058 'upload_date': '20150811',
1059 'uploader': 'FlixMatrix',
1060 'uploader_id': 'FlixMatrixKaravan',
1061 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1062 'license': 'Standard YouTube License',
1063 },
1064 'params': {
1065 'skip_download': True,
1066 },
1067 'skip': 'This video is not available.',
1068 },
1069 {
1070 # YouTube Red video with episode data
1071 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1072 'info_dict': {
1073 'id': 'iqKdEhx-dD4',
1074 'ext': 'mp4',
1075 'title': 'Isolation - Mind Field (Ep 1)',
1076 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1077 'duration': 2085,
1078 'upload_date': '20170118',
1079 'uploader': 'Vsauce',
1080 'uploader_id': 'Vsauce',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1082 'series': 'Mind Field',
1083 'season_number': 1,
1084 'episode_number': 1,
1085 },
1086 'params': {
1087 'skip_download': True,
1088 },
1089 'expected_warnings': [
1090 'Skipping DASH manifest',
1091 ],
1092 },
1093 {
1094 # The following content has been identified by the YouTube community
1095 # as inappropriate or offensive to some audiences.
1096 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1097 'info_dict': {
1098 'id': '6SJNVb0GnPI',
1099 'ext': 'mp4',
1100 'title': 'Race Differences in Intelligence',
1101 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1102 'duration': 965,
1103 'upload_date': '20140124',
1104 'uploader': 'New Century Foundation',
1105 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1106 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1107 },
1108 'params': {
1109 'skip_download': True,
1110 },
1111 },
1112 {
1113 # itag 212
1114 'url': '1t24XAntNCY',
1115 'only_matching': True,
1116 },
1117 {
1118 # geo restricted to JP
1119 'url': 'sJL6WA-aGkQ',
1120 'only_matching': True,
1121 },
1122 {
1123 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1124 'only_matching': True,
1125 },
1126 {
1127 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1128 'only_matching': True,
1129 },
1130 {
1131 # DRM protected
1132 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1133 'only_matching': True,
1134 },
1135 {
1136 # Video with unsupported adaptive stream type formats
1137 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1138 'info_dict': {
1139 'id': 'Z4Vy8R84T1U',
1140 'ext': 'mp4',
1141 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1142 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1143 'duration': 433,
1144 'upload_date': '20130923',
1145 'uploader': 'Amelia Putri Harwita',
1146 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1147 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1148 'formats': 'maxcount:10',
1149 },
1150 'params': {
1151 'skip_download': True,
1152 'youtube_include_dash_manifest': False,
1153 },
1154 'skip': 'not actual anymore',
1155 },
1156 {
1157 # Youtube Music Auto-generated description
1158 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1159 'info_dict': {
1160 'id': 'MgNrAu2pzNs',
1161 'ext': 'mp4',
1162 'title': 'Voyeur Girl',
1163 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1164 'upload_date': '20190312',
1165 'uploader': 'Stephen - Topic',
1166 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1167 'artist': 'Stephen',
1168 'track': 'Voyeur Girl',
1169 'album': 'it\'s too much love to know my dear',
1170 'release_date': '20190313',
1171 'release_year': 2019,
1172 },
1173 'params': {
1174 'skip_download': True,
1175 },
1176 },
1177 {
1178 # Youtube Music Auto-generated description
1179 # Retrieve 'artist' field from 'Artist:' in video description
1180 # when it is present on youtube music video
1181 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1182 'info_dict': {
1183 'id': 'k0jLE7tTwjY',
1184 'ext': 'mp4',
1185 'title': 'Latch Feat. Sam Smith',
1186 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1187 'upload_date': '20150110',
1188 'uploader': 'Various Artists - Topic',
1189 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1190 'artist': 'Disclosure',
1191 'track': 'Latch Feat. Sam Smith',
1192 'album': 'Latch Featuring Sam Smith',
1193 'release_date': '20121008',
1194 'release_year': 2012,
1195 },
1196 'params': {
1197 'skip_download': True,
1198 },
1199 },
1200 {
1201 # Youtube Music Auto-generated description
1202 # handle multiple artists on youtube music video
1203 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1204 'info_dict': {
1205 'id': '74qn0eJSjpA',
1206 'ext': 'mp4',
1207 'title': 'Eastside',
1208 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1209 'upload_date': '20180710',
1210 'uploader': 'Benny Blanco - Topic',
1211 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1212 'artist': 'benny blanco, Halsey, Khalid',
1213 'track': 'Eastside',
1214 'album': 'Eastside',
1215 'release_date': '20180713',
1216 'release_year': 2018,
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
1221 },
1222 {
1223 # Youtube Music Auto-generated description
1224 # handle youtube music video with release_year and no release_date
1225 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1226 'info_dict': {
1227 'id': '-hcAI0g-f5M',
1228 'ext': 'mp4',
1229 'title': 'Put It On Me',
1230 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1231 'upload_date': '20180426',
1232 'uploader': 'Matt Maeson - Topic',
1233 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1234 'artist': 'Matt Maeson',
1235 'track': 'Put It On Me',
1236 'album': 'The Hearse',
1237 'release_date': None,
1238 'release_year': 2018,
1239 },
1240 'params': {
1241 'skip_download': True,
1242 },
1243 },
1244 {
1245 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1246 'only_matching': True,
1247 },
1248 {
1249 # invalid -> valid video id redirection
1250 'url': 'DJztXj2GPfl',
1251 'info_dict': {
1252 'id': 'DJztXj2GPfk',
1253 'ext': 'mp4',
1254 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1255 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1256 'upload_date': '20090125',
1257 'uploader': 'Prochorowka',
1258 'uploader_id': 'Prochorowka',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1260 'artist': 'Panjabi MC',
1261 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1262 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1263 },
1264 'params': {
1265 'skip_download': True,
1266 },
1267 }
1268 ]
1269
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance cache of signature-deciphering functions, keyed by
        # (player_url, signature cache id) — see _decrypt_signature.
        self._player_cache = {}
1273
1274 def report_video_info_webpage_download(self, video_id):
1275 """Report attempt to download video info webpage."""
1276 self.to_screen('%s: Downloading video info webpage' % video_id)
1277
1278 def report_information_extraction(self, video_id):
1279 """Report attempt to extract video information."""
1280 self.to_screen('%s: Extracting video information' % video_id)
1281
1282 def report_unavailable_format(self, video_id, format):
1283 """Report extracted video URL."""
1284 self.to_screen('%s: Format %s not available' % (video_id, format))
1285
1286 def report_rtmp_download(self):
1287 """Indicate the download will use the RTMP protocol."""
1288 self.to_screen('RTMP download detected')
1289
1290 def _signature_cache_id(self, example_sig):
1291 """ Return a string representation of a signature """
1292 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1293
1294 @classmethod
1295 def _extract_player_info(cls, player_url):
1296 for player_re in cls._PLAYER_INFO_RE:
1297 id_m = re.search(player_re, player_url)
1298 if id_m:
1299 break
1300 else:
1301 raise ExtractorError('Cannot identify player %r' % player_url)
1302 return id_m.group('ext'), id_m.group('id')
1303
1304 def _extract_signature_function(self, video_id, player_url, example_sig):
1305 player_type, player_id = self._extract_player_info(player_url)
1306
1307 # Read from filesystem cache
1308 func_id = '%s_%s_%s' % (
1309 player_type, player_id, self._signature_cache_id(example_sig))
1310 assert os.path.basename(func_id) == func_id
1311
1312 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1313 if cache_spec is not None:
1314 return lambda s: ''.join(s[i] for i in cache_spec)
1315
1316 download_note = (
1317 'Downloading player %s' % player_url
1318 if self._downloader.params.get('verbose') else
1319 'Downloading %s player %s' % (player_type, player_id)
1320 )
1321 if player_type == 'js':
1322 code = self._download_webpage(
1323 player_url, video_id,
1324 note=download_note,
1325 errnote='Download of %s failed' % player_url)
1326 res = self._parse_sig_js(code)
1327 elif player_type == 'swf':
1328 urlh = self._request_webpage(
1329 player_url, video_id,
1330 note=download_note,
1331 errnote='Download of %s failed' % player_url)
1332 code = urlh.read()
1333 res = self._parse_sig_swf(code)
1334 else:
1335 assert False, 'Invalid player type %r' % player_type
1336
1337 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1338 cache_res = res(test_string)
1339 cache_spec = [ord(c) for c in cache_res]
1340
1341 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1342 return res
1343
    def _print_sig_code(self, func, example_sig):
        """Print Python source that reproduces the signature function.

        Runs *func* on a probe string and, from the observed index
        permutation, emits an equivalent chain of slice/index expressions
        (used with the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render s[start:end+step:step], omitting the parts that
                # coincide with Python's slice defaults.
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing runs with stride +/-1
            # into slices and emitting isolated indices as s[i].
            # NOTE(review): `i` is referenced after the loop, so this assumes
            # idxs has at least two elements (i.e. a multi-char signature).
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a constant-stride run: extend or flush.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a potential +/-1 stride run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit the final element (or close the trailing run).
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1382
    def _parse_sig_js(self, jscode):
        """Find the signature function in the player JS and wrap it.

        Tries a list of known name patterns (newest first) against the player
        source; the matched function is then interpreted with JSInterpreter.
        Returns a callable mapping an encrypted signature string to the
        deciphered one.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions take their arguments as a list.
        return lambda s: initial_function([s])
1403
1404 def _parse_sig_swf(self, file_contents):
1405 swfi = SWFInterpreter(file_contents)
1406 TARGET_CLASSNAME = 'SignatureDecipher'
1407 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1408 initial_function = swfi.extract_function(searched_class, 'decipher')
1409 return lambda s: initial_function([s])
1410
1411 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1412 """Turn the encrypted s field into a working signature"""
1413
1414 if player_url is None:
1415 raise ExtractorError('Cannot decrypt signature without player_url')
1416
1417 if player_url.startswith('//'):
1418 player_url = 'https:' + player_url
1419 elif not re.match(r'https?://', player_url):
1420 player_url = compat_urlparse.urljoin(
1421 'https://www.youtube.com', player_url)
1422 try:
1423 player_id = (player_url, self._signature_cache_id(s))
1424 if player_id not in self._player_cache:
1425 func = self._extract_signature_function(
1426 video_id, player_url, s
1427 )
1428 self._player_cache[player_id] = func
1429 func = self._player_cache[player_id]
1430 if self._downloader.params.get('youtube_print_sig_code'):
1431 self._print_sig_code(func, s)
1432 return func(s)
1433 except Exception as e:
1434 tb = traceback.format_exc()
1435 raise ExtractorError(
1436 'Signature extraction failed: ' + tb, cause=e)
1437
1438 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1439 try:
1440 subs_doc = self._download_xml(
1441 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1442 video_id, note=False)
1443 except ExtractorError as err:
1444 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1445 return {}
1446
1447 sub_lang_list = {}
1448 for track in subs_doc.findall('track'):
1449 lang = track.attrib['lang_code']
1450 if lang in sub_lang_list:
1451 continue
1452 sub_formats = []
1453 for ext in self._SUBTITLE_FORMATS:
1454 params = compat_urllib_parse_urlencode({
1455 'lang': lang,
1456 'v': video_id,
1457 'fmt': ext,
1458 'name': track.attrib['name'].encode('utf-8'),
1459 })
1460 sub_formats.append({
1461 'url': 'https://www.youtube.com/api/timedtext?' + params,
1462 'ext': ext,
1463 })
1464 sub_lang_list[lang] = sub_formats
1465 if has_live_chat_replay:
1466 sub_lang_list['live_chat'] = [
1467 {
1468 'video_id': video_id,
1469 'ext': 'json',
1470 'protocol': 'youtube_live_chat_replay',
1471 },
1472 ]
1473 if not sub_lang_list:
1474 self._downloader.report_warning('video doesn\'t have subtitles')
1475 return {}
1476 return sub_lang_list
1477
1478 def _get_ytplayer_config(self, video_id, webpage):
1479 patterns = (
1480 # User data may contain arbitrary character sequences that may affect
1481 # JSON extraction with regex, e.g. when '};' is contained the second
1482 # regex won't capture the whole JSON. Yet working around by trying more
1483 # concrete regex first keeping in mind proper quoted string handling
1484 # to be implemented in future that will replace this workaround (see
1485 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1486 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1487 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1488 r';ytplayer\.config\s*=\s*({.+?});',
1489 )
1490 config = self._search_regex(
1491 patterns, webpage, 'ytplayer.config', default=None)
1492 if config:
1493 return self._parse_json(
1494 uppercase_escape(config), video_id, fatal=False)
1495
1496 def _get_yt_initial_data(self, video_id, webpage):
1497 config = self._search_regex(
1498 (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
1499 r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
1500 webpage, 'ytInitialData', default=None)
1501 if config:
1502 return self._parse_json(
1503 uppercase_escape(config), video_id, fatal=False)
1504
    def _get_automatic_captions(self, video_id, webpage):
        """Extract automatically-generated captions for *video_id*.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process. Three strategies are tried in
        order: the legacy 'ttsurl' player arg, the post-22.06.2017
        player_response captionTracks, and the obsolete caption_tracks args.
        Returns a dict mapping language code to format lists, or {} with a
        warning on any failure.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            args = player_config['args']
            caption_url = args.get('ttsurl')
            if caption_url:
                # Legacy path: the player args carry a timedtext base URL.
                timestamp = args['timestamp']
                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # Build one URL per (target language, subtitle format).
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Expand a single caption base URL into per-language,
                # per-format entries by rewriting its query string.
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            player_response = args.get('player_response')
            if player_response and isinstance(player_response, compat_str):
                player_response = self._parse_json(
                    player_response, video_id, fatal=False)
                if player_response:
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                    caption_tracks = renderer['captionTracks']
                    # Use the first automatic (kind-tagged) track as the base
                    # URL; translated variants are derived from it.
                    for caption_track in caption_tracks:
                        if 'kind' not in caption_track:
                            # not an automatic transcription
                            continue
                        base_url = caption_track['baseUrl']
                        sub_lang_list = []
                        for lang in renderer['translationLanguages']:
                            lang_code = lang.get('languageCode')
                            if lang_code:
                                sub_lang_list.append(lang_code)
                        return make_captions(base_url, sub_lang_list)

                    self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                    return {}
            # Some videos don't provide ttsurl but rather caption_tracks and
            # caption_translation_languages (e.g. 20LmZk1hakA)
            # Does not used anymore as of 22.06.2017
            caption_tracks = args['caption_tracks']
            caption_translation_languages = args['caption_translation_languages']
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
            sub_lang_list = []
            for lang in caption_translation_languages.split(','):
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                sub_lang = lang_qs.get('lc', [None])[0]
                if sub_lang:
                    sub_lang_list.append(sub_lang)
            return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1613
1614 def _mark_watched(self, video_id, video_info, player_response):
1615 playback_url = url_or_none(try_get(
1616 player_response,
1617 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1618 video_info, lambda x: x['videostats_playback_base_url'][0]))
1619 if not playback_url:
1620 return
1621 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1622 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1623
1624 # cpn generation algorithm is reverse engineered from base.js.
1625 # In fact it works even with dummy cpn.
1626 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1627 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1628
1629 qs.update({
1630 'ver': ['2'],
1631 'cpn': [cpn],
1632 })
1633 playback_url = compat_urlparse.urlunparse(
1634 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1635
1636 self._download_webpage(
1637 playback_url, video_id, 'Marking watched',
1638 'Unable to mark watched', fatal=False)
1639
1640 @staticmethod
1641 def _extract_urls(webpage):
1642 # Embedded YouTube player
1643 entries = [
1644 unescapeHTML(mobj.group('url'))
1645 for mobj in re.finditer(r'''(?x)
1646 (?:
1647 <iframe[^>]+?src=|
1648 data-video-url=|
1649 <embed[^>]+?src=|
1650 embedSWF\(?:\s*|
1651 <object[^>]+data=|
1652 new\s+SWFObject\(
1653 )
1654 (["\'])
1655 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1656 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1657 \1''', webpage)]
1658
1659 # lazyYT YouTube embed
1660 entries.extend(list(map(
1661 unescapeHTML,
1662 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1663
1664 # Wordpress "YouTube Video Importer" plugin
1665 matches = re.findall(r'''(?x)<div[^>]+
1666 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1667 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1668 entries.extend(m[-1] for m in matches)
1669
1670 return entries
1671
1672 @staticmethod
1673 def _extract_url(webpage):
1674 urls = YoutubeIE._extract_urls(webpage)
1675 return urls[0] if urls else None
1676
1677 @classmethod
1678 def extract_id(cls, url):
1679 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1680 if mobj is None:
1681 raise ExtractorError('Invalid URL: %s' % url)
1682 video_id = mobj.group(2)
1683 return video_id
1684
1685 def _extract_chapters_from_json(self, webpage, video_id, duration):
1686 if not webpage:
1687 return
1688 initial_data = self._parse_json(
1689 self._search_regex(
1690 r'window\["ytInitialData"\] = (.+);\n', webpage,
1691 'player args', default='{}'),
1692 video_id, fatal=False)
1693 if not initial_data or not isinstance(initial_data, dict):
1694 return
1695 chapters_list = try_get(
1696 initial_data,
1697 lambda x: x['playerOverlays']
1698 ['playerOverlayRenderer']
1699 ['decoratedPlayerBarRenderer']
1700 ['decoratedPlayerBarRenderer']
1701 ['playerBar']
1702 ['chapteredPlayerBarRenderer']
1703 ['chapters'],
1704 list)
1705 if not chapters_list:
1706 return
1707
1708 def chapter_time(chapter):
1709 return float_or_none(
1710 try_get(
1711 chapter,
1712 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1713 int),
1714 scale=1000)
1715 chapters = []
1716 for next_num, chapter in enumerate(chapters_list, start=1):
1717 start_time = chapter_time(chapter)
1718 if start_time is None:
1719 continue
1720 end_time = (chapter_time(chapters_list[next_num])
1721 if next_num < len(chapters_list) else duration)
1722 if end_time is None:
1723 continue
1724 title = try_get(
1725 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1726 compat_str)
1727 chapters.append({
1728 'start_time': start_time,
1729 'end_time': end_time,
1730 'title': title,
1731 })
1732 return chapters
1733
1734 @staticmethod
1735 def _extract_chapters_from_description(description, duration):
1736 if not description:
1737 return None
1738 chapter_lines = re.findall(
1739 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1740 description)
1741 if not chapter_lines:
1742 return None
1743 chapters = []
1744 for next_num, (chapter_line, time_point) in enumerate(
1745 chapter_lines, start=1):
1746 start_time = parse_duration(time_point)
1747 if start_time is None:
1748 continue
1749 if start_time > duration:
1750 break
1751 end_time = (duration if next_num == len(chapter_lines)
1752 else parse_duration(chapter_lines[next_num][1]))
1753 if end_time is None:
1754 continue
1755 if end_time > duration:
1756 end_time = duration
1757 if start_time > end_time:
1758 break
1759 chapter_title = re.sub(
1760 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1761 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1762 chapters.append({
1763 'start_time': start_time,
1764 'end_time': end_time,
1765 'title': chapter_title,
1766 })
1767 return chapters
1768
1769 def _extract_chapters(self, webpage, description, video_id, duration):
1770 return (self._extract_chapters_from_json(webpage, video_id, duration)
1771 or self._extract_chapters_from_description(description, duration))
1772
1773 def _real_extract(self, url):
1774 url, smuggled_data = unsmuggle_url(url, {})
1775
1776 proto = (
1777 'http' if self._downloader.params.get('prefer_insecure', False)
1778 else 'https')
1779
1780 start_time = None
1781 end_time = None
1782 parsed_url = compat_urllib_parse_urlparse(url)
1783 for component in [parsed_url.fragment, parsed_url.query]:
1784 query = compat_parse_qs(component)
1785 if start_time is None and 't' in query:
1786 start_time = parse_duration(query['t'][0])
1787 if start_time is None and 'start' in query:
1788 start_time = parse_duration(query['start'][0])
1789 if end_time is None and 'end' in query:
1790 end_time = parse_duration(query['end'][0])
1791
1792 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1793 mobj = re.search(self._NEXT_URL_RE, url)
1794 if mobj:
1795 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1796 video_id = self.extract_id(url)
1797
1798 # Get video webpage
1799 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1800 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1801
1802 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1803 video_id = qs.get('v', [None])[0] or video_id
1804
1805 # Attempt to extract SWF player URL
1806 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1807 if mobj is not None:
1808 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1809 else:
1810 player_url = None
1811
1812 dash_mpds = []
1813
1814 def add_dash_mpd(video_info):
1815 dash_mpd = video_info.get('dashmpd')
1816 if dash_mpd and dash_mpd[0] not in dash_mpds:
1817 dash_mpds.append(dash_mpd[0])
1818
1819 def add_dash_mpd_pr(pl_response):
1820 dash_mpd = url_or_none(try_get(
1821 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1822 compat_str))
1823 if dash_mpd and dash_mpd not in dash_mpds:
1824 dash_mpds.append(dash_mpd)
1825
1826 is_live = None
1827 view_count = None
1828
1829 def extract_view_count(v_info):
1830 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1831
1832 def extract_player_response(player_response, video_id):
1833 pl_response = str_or_none(player_response)
1834 if not pl_response:
1835 return
1836 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1837 if isinstance(pl_response, dict):
1838 add_dash_mpd_pr(pl_response)
1839 return pl_response
1840
1841 player_response = {}
1842
1843 # Get video info
1844 video_info = {}
1845 embed_webpage = None
1846 if self._html_search_meta('og:restrictions:age', video_webpage, default=None) == "18+":
1847 age_gate = True
1848 # We simulate the access to the video from www.youtube.com/v/{video_id}
1849 # this can be viewed without login into Youtube
1850 url = proto + '://www.youtube.com/embed/%s' % video_id
1851 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1852 data = compat_urllib_parse_urlencode({
1853 'video_id': video_id,
1854 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1855 'sts': self._search_regex(
1856 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1857 })
1858 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1859 try:
1860 video_info_webpage = self._download_webpage(
1861 video_info_url, video_id,
1862 note='Refetching age-gated info webpage',
1863 errnote='unable to download video info webpage')
1864 except ExtractorError:
1865 video_info_webpage = None
1866 if video_info_webpage:
1867 video_info = compat_parse_qs(video_info_webpage)
1868 pl_response = video_info.get('player_response', [None])[0]
1869 player_response = extract_player_response(pl_response, video_id)
1870 add_dash_mpd(video_info)
1871 view_count = extract_view_count(video_info)
1872 else:
1873 age_gate = False
1874 # Try looking directly into the video webpage
1875 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1876 if ytplayer_config:
1877 args = ytplayer_config['args']
1878 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1879 # Convert to the same format returned by compat_parse_qs
1880 video_info = dict((k, [v]) for k, v in args.items())
1881 add_dash_mpd(video_info)
1882 # Rental video is not rented but preview is available (e.g.
1883 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1884 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1885 if not video_info and args.get('ypc_vid'):
1886 return self.url_result(
1887 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1888 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1889 is_live = True
1890 if not player_response:
1891 player_response = extract_player_response(args.get('player_response'), video_id)
1892 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1893 add_dash_mpd_pr(player_response)
1894
1895 def extract_unavailable_message():
1896 messages = []
1897 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1898 msg = self._html_search_regex(
1899 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1900 video_webpage, 'unavailable %s' % kind, default=None)
1901 if msg:
1902 messages.append(msg)
1903 if messages:
1904 return '\n'.join(messages)
1905
1906 if not video_info and not player_response:
1907 unavailable_message = extract_unavailable_message()
1908 if not unavailable_message:
1909 unavailable_message = 'Unable to extract video data'
1910 raise ExtractorError(
1911 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1912
1913 if not isinstance(video_info, dict):
1914 video_info = {}
1915
1916 video_details = try_get(
1917 player_response, lambda x: x['videoDetails'], dict) or {}
1918
1919 microformat = try_get(
1920 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1921
1922 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1923 if not video_title:
1924 self._downloader.report_warning('Unable to extract video title')
1925 video_title = '_'
1926
1927 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1928 if video_description:
1929
1930 def replace_url(m):
1931 redir_url = compat_urlparse.urljoin(url, m.group(1))
1932 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1933 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1934 qs = compat_parse_qs(parsed_redir_url.query)
1935 q = qs.get('q')
1936 if q and q[0]:
1937 return q[0]
1938 return redir_url
1939
1940 description_original = video_description = re.sub(r'''(?x)
1941 <a\s+
1942 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1943 (?:title|href)="([^"]+)"\s+
1944 (?:[a-zA-Z-]+="[^"]*"\s+)*?
1945 class="[^"]*"[^>]*>
1946 [^<]+\.{3}\s*
1947 </a>
1948 ''', replace_url, video_description)
1949 video_description = clean_html(video_description)
1950 else:
1951 video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage)
1952
1953 if not smuggled_data.get('force_singlefeed', False):
1954 if not self._downloader.params.get('noplaylist'):
1955 multifeed_metadata_list = try_get(
1956 player_response,
1957 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1958 compat_str) or try_get(
1959 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1960 if multifeed_metadata_list:
1961 entries = []
1962 feed_ids = []
1963 for feed in multifeed_metadata_list.split(','):
1964 # Unquote should take place before split on comma (,) since textual
1965 # fields may contain comma as well (see
1966 # https://github.com/ytdl-org/youtube-dl/issues/8536)
1967 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1968
1969 def feed_entry(name):
1970 return try_get(feed_data, lambda x: x[name][0], compat_str)
1971
1972 feed_id = feed_entry('id')
1973 if not feed_id:
1974 continue
1975 feed_title = feed_entry('title')
1976 title = video_title
1977 if feed_title:
1978 title += ' (%s)' % feed_title
1979 entries.append({
1980 '_type': 'url_transparent',
1981 'ie_key': 'Youtube',
1982 'url': smuggle_url(
1983 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1984 {'force_singlefeed': True}),
1985 'title': title,
1986 })
1987 feed_ids.append(feed_id)
1988 self.to_screen(
1989 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1990 % (', '.join(feed_ids), video_id))
1991 return self.playlist_result(entries, video_id, video_title, video_description)
1992 else:
1993 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
1994
1995 if view_count is None:
1996 view_count = extract_view_count(video_info)
1997 if view_count is None and video_details:
1998 view_count = int_or_none(video_details.get('viewCount'))
1999 if view_count is None and microformat:
2000 view_count = int_or_none(microformat.get('viewCount'))
2001
2002 if is_live is None:
2003 is_live = bool_or_none(video_details.get('isLive'))
2004
2005 has_live_chat_replay = False
2006 if not is_live:
2007 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2008 try:
2009 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2010 has_live_chat_replay = True
2011 except (KeyError, IndexError, TypeError):
2012 pass
2013
2014 # Check for "rental" videos
2015 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2016 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2017
2018 def _extract_filesize(media_url):
2019 return int_or_none(self._search_regex(
2020 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2021
2022 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2023 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2024
2025 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2026 self.report_rtmp_download()
2027 formats = [{
2028 'format_id': '_rtmp',
2029 'protocol': 'rtmp',
2030 'url': video_info['conn'][0],
2031 'player_url': player_url,
2032 }]
2033 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2034 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2035 if 'rtmpe%3Dyes' in encoded_url_map:
2036 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2037 formats = []
2038 formats_spec = {}
2039 fmt_list = video_info.get('fmt_list', [''])[0]
2040 if fmt_list:
2041 for fmt in fmt_list.split(','):
2042 spec = fmt.split('/')
2043 if len(spec) > 1:
2044 width_height = spec[1].split('x')
2045 if len(width_height) == 2:
2046 formats_spec[spec[0]] = {
2047 'resolution': spec[1],
2048 'width': int_or_none(width_height[0]),
2049 'height': int_or_none(width_height[1]),
2050 }
2051 for fmt in streaming_formats:
2052 itag = str_or_none(fmt.get('itag'))
2053 if not itag:
2054 continue
2055 quality = fmt.get('quality')
2056 quality_label = fmt.get('qualityLabel') or quality
2057 formats_spec[itag] = {
2058 'asr': int_or_none(fmt.get('audioSampleRate')),
2059 'filesize': int_or_none(fmt.get('contentLength')),
2060 'format_note': quality_label,
2061 'fps': int_or_none(fmt.get('fps')),
2062 'height': int_or_none(fmt.get('height')),
2063 # bitrate for itag 43 is always 2147483647
2064 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2065 'width': int_or_none(fmt.get('width')),
2066 }
2067
2068 for fmt in streaming_formats:
2069 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2070 continue
2071 url = url_or_none(fmt.get('url'))
2072
2073 if not url:
2074 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2075 if not cipher:
2076 continue
2077 url_data = compat_parse_qs(cipher)
2078 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2079 if not url:
2080 continue
2081 else:
2082 cipher = None
2083 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2084
2085 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2086 # Unsupported FORMAT_STREAM_TYPE_OTF
2087 if stream_type == 3:
2088 continue
2089
2090 format_id = fmt.get('itag') or url_data['itag'][0]
2091 if not format_id:
2092 continue
2093 format_id = compat_str(format_id)
2094
2095 if cipher:
2096 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2097 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2098 jsplayer_url_json = self._search_regex(
2099 ASSETS_RE,
2100 embed_webpage if age_gate else video_webpage,
2101 'JS player URL (1)', default=None)
2102 if not jsplayer_url_json and not age_gate:
2103 # We need the embed website after all
2104 if embed_webpage is None:
2105 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2106 embed_webpage = self._download_webpage(
2107 embed_url, video_id, 'Downloading embed webpage')
2108 jsplayer_url_json = self._search_regex(
2109 ASSETS_RE, embed_webpage, 'JS player URL')
2110
2111 player_url = json.loads(jsplayer_url_json)
2112 if player_url is None:
2113 player_url_json = self._search_regex(
2114 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2115 video_webpage, 'age gate player URL')
2116 player_url = json.loads(player_url_json)
2117
2118 if 'sig' in url_data:
2119 url += '&signature=' + url_data['sig'][0]
2120 elif 's' in url_data:
2121 encrypted_sig = url_data['s'][0]
2122
2123 if self._downloader.params.get('verbose'):
2124 if player_url is None:
2125 player_desc = 'unknown'
2126 else:
2127 player_type, player_version = self._extract_player_info(player_url)
2128 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2129 parts_sizes = self._signature_cache_id(encrypted_sig)
2130 self.to_screen('{%s} signature length %s, %s' %
2131 (format_id, parts_sizes, player_desc))
2132
2133 signature = self._decrypt_signature(
2134 encrypted_sig, video_id, player_url, age_gate)
2135 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2136 url += '&%s=%s' % (sp, signature)
2137 if 'ratebypass' not in url:
2138 url += '&ratebypass=yes'
2139
2140 dct = {
2141 'format_id': format_id,
2142 'url': url,
2143 'player_url': player_url,
2144 }
2145 if format_id in self._formats:
2146 dct.update(self._formats[format_id])
2147 if format_id in formats_spec:
2148 dct.update(formats_spec[format_id])
2149
2150 # Some itags are not included in DASH manifest thus corresponding formats will
2151 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2152 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2153 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2154 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2155
2156 if width is None:
2157 width = int_or_none(fmt.get('width'))
2158 if height is None:
2159 height = int_or_none(fmt.get('height'))
2160
2161 filesize = int_or_none(url_data.get(
2162 'clen', [None])[0]) or _extract_filesize(url)
2163
2164 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2165 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2166
2167 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2168 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2169 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2170
2171 more_fields = {
2172 'filesize': filesize,
2173 'tbr': tbr,
2174 'width': width,
2175 'height': height,
2176 'fps': fps,
2177 'format_note': quality_label or quality,
2178 }
2179 for key, value in more_fields.items():
2180 if value:
2181 dct[key] = value
2182 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2183 if type_:
2184 type_split = type_.split(';')
2185 kind_ext = type_split[0].split('/')
2186 if len(kind_ext) == 2:
2187 kind, _ = kind_ext
2188 dct['ext'] = mimetype2ext(type_split[0])
2189 if kind in ('audio', 'video'):
2190 codecs = None
2191 for mobj in re.finditer(
2192 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2193 if mobj.group('key') == 'codecs':
2194 codecs = mobj.group('val')
2195 break
2196 if codecs:
2197 dct.update(parse_codecs(codecs))
2198 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2199 dct['downloader_options'] = {
2200 # Youtube throttles chunks >~10M
2201 'http_chunk_size': 10485760,
2202 }
2203 formats.append(dct)
2204 else:
2205 manifest_url = (
2206 url_or_none(try_get(
2207 player_response,
2208 lambda x: x['streamingData']['hlsManifestUrl'],
2209 compat_str))
2210 or url_or_none(try_get(
2211 video_info, lambda x: x['hlsvp'][0], compat_str)))
2212 if manifest_url:
2213 formats = []
2214 m3u8_formats = self._extract_m3u8_formats(
2215 manifest_url, video_id, 'mp4', fatal=False)
2216 for a_format in m3u8_formats:
2217 itag = self._search_regex(
2218 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2219 if itag:
2220 a_format['format_id'] = itag
2221 if itag in self._formats:
2222 dct = self._formats[itag].copy()
2223 dct.update(a_format)
2224 a_format = dct
2225 a_format['player_url'] = player_url
2226 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2227 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2228 formats.append(a_format)
2229 else:
2230 error_message = extract_unavailable_message()
2231 if not error_message:
2232 error_message = clean_html(try_get(
2233 player_response, lambda x: x['playabilityStatus']['reason'],
2234 compat_str))
2235 if not error_message:
2236 error_message = clean_html(
2237 try_get(video_info, lambda x: x['reason'][0], compat_str))
2238 if error_message:
2239 raise ExtractorError(error_message, expected=True)
2240 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2241
2242 # uploader
2243 video_uploader = try_get(
2244 video_info, lambda x: x['author'][0],
2245 compat_str) or str_or_none(video_details.get('author'))
2246 if video_uploader:
2247 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2248 else:
2249 self._downloader.report_warning('unable to extract uploader name')
2250
2251 # uploader_id
2252 video_uploader_id = None
2253 video_uploader_url = None
2254 mobj = re.search(
2255 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2256 video_webpage)
2257 if mobj is not None:
2258 video_uploader_id = mobj.group('uploader_id')
2259 video_uploader_url = mobj.group('uploader_url')
2260 else:
2261 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2262 if owner_profile_url:
2263 video_uploader_id = self._search_regex(
2264 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2265 default=None)
2266 video_uploader_url = owner_profile_url
2267
2268 channel_id = (
2269 str_or_none(video_details.get('channelId'))
2270 or self._html_search_meta(
2271 'channelId', video_webpage, 'channel id', default=None)
2272 or self._search_regex(
2273 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2274 video_webpage, 'channel id', default=None, group='id'))
2275 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2276
2277 thumbnails = []
2278 thumbnails_list = try_get(
2279 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2280 for t in thumbnails_list:
2281 if not isinstance(t, dict):
2282 continue
2283 thumbnail_url = url_or_none(t.get('url'))
2284 if not thumbnail_url:
2285 continue
2286 thumbnails.append({
2287 'url': thumbnail_url,
2288 'width': int_or_none(t.get('width')),
2289 'height': int_or_none(t.get('height')),
2290 })
2291
2292 if not thumbnails:
2293 video_thumbnail = None
2294 # We try first to get a high quality image:
2295 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2296 video_webpage, re.DOTALL)
2297 if m_thumb is not None:
2298 video_thumbnail = m_thumb.group(1)
2299 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2300 if thumbnail_url:
2301 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2302 if video_thumbnail:
2303 thumbnails.append({'url': video_thumbnail})
2304
2305 # upload date
2306 upload_date = self._html_search_meta(
2307 'datePublished', video_webpage, 'upload date', default=None)
2308 if not upload_date:
2309 upload_date = self._search_regex(
2310 [r'(?s)id="eow-date.*?>(.*?)</span>',
2311 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2312 video_webpage, 'upload date', default=None)
2313 if not upload_date:
2314 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2315 upload_date = unified_strdate(upload_date)
2316
2317 video_license = self._html_search_regex(
2318 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2319 video_webpage, 'license', default=None)
2320
2321 m_music = re.search(
2322 r'''(?x)
2323 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2324 <ul[^>]*>\s*
2325 <li>(?P<title>.+?)
2326 by (?P<creator>.+?)
2327 (?:
2328 \(.+?\)|
2329 <a[^>]*
2330 (?:
2331 \bhref=["\']/red[^>]*>| # drop possible
2332 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2333 )
2334 .*?
2335 )?</li
2336 ''',
2337 video_webpage)
2338 if m_music:
2339 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2340 video_creator = clean_html(m_music.group('creator'))
2341 else:
2342 video_alt_title = video_creator = None
2343
2344 def extract_meta(field):
2345 return self._html_search_regex(
2346 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2347 video_webpage, field, default=None)
2348
2349 track = extract_meta('Song')
2350 artist = extract_meta('Artist')
2351 album = extract_meta('Album')
2352
2353 # Youtube Music Auto-generated description
2354 release_date = release_year = None
2355 if video_description:
2356 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2357 if mobj:
2358 if not track:
2359 track = mobj.group('track').strip()
2360 if not artist:
2361 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2362 if not album:
2363 album = mobj.group('album'.strip())
2364 release_year = mobj.group('release_year')
2365 release_date = mobj.group('release_date')
2366 if release_date:
2367 release_date = release_date.replace('-', '')
2368 if not release_year:
2369 release_year = int(release_date[:4])
2370 if release_year:
2371 release_year = int(release_year)
2372
2373 m_episode = re.search(
2374 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2375 video_webpage)
2376 if m_episode:
2377 series = unescapeHTML(m_episode.group('series'))
2378 season_number = int(m_episode.group('season'))
2379 episode_number = int(m_episode.group('episode'))
2380 else:
2381 series = season_number = episode_number = None
2382
2383 m_cat_container = self._search_regex(
2384 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2385 video_webpage, 'categories', default=None)
2386 category = None
2387 if m_cat_container:
2388 category = self._html_search_regex(
2389 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2390 default=None)
2391 if not category:
2392 category = try_get(
2393 microformat, lambda x: x['category'], compat_str)
2394 video_categories = None if category is None else [category]
2395
2396 video_tags = [
2397 unescapeHTML(m.group('content'))
2398 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2399 if not video_tags:
2400 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2401
2402 def _extract_count(count_name):
2403 return str_to_int(self._search_regex(
2404 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2405 % re.escape(count_name),
2406 video_webpage, count_name, default=None))
2407
2408 like_count = _extract_count('like')
2409 dislike_count = _extract_count('dislike')
2410
2411 if view_count is None:
2412 view_count = str_to_int(self._search_regex(
2413 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2414 'view count', default=None))
2415
2416 average_rating = (
2417 float_or_none(video_details.get('averageRating'))
2418 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2419
2420 # subtitles
2421 video_subtitles = self.extract_subtitles(
2422 video_id, video_webpage, has_live_chat_replay)
2423 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2424
2425 video_duration = try_get(
2426 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2427 if not video_duration:
2428 video_duration = int_or_none(video_details.get('lengthSeconds'))
2429 if not video_duration:
2430 video_duration = parse_duration(self._html_search_meta(
2431 'duration', video_webpage, 'video duration'))
2432
2433 # annotations
2434 video_annotations = None
2435 if self._downloader.params.get('writeannotations', False):
2436 xsrf_token = self._search_regex(
2437 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2438 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2439 invideo_url = try_get(
2440 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2441 if xsrf_token and invideo_url:
2442 xsrf_field_name = self._search_regex(
2443 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2444 video_webpage, 'xsrf field name',
2445 group='xsrf_field_name', default='session_token')
2446 video_annotations = self._download_webpage(
2447 self._proto_relative_url(invideo_url),
2448 video_id, note='Downloading annotations',
2449 errnote='Unable to download video annotations', fatal=False,
2450 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2451
2452 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2453
2454 # Look for the DASH manifest
2455 if self._downloader.params.get('youtube_include_dash_manifest', True):
2456 dash_mpd_fatal = True
2457 for mpd_url in dash_mpds:
2458 dash_formats = {}
2459 try:
2460 def decrypt_sig(mobj):
2461 s = mobj.group(1)
2462 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2463 return '/signature/%s' % dec_s
2464
2465 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2466
2467 for df in self._extract_mpd_formats(
2468 mpd_url, video_id, fatal=dash_mpd_fatal,
2469 formats_dict=self._formats):
2470 if not df.get('filesize'):
2471 df['filesize'] = _extract_filesize(df['url'])
2472 # Do not overwrite DASH format found in some previous DASH manifest
2473 if df['format_id'] not in dash_formats:
2474 dash_formats[df['format_id']] = df
2475 # Additional DASH manifests may end up in HTTP Error 403 therefore
2476 # allow them to fail without bug report message if we already have
2477 # some DASH manifest succeeded. This is temporary workaround to reduce
2478 # burst of bug reports until we figure out the reason and whether it
2479 # can be fixed at all.
2480 dash_mpd_fatal = False
2481 except (ExtractorError, KeyError) as e:
2482 self.report_warning(
2483 'Skipping DASH manifest: %r' % e, video_id)
2484 if dash_formats:
2485 # Remove the formats we found through non-DASH, they
2486 # contain less info and it can be wrong, because we use
2487 # fixed values (for example the resolution). See
2488 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2489 # example.
2490 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2491 formats.extend(dash_formats.values())
2492
2493 # Check for malformed aspect ratio
2494 stretched_m = re.search(
2495 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2496 video_webpage)
2497 if stretched_m:
2498 w = float(stretched_m.group('w'))
2499 h = float(stretched_m.group('h'))
2500 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2501 # We will only process correct ratios.
2502 if w > 0 and h > 0:
2503 ratio = w / h
2504 for f in formats:
2505 if f.get('vcodec') != 'none':
2506 f['stretched_ratio'] = ratio
2507
2508 if not formats:
2509 if 'reason' in video_info:
2510 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2511 regions_allowed = self._html_search_meta(
2512 'regionsAllowed', video_webpage, default=None)
2513 countries = regions_allowed.split(',') if regions_allowed else None
2514 self.raise_geo_restricted(
2515 msg=video_info['reason'][0], countries=countries)
2516 reason = video_info['reason'][0]
2517 if 'Invalid parameters' in reason:
2518 unavailable_message = extract_unavailable_message()
2519 if unavailable_message:
2520 reason = unavailable_message
2521 raise ExtractorError(
2522 'YouTube said: %s' % reason,
2523 expected=True, video_id=video_id)
2524 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2525 raise ExtractorError('This video is DRM protected.', expected=True)
2526
2527 self._sort_formats(formats)
2528
2529 self.mark_watched(video_id, video_info, player_response)
2530
2531 return {
2532 'id': video_id,
2533 'uploader': video_uploader,
2534 'uploader_id': video_uploader_id,
2535 'uploader_url': video_uploader_url,
2536 'channel_id': channel_id,
2537 'channel_url': channel_url,
2538 'upload_date': upload_date,
2539 'license': video_license,
2540 'creator': video_creator or artist,
2541 'title': video_title,
2542 'alt_title': video_alt_title or track,
2543 'thumbnails': thumbnails,
2544 'description': video_description,
2545 'categories': video_categories,
2546 'tags': video_tags,
2547 'subtitles': video_subtitles,
2548 'automatic_captions': automatic_captions,
2549 'duration': video_duration,
2550 'age_limit': 18 if age_gate else 0,
2551 'annotations': video_annotations,
2552 'chapters': chapters,
2553 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2554 'view_count': view_count,
2555 'like_count': like_count,
2556 'dislike_count': dislike_count,
2557 'average_rating': average_rating,
2558 'formats': formats,
2559 'is_live': is_live,
2560 'start_time': start_time,
2561 'end_time': end_time,
2562 'series': series,
2563 'season_number': season_number,
2564 'episode_number': episode_number,
2565 'track': track,
2566 'artist': artist,
2567 'album': album,
2568 'release_date': release_date,
2569 'release_year': release_year,
2570 }
2571
2572
2573class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
2574 IE_DESC = 'YouTube.com playlists'
2575 _VALID_URL = r"""(?x)(?:
2576 (?:https?://)?
2577 (?:\w+\.)?
2578 (?:
2579 (?:
2580 youtube(?:kids)?\.com|
2581 invidio\.us
2582 )
2583 /
2584 (?:
2585 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
2586 \? (?:.*?[&;])*? (?:p|a|list)=
2587 | p/
2588 )|
2589 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
2590 )
2591 (
2592 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
2593 # Top tracks, they can also include dots
2594 |(?:MC)[\w\.]*
2595 )
2596 .*
2597 |
2598 (%(playlist_id)s)
2599 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
2600 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
2601 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2602 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
2603 IE_NAME = 'youtube:playlist'
2604 _TESTS = [{
2605 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2606 'info_dict': {
2607 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2608 'uploader': 'Sergey M.',
2609 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
2610 'title': 'youtube-dl public playlist',
2611 },
2612 'playlist_count': 1,
2613 }, {
2614 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2615 'info_dict': {
2616 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2617 'uploader': 'Sergey M.',
2618 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
2619 'title': 'youtube-dl empty playlist',
2620 },
2621 'playlist_count': 0,
2622 }, {
2623 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2624 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2625 'info_dict': {
2626 'title': '29C3: Not my department',
2627 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2628 'uploader': 'Christiaan008',
2629 'uploader_id': 'ChRiStIaAn008',
2630 },
2631 'playlist_count': 96,
2632 }, {
2633 'note': 'issue #673',
2634 'url': 'PLBB231211A4F62143',
2635 'info_dict': {
2636 'title': '[OLD]Team Fortress 2 (Class-based LP)',
2637 'id': 'PLBB231211A4F62143',
2638 'uploader': 'Wickydoo',
2639 'uploader_id': 'Wickydoo',
2640 },
2641 'playlist_mincount': 26,
2642 }, {
2643 'note': 'Large playlist',
2644 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2645 'info_dict': {
2646 'title': 'Uploads from Cauchemar',
2647 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2648 'uploader': 'Cauchemar',
2649 'uploader_id': 'Cauchemar89',
2650 },
2651 'playlist_mincount': 799,
2652 }, {
2653 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2654 'info_dict': {
2655 'title': 'YDL_safe_search',
2656 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2657 },
2658 'playlist_count': 2,
2659 'skip': 'This playlist is private',
2660 }, {
2661 'note': 'embedded',
2662 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2663 'playlist_count': 4,
2664 'info_dict': {
2665 'title': 'JODA15',
2666 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
2667 'uploader': 'milan',
2668 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
2669 }
2670 }, {
2671 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2672 'playlist_mincount': 485,
2673 'info_dict': {
2674 'title': '2018 Chinese New Singles (11/6 updated)',
2675 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2676 'uploader': 'LBK',
2677 'uploader_id': 'sdragonfang',
2678 }
2679 }, {
2680 'note': 'Embedded SWF player',
2681 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
2682 'playlist_count': 4,
2683 'info_dict': {
2684 'title': 'JODA7',
2685 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
2686 },
2687 'skip': 'This playlist does not exist',
2688 }, {
2689 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2690 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2691 'info_dict': {
2692 'title': 'Uploads from Interstellar Movie',
2693 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
2694 'uploader': 'Interstellar Movie',
2695 'uploader_id': 'InterstellarMovie1',
2696 },
2697 'playlist_mincount': 21,
2698 }, {
2699 # Playlist URL that does not actually serve a playlist
2700 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2701 'info_dict': {
2702 'id': 'FqZTN594JQw',
2703 'ext': 'webm',
2704 'title': "Smiley's People 01 detective, Adventure Series, Action",
2705 'uploader': 'STREEM',
2706 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
2707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
2708 'upload_date': '20150526',
2709 'license': 'Standard YouTube License',
2710 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2711 'categories': ['People & Blogs'],
2712 'tags': list,
2713 'view_count': int,
2714 'like_count': int,
2715 'dislike_count': int,
2716 },
2717 'params': {
2718 'skip_download': True,
2719 },
2720 'skip': 'This video is not available.',
2721 'add_ie': [YoutubeIE.ie_key()],
2722 }, {
2723 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2724 'info_dict': {
2725 'id': 'yeWKywCrFtk',
2726 'ext': 'mp4',
2727 'title': 'Small Scale Baler and Braiding Rugs',
2728 'uploader': 'Backus-Page House Museum',
2729 'uploader_id': 'backuspagemuseum',
2730 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
2731 'upload_date': '20161008',
2732 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2733 'categories': ['Nonprofits & Activism'],
2734 'tags': list,
2735 'like_count': int,
2736 'dislike_count': int,
2737 },
2738 'params': {
2739 'noplaylist': True,
2740 'skip_download': True,
2741 },
2742 }, {
2743 # https://github.com/ytdl-org/youtube-dl/issues/21844
2744 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2745 'info_dict': {
2746 'title': 'Data Analysis with Dr Mike Pound',
2747 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2748 'uploader_id': 'Computerphile',
2749 'uploader': 'Computerphile',
2750 },
2751 'playlist_mincount': 11,
2752 }, {
2753 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2754 'only_matching': True,
2755 }, {
2756 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2757 'only_matching': True,
2758 }, {
2759 # music album playlist
2760 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2761 'only_matching': True,
2762 }, {
2763 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2764 'only_matching': True,
2765 }, {
2766 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2767 'only_matching': True,
2768 }]
2769
    def _real_initialize(self):
        """Perform the (optional) account login before any extraction runs."""
        self._login()
2772
2773 def extract_videos_from_page(self, page):
2774 ids_in_page = []
2775 titles_in_page = []
2776
2777 for item in re.findall(
2778 r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
2779 attrs = extract_attributes(item)
2780 video_id = attrs['data-video-id']
2781 video_title = unescapeHTML(attrs.get('data-title'))
2782 if video_title:
2783 video_title = video_title.strip()
2784 ids_in_page.append(video_id)
2785 titles_in_page.append(video_title)
2786
2787 # Fallback with old _VIDEO_RE
2788 self.extract_videos_from_page_impl(
2789 self._VIDEO_RE, page, ids_in_page, titles_in_page)
2790
2791 # Relaxed fallbacks
2792 self.extract_videos_from_page_impl(
2793 r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
2794 ids_in_page, titles_in_page)
2795 self.extract_videos_from_page_impl(
2796 r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
2797 ids_in_page, titles_in_page)
2798
2799 return zip(ids_in_page, titles_in_page)
2800
    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist by paging through watch pages.

        Mixes have no dedicated playlist page; each watch page only shows a
        window of the mix, so we keep requesting the page of the last seen
        video until no new video ids appear.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Title is scraped from the last downloaded page; the class names have
        # varied over time, so try several of them.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)
2832
    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build the playlist result.

        Returns a ``(has_videos, playlist)`` tuple so the caller can fall back
        to single-video extraction when the "playlist" URL serves no entries.
        Raises ExtractorError for private/nonexistent playlists and invalid
        parameters.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                # Harmless language-chooser alert; not an error.
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist
2894
2895 def _check_download_just_video(self, url, playlist_id):
2896 # Check if it's a video-specific URL
2897 query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
2898 video_id = query_dict.get('v', [None])[0] or self._search_regex(
2899 r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
2900 'video id', default=None)
2901 if video_id:
2902 if self._downloader.params.get('noplaylist'):
2903 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2904 return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
2905 else:
2906 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
2907 return video_id, None
2908 return None, None
2909
2910 def _real_extract(self, url):
2911 # Extract playlist id
2912 mobj = re.match(self._VALID_URL, url)
2913 if mobj is None:
2914 raise ExtractorError('Invalid URL: %s' % url)
2915 playlist_id = mobj.group(1) or mobj.group(2)
2916
2917 video_id, video = self._check_download_just_video(url, playlist_id)
2918 if video:
2919 return video
2920
2921 if playlist_id.startswith(('RD', 'UL', 'PU')):
2922 # Mixes require a custom extraction process
2923 return self._extract_mix(playlist_id)
2924
2925 has_videos, playlist = self._extract_playlist(playlist_id)
2926 if has_videos or not video_id:
2927 return playlist
2928
2929 # Some playlist URLs don't actually serve a playlist (see
2930 # https://github.com/ytdl-org/youtube-dl/issues/10537).
2931 # Fallback to plain video extraction if there is a video id
2932 # along with playlist id.
2933 return self.url_result(video_id, 'Youtube', video_id=video_id)
2934
2935
2936class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
2937 IE_DESC = 'YouTube.com channels'
2938 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
2939 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
2940 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
2941 IE_NAME = 'youtube:channel'
2942 _TESTS = [{
2943 'note': 'paginated channel',
2944 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2945 'playlist_mincount': 91,
2946 'info_dict': {
2947 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2948 'title': 'Uploads from lex will',
2949 'uploader': 'lex will',
2950 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2951 }
2952 }, {
2953 'note': 'Age restricted channel',
2954 # from https://www.youtube.com/user/DeusExOfficial
2955 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2956 'playlist_mincount': 64,
2957 'info_dict': {
2958 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2959 'title': 'Uploads from Deus Ex',
2960 'uploader': 'Deus Ex',
2961 'uploader_id': 'DeusExOfficial',
2962 },
2963 }, {
2964 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2965 'only_matching': True,
2966 }, {
2967 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2968 'only_matching': True,
2969 }]
2970
2971 @classmethod
2972 def suitable(cls, url):
2973 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2974 else super(YoutubeChannelIE, cls).suitable(url))
2975
    def _build_template_url(self, url, channel_id):
        # `url` is unused here; it is part of the signature so subclasses
        # (e.g. YoutubeUserIE) can derive the template from the URL itself.
        return self._TEMPLATE_URL % channel_id
2978
    def _real_extract(self, url):
        """Extract a channel's videos.

        Preferred path: resolve the channel to its UU* uploads playlist and
        delegate to YoutubePlaylist.  Fallbacks: single-page extraction for
        auto-generated channels, then paged extraction via _entries.
        """
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to app-link meta tags that embed the channel id.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # 'UC' channel id -> 'UU' uploads playlist id.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry; an empty page may carry an alert.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3035
3036
3037class YoutubeUserIE(YoutubeChannelIE):
3038 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
3039 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
3040 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
3041 IE_NAME = 'youtube:user'
3042
3043 _TESTS = [{
3044 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
3045 'playlist_mincount': 320,
3046 'info_dict': {
3047 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
3048 'title': 'Uploads from The Linux Foundation',
3049 'uploader': 'The Linux Foundation',
3050 'uploader_id': 'TheLinuxFoundation',
3051 }
3052 }, {
3053 # Only available via https://www.youtube.com/c/12minuteathlete/videos
3054 # but not https://www.youtube.com/user/12minuteathlete/videos
3055 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
3056 'playlist_mincount': 249,
3057 'info_dict': {
3058 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
3059 'title': 'Uploads from 12 Minute Athlete',
3060 'uploader': '12 Minute Athlete',
3061 'uploader_id': 'the12minuteathlete',
3062 }
3063 }, {
3064 'url': 'ytuser:phihag',
3065 'only_matching': True,
3066 }, {
3067 'url': 'https://www.youtube.com/c/gametrailers',
3068 'only_matching': True,
3069 }, {
3070 'url': 'https://www.youtube.com/gametrailers',
3071 'only_matching': True,
3072 }, {
3073 # This channel is not available, geo restricted to JP
3074 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
3075 'only_matching': True,
3076 }]
3077
3078 @classmethod
3079 def suitable(cls, url):
3080 # Don't return True if the url can be extracted with other youtube
3081 # extractor, the regex would is too permissive and it would match.
3082 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
3083 if any(ie.suitable(url) for ie in other_yt_ies):
3084 return False
3085 else:
3086 return super(YoutubeUserIE, cls).suitable(url)
3087
3088 def _build_template_url(self, url, channel_id):
3089 mobj = re.match(self._VALID_URL, url)
3090 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
3091
3092
3093class YoutubeLiveIE(YoutubeBaseInfoExtractor):
3094 IE_DESC = 'YouTube.com live streams'
3095 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
3096 IE_NAME = 'youtube:live'
3097
3098 _TESTS = [{
3099 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3100 'info_dict': {
3101 'id': 'a48o2S1cPoo',
3102 'ext': 'mp4',
3103 'title': 'The Young Turks - Live Main Show',
3104 'uploader': 'The Young Turks',
3105 'uploader_id': 'TheYoungTurks',
3106 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3107 'upload_date': '20150715',
3108 'license': 'Standard YouTube License',
3109 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3110 'categories': ['News & Politics'],
3111 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3112 'like_count': int,
3113 'dislike_count': int,
3114 },
3115 'params': {
3116 'skip_download': True,
3117 },
3118 }, {
3119 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3120 'only_matching': True,
3121 }, {
3122 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3123 'only_matching': True,
3124 }, {
3125 'url': 'https://www.youtube.com/TheYoungTurks/live',
3126 'only_matching': True,
3127 }]
3128
3129 def _real_extract(self, url):
3130 mobj = re.match(self._VALID_URL, url)
3131 channel_id = mobj.group('id')
3132 base_url = mobj.group('base_url')
3133 webpage = self._download_webpage(url, channel_id, fatal=False)
3134 if webpage:
3135 page_type = self._og_search_property(
3136 'type', webpage, 'page type', default='')
3137 video_id = self._html_search_meta(
3138 'videoId', webpage, 'video id', default=None)
3139 if page_type.startswith('video') and video_id and re.match(
3140 r'^[0-9A-Za-z_-]{11}$', video_id):
3141 return self.url_result(video_id, YoutubeIE.ie_key())
3142 return self.url_result(base_url)
3143
3144
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists tab of a user/channel page."""
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3177
3178
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for search extractors; only overrides the video-link regex."""
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3181
3182
3183class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
3184 IE_DESC = 'YouTube.com searches'
3185 # there doesn't appear to be a real limit, for example if you search for
3186 # 'python' you get more than 8.000.000 results
3187 _MAX_RESULTS = float('inf')
3188 IE_NAME = 'youtube:search'
3189 _SEARCH_KEY = 'ytsearch'
3190 _EXTRA_QUERY_ARGS = {}
3191 _TESTS = []
3192
3193 def _get_n_results(self, query, n):
3194 """Get a specified number of results for a query"""
3195
3196 videos = []
3197 limit = n
3198
3199 url_query = {
3200 'search_query': query.encode('utf-8'),
3201 }
3202 url_query.update(self._EXTRA_QUERY_ARGS)
3203 result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
3204
3205 for pagenum in itertools.count(1):
3206 data = self._download_json(
3207 result_url, video_id='query "%s"' % query,
3208 note='Downloading page %s' % pagenum,
3209 errnote='Unable to download API page',
3210 query={'spf': 'navigate'})
3211 html_content = data[1]['body']['content']
3212
3213 if 'class="search-message' in html_content:
3214 raise ExtractorError(
3215 '[youtube] No video results', expected=True)
3216
3217 new_videos = list(self._process_page(html_content))
3218 videos += new_videos
3219 if not new_videos or len(videos) > limit:
3220 break
3221 next_link = self._html_search_regex(
3222 r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
3223 html_content, 'next link', default=None)
3224 if next_link is None:
3225 break
3226 result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
3227
3228 if len(videos) > n:
3229 videos = videos[:n]
3230 return self.playlist_result(videos, query)
3231
3232
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as ytsearch but results are ordered by upload date (newest first)."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Extra query parameter that switches the result ordering.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
3238
3239
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for pasted /results?search_query=... URLs."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return the videos found on it."""
        mobj = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
3260
3261
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for /show/ pages; delegates to the show's playlists tab."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just a collection of playlists, so reuse the base
        # playlists extraction on the show's /playlists page.
        playlist_id = self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(
            'https://www.youtube.com/show/%s/playlists' % playlist_id)
3279
3280
3281class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
3282 """
3283 Base class for feed extractors
3284 Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
3285 """
3286 _LOGIN_REQUIRED = True
3287
    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed, e.g. 'youtube:recommended'.
        return 'youtube:%s' % self._FEED_NAME
3291
    def _real_initialize(self):
        """Log in before extraction; feeds require authentication (_LOGIN_REQUIRED)."""
        self._login()
3294
    def _entries(self, page):
        """Yield video results from a feed page, following 'load more' AJAX pages."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
            if not new_ids:
                break

            ids.extend(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            # The 'load more' widget carries the URL of the next AJAX portion.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                headers=self._YOUTUBE_CLIENT_HEADERS)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']
3326
3327 def _real_extract(self, url):
3328 page = self._download_webpage(
3329 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
3330 self._PLAYLIST_TITLE)
3331 return self.playlist_result(
3332 self._entries(page), playlist_title=self._PLAYLIST_TITLE)
3333
3334
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the authenticated user's Watch Later list (playlist 'WL')."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # --no-playlist may resolve to a single video from the URL.
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        _, playlist = self._extract_playlist('WL')
        return playlist
3354
3355
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the authenticated user's favourites playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The page redirects to the actual favourites playlist; scrape its id
        # and delegate to YoutubePlaylist.
        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
3366
3367
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended videos."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3373
3374
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Path segment under youtube.com/feed/ fetched by the base class
    _FEED_NAME = 'subscriptions'
    # Title given to the resulting playlist
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3380
3381
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's watch history."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    # Path segment under youtube.com/feed/ fetched by the base class
    _FEED_NAME = 'history'
    # Title given to the resulting playlist
    _PLAYLIST_TITLE = 'Youtube History'
3387
3388
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost (typically an unquoted
    shell URL cut at the first '&') and fail with a helpful message."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a matching URL cannot contain a video id.
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
3436
3437
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the canonical 11
    characters (i.e. got cut off) and fail with an explanatory error."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: the id matched by _VALID_URL is too short to be valid.
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)