jfr.im git - yt-dlp.git/blame_incremental - youtube

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import random
	10	import re
	11	import time
	12	import traceback
	13
	14	from .common import InfoExtractor, SearchInfoExtractor
	15	from ..jsinterp import JSInterpreter
	16	from ..swfinterp import SWFInterpreter
	17	from ..compat import (
	18	compat_chr,
	19	compat_parse_qs,
	20	compat_urllib_parse_unquote,
	21	compat_urllib_parse_unquote_plus,
	22	compat_urllib_parse_urlencode,
	23	compat_urllib_parse_urlparse,
	24	compat_urlparse,
	25	compat_str,
	26	)
	27	from ..utils import (
	28	clean_html,
	29	error_to_compat_str,
	30	ExtractorError,
	31	float_or_none,
	32	get_element_by_attribute,
	33	get_element_by_id,
	34	int_or_none,
	35	mimetype2ext,
	36	orderedSet,
	37	parse_duration,
	38	remove_quotes,
	39	remove_start,
	40	sanitized_Request,
	41	smuggle_url,
	42	str_to_int,
	43	unescapeHTML,
	44	unified_strdate,
	45	unsmuggle_url,
	46	uppercase_escape,
	47	urlencode_postdata,
	48	ISO3166Utils,
	49	)
	50
	51
	52	class YoutubeBaseInfoExtractor(InfoExtractor):
	53	"""Provide base functions for Youtube extractors"""
	54	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	55	_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
	56	_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
	57	_NETRC_MACHINE = 'youtube'
	58	# If True it will raise an error if no login info is provided
	59	_LOGIN_REQUIRED = False
	60
	61	def _set_language(self):
	62	self._set_cookie(
	63	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	64	# YouTube sets the expire time to about two months
	65	expire_time=time.time() + 2 * 30 * 24 * 3600)
	66
	67	def _ids_to_results(self, ids):
	68	return [
	69	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	70	for vid_id in ids]
	71
	72	def _login(self):
	73	"""
	74	Attempt to log in to YouTube.
	75	True is returned if successful or skipped.
	76	False is returned if login failed.
	77
	78	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	79	"""
	80	(username, password) = self._get_login_info()
	81	# No authentication to be performed
	82	if username is None:
	83	if self._LOGIN_REQUIRED:
	84	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	85	return True
	86
	87	login_page = self._download_webpage(
	88	self._LOGIN_URL, None,
	89	note='Downloading login page',
	90	errnote='unable to fetch login page', fatal=False)
	91	if login_page is False:
	92	return
	93
	94	login_form = self._hidden_inputs(login_page)
	95
	96	login_form.update({
	97	'checkConnection': 'youtube',
	98	'Email': username,
	99	'Passwd': password,
	100	})
	101
	102	login_results = self._download_webpage(
	103	self._PASSWORD_CHALLENGE_URL, None,
	104	note='Logging in', errnote='unable to log in', fatal=False,
	105	data=urlencode_postdata(login_form))
	106	if login_results is False:
	107	return False
	108
	109	error_msg = self._html_search_regex(
	110	r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
	111	login_results, 'error message', default=None)
	112	if error_msg:
	113	raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
	114
	115	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	116	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	117
	118	# Two-Factor
	119	# TODO add SMS and phone call support - these require making a request and then prompting the user
	120
	121	if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
	122	tfa_code = self._get_tfa_info('2-step verification code')
	123
	124	if not tfa_code:
	125	self._downloader.report_warning(
	126	'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
	127	'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	128	return False
	129
	130	tfa_code = remove_start(tfa_code, 'G-')
	131
	132	tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
	133
	134	tfa_form_strs.update({
	135	'Pin': tfa_code,
	136	'TrustDevice': 'on',
	137	})
	138
	139	tfa_data = urlencode_postdata(tfa_form_strs)
	140
	141	tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
	142	tfa_results = self._download_webpage(
	143	tfa_req, None,
	144	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	145
	146	if tfa_results is False:
	147	return False
	148
	149	if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
	150	self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
	151	return False
	152	if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
	153	self._downloader.report_warning('unable to log in - did the page structure change?')
	154	return False
	155	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	156	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	157	return False
	158
	159	if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
	160	self._downloader.report_warning('unable to log in: bad username or password')
	161	return False
	162	return True
	163
	164	def _real_initialize(self):
	165	if self._downloader is None:
	166	return
	167	self._set_language()
	168	if not self._login():
	169	return
	170
	171
	172	class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
	173	# Extract entries from page with "Load more" button
	174	def _entries(self, page, playlist_id):
	175	more_widget_html = content_html = page
	176	for page_num in itertools.count(1):
	177	for entry in self._process_page(content_html):
	178	yield entry
	179
	180	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	181	if not mobj:
	182	break
	183
	184	more = self._download_json(
	185	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	186	'Downloading page #%s' % page_num,
	187	transform_source=uppercase_escape)
	188	content_html = more['content_html']
	189	if not content_html.strip():
	190	# Some webpages show a "Load more" button but they don't
	191	# have more videos
	192	break
	193	more_widget_html = more['load_more_widget_html']
	194
	195
	196	class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	197	def _process_page(self, content):
	198	for video_id, video_title in self.extract_videos_from_page(content):
	199	yield self.url_result(video_id, 'Youtube', video_id, video_title)
	200
	201	def extract_videos_from_page(self, page):
	202	ids_in_page = []
	203	titles_in_page = []
	204	for mobj in re.finditer(self._VIDEO_RE, page):
	205	# The link with index 0 is not the first video of the playlist (not sure if still actual)
	206	if 'index' in mobj.groupdict() and mobj.group('id') == '0':
	207	continue
	208	video_id = mobj.group('id')
	209	video_title = unescapeHTML(mobj.group('title'))
	210	if video_title:
	211	video_title = video_title.strip()
	212	try:
	213	idx = ids_in_page.index(video_id)
	214	if video_title and not titles_in_page[idx]:
	215	titles_in_page[idx] = video_title
	216	except ValueError:
	217	ids_in_page.append(video_id)
	218	titles_in_page.append(video_title)
	219	return zip(ids_in_page, titles_in_page)
	220
	221
	222	class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	223	def _process_page(self, content):
	224	for playlist_id in orderedSet(re.findall(
	225	r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
	226	content)):
	227	yield self.url_result(
	228	'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
	229
	230	def _real_extract(self, url):
	231	playlist_id = self._match_id(url)
	232	webpage = self._download_webpage(url, playlist_id)
	233	title = self._og_search_title(webpage, fatal=False)
	234	return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
	235
	236
	237	class YoutubeIE(YoutubeBaseInfoExtractor):
	238	IE_DESC = 'YouTube.com'
	239	_VALID_URL = r"""(?x)^
	240	(
	241	(?:https?://|//) # http(s):// or protocol-independent URL
	242	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
	243	(?:www\.)?deturl\.com/www\.youtube\.com/|
	244	(?:www\.)?pwnyoutube\.com/|
	245	(?:www\.)?yourepeat\.com/|
	246	tube\.majestyc\.net/|
	247	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	248	(?:.*?\#/)? # handle anchor (#/) redirect urls
	249	(?: # the various things that can precede the ID:
	250	(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
	251	|(?: # or the v= param in all its forms
	252	(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
	253	(?:\?|\#!?) # the params delimiter ? or # or #!
	254	(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
	255	v=
	256	)
	257	))
	258	|(?:
	259	youtu\.be| # just youtu.be/xxxx
	260	vid\.plus| # or vid.plus/xxxx
	261	zwearz\.com/watch| # or zwearz.com/watch/xxxx
	262	)/
	263	|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	264	)
	265	)? # all until now is optional -> you can pass the naked ID
	266	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	267	(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
	268	(?(1).+)? # if we found the ID, everything can follow
	269	$"""
	270	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	271	_formats = {
	272	'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	273	'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	274	'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
	275	'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
	276	'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
	277	'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	278	'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	279	'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	280	# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
	281	'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
	282	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	283	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	284	'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	285	'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	286	'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	287	'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	288	'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	289	'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	290
	291
	292	# 3D videos
	293	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	294	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	295	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	296	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	297	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
	298	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	299	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	300
	301	# Apple HTTP Live Streaming
	302	'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	303	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	304	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	305	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	306	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	307	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	308	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	309	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
	310
	311	# DASH mp4 video
	312	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	313	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	314	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	315	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	316	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	317	'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
	318	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	319	'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	320	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	321	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
	322	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
	323	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	324
	325	# Dash mp4 audio
	326	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
	327	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
	328	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
	329	'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
	330	'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
	331
	332	# Dash webm
	333	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	334	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	335	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	336	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	337	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	338	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	339	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
	340	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	341	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	342	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	343	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	344	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	345	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	346	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	347	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	348	# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
	349	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	350	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	351	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	352	'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	353	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	354	'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	355
	356	# Dash webm audio
	357	'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	358	'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	359
	360	# Dash webm audio with opus inside
	361	'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	362	'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	363	'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	364
	365	# RTMP (unnamed)
	366	'_rtmp': {'protocol': 'rtmp'},
	367	}
	368	_SUBTITLE_FORMATS = ('ttml', 'vtt')
	369
	370	IE_NAME = 'youtube'
	371	_TESTS = [
	372	{
	373	'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
	374	'info_dict': {
	375	'id': 'BaW_jenozKc',
	376	'ext': 'mp4',
	377	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	378	'uploader': 'Philipp Hagemeister',
	379	'uploader_id': 'phihag',
	380	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
	381	'upload_date': '20121002',
	382	'license': 'Standard YouTube License',
	383	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	384	'categories': ['Science & Technology'],
	385	'tags': ['youtube-dl'],
	386	'like_count': int,
	387	'dislike_count': int,
	388	'start_time': 1,
	389	'end_time': 9,
	390	}
	391	},
	392	{
	393	'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
	394	'note': 'Test generic use_cipher_signature video (#897)',
	395	'info_dict': {
	396	'id': 'UxxajLWwzqY',
	397	'ext': 'mp4',
	398	'upload_date': '20120506',
	399	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	400	'alt_title': 'I Love It (feat. Charli XCX)',
	401	'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
	402	'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
	403	'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
	404	'iconic ep', 'iconic', 'love', 'it'],
	405	'uploader': 'Icona Pop',
	406	'uploader_id': 'IconaPop',
	407	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
	408	'license': 'Standard YouTube License',
	409	'creator': 'Icona Pop',
	410	}
	411	},
	412	{
	413	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	414	'note': 'Test VEVO video with age protection (#956)',
	415	'info_dict': {
	416	'id': '07FYdnEawAQ',
	417	'ext': 'mp4',
	418	'upload_date': '20130703',
	419	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	420	'alt_title': 'Tunnel Vision',
	421	'description': 'md5:64249768eec3bc4276236606ea996373',
	422	'uploader': 'justintimberlakeVEVO',
	423	'uploader_id': 'justintimberlakeVEVO',
	424	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
	425	'license': 'Standard YouTube License',
	426	'creator': 'Justin Timberlake',
	427	'age_limit': 18,
	428	}
	429	},
	430	{
	431	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	432	'note': 'Embed-only video (#1746)',
	433	'info_dict': {
	434	'id': 'yZIXLfi8CZQ',
	435	'ext': 'mp4',
	436	'upload_date': '20120608',
	437	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	438	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	439	'uploader': 'SET India',
	440	'uploader_id': 'setindia',
	441	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
	442	'license': 'Standard YouTube License',
	443	'age_limit': 18,
	444	}
	445	},
	446	{
	447	'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
	448	'note': 'Use the first video ID in the URL',
	449	'info_dict': {
	450	'id': 'BaW_jenozKc',
	451	'ext': 'mp4',
	452	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	453	'uploader': 'Philipp Hagemeister',
	454	'uploader_id': 'phihag',
	455	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
	456	'upload_date': '20121002',
	457	'license': 'Standard YouTube License',
	458	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	459	'categories': ['Science & Technology'],
	460	'tags': ['youtube-dl'],
	461	'like_count': int,
	462	'dislike_count': int,
	463	},
	464	'params': {
	465	'skip_download': True,
	466	},
	467	},
	468	{
	469	'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
	470	'note': '256k DASH audio (format 141) via DASH manifest',
	471	'info_dict': {
	472	'id': 'a9LDPn-MO4I',
	473	'ext': 'm4a',
	474	'upload_date': '20121002',
	475	'uploader_id': '8KVIDEO',
	476	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
	477	'description': '',
	478	'uploader': '8KVIDEO',
	479	'license': 'Standard YouTube License',
	480	'title': 'UHDTV TEST 8K VIDEO.mp4'
	481	},
	482	'params': {
	483	'youtube_include_dash_manifest': True,
	484	'format': '141',
	485	},
	486	'skip': 'format 141 not served anymore',
	487	},
	488	# DASH manifest with encrypted signature
	489	{
	490	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	491	'info_dict': {
	492	'id': 'IB3lcPjvWLA',
	493	'ext': 'm4a',
	494	'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
	495	'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
	496	'uploader': 'AfrojackVEVO',
	497	'uploader_id': 'AfrojackVEVO',
	498	'upload_date': '20131011',
	499	'license': 'Standard YouTube License',
	500	},

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import itertools

import json

import os.path

import random

import re

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

15

from ..jsinterp import JSInterpreter

16

from ..swfinterp import SWFInterpreter

17

from ..compat import (

18

compat_chr,

19

compat_parse_qs,

20

compat_urllib_parse_unquote,

21

compat_urllib_parse_unquote_plus,

22

compat_urllib_parse_urlencode,

23

compat_urllib_parse_urlparse,

compat_urlparse,

compat_str,

)

from ..utils import (

clean_html,

error_to_compat_str,

ExtractorError,

float_or_none,

get_element_by_attribute,

get_element_by_id,

int_or_none,

mimetype2ext,

orderedSet,

parse_duration,

remove_quotes,

remove_start,

sanitized_Request,

smuggle_url,

str_to_int,

unescapeHTML,

unified_strdate,

unsmuggle_url,

uppercase_escape,

urlencode_postdata,

ISO3166Utils,

)

class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the 'PREF' cookie for .youtube.com to 'f1=50000000&hl=en'."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one ``url_result`` (ie 'Youtube') per ID in ``ids``."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        An ExtractorError is also raised when the login response contains the
        "errormsg_0_Passwd" element.
        """
        # The same two form markers are looked for in more than one response
        challenge_form_re = r'(?i)<form[^>]+id="challenge"'
        login_form_re = r'(?i)<form[^>]+id="gaia_loginform"'

        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Report the failure as False, as the docstring promises
            # (a bare return would hand None to the caller)
            return False

        # Start from the hidden inputs of the login page and add the credentials
        login_form = self._hidden_inputs(login_page)
        login_form.update({
            'checkConnection': 'youtube',
            'Email': username,
            'Passwd': password,
        })

        login_results = self._download_webpage(
            self._PASSWORD_CHALLENGE_URL, None,
            note='Logging in', errnote='unable to log in', fatal=False,
            data=urlencode_postdata(login_form))
        if login_results is False:
            return False

        error_msg = self._html_search_regex(
            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
            login_results, 'error message', default=None)
        if error_msg:
            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)

        # The error element is present but no message text could be extracted from it
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(challenge_form_re, login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # The trailing space keeps the two adjacent literals from running together
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            # presumably strips a leading 'G-' from the entered code - see remove_start
            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = urlencode_postdata(tfa_form_strs)

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # Being shown the challenge form again means the code was not accepted
            if re.search(challenge_form_re, tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(login_form_re, tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # Being shown the login form again means the credentials were rejected
        if re.search(login_form_re, login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The login outcome is deliberately ignored: nothing follows either way
        self._login()

class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Pagination support for pages that carry a "Load more" button
    def _entries(self, page, playlist_id):
        """
        Yield the entries of ``page`` and of every follow-up chunk it links to.

        Each chunk of HTML is passed to ``self._process_page`` (not defined
        here; subclasses supply it). The next chunk is downloaded as JSON from
        the ``data-uix-load-more-href`` target found in the current widget
        HTML; iteration ends when no such target exists or the returned
        content is blank.
        """
        content_html = page
        more_widget_html = page
        page_num = 1
        while True:
            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if load_more is None:
                return

            more_url = 'https://youtube.com/%s' % load_more.group('more')
            more = self._download_json(
                more_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)

            content_html = more['content_html']
            if not content_html.strip():
                # A "Load more" button can be present even though no
                # further videos are available
                return

            more_widget_html = more['load_more_widget_html']
            page_num += 1

194

195

196

class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' ``url_result`` for every (id, title) pair found in ``content``."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """
        Collect the distinct video IDs matched by ``self._VIDEO_RE`` in ``page``.

        ``_VIDEO_RE`` is not defined in this class (presumably set by
        subclasses); it must provide the named groups 'id' and 'title'.

        IDs keep the order of their first occurrence. A later occurrence of an
        already seen ID only contributes its title when the stored one is
        empty. Returns ``zip(ids, titles)``.
        """
        ids_in_page = []
        titles_in_page = []
        # video ID -> position in the two lists above; gives constant-time
        # duplicate detection instead of a list scan per match
        position_of = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still relevant)
            # NOTE(review): the guard compares the 'id' group, not 'index', with '0' - confirm this is intended
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_of.get(video_id)
            if idx is None:
                # First time this ID is seen
                position_of[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                # Fill in a title that the earlier occurrence lacked
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)

220

221

222

class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' ``url_result`` per distinct playlist ID linked from ``content``."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet is applied to drop repeated IDs (presumably keeping first-seen order)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result built from its paginated entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # The title lookup is non-fatal (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)

235

236

237

class YoutubeIE(YoutubeBaseInfoExtractor):

238

IE_DESC = 'YouTube.com'

239

_VALID_URL = r"""(?x)^

240

(

241

(?:https?://|//) # http(s):// or protocol-independent URL

242

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|

243

(?:www\.)?deturl\.com/www\.youtube\.com/|

244

(?:www\.)?pwnyoutube\.com/|

245

(?:www\.)?yourepeat\.com/|

246

tube\.majestyc\.net/|

247

youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains

248

(?:.*?\#/)? # handle anchor (#/) redirect urls

249

(?: # the various things that can precede the ID:

250

(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/

251

|(?: # or the v= param in all its forms

252

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

253

(?:\?|\#!?) # the params delimiter ? or # or #!

254

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

260

vid\.plus| # or vid.plus/xxxx

261

zwearz\.com/watch| # or zwearz.com/watch/xxxx

262

)/

263

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

264

)

265

)? # all until now is optional -> you can pass the naked ID

266

([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

267

(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE

268

(?(1).+)? # if we found the ID, everything can follow

269

$"""

270

_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'

271

_formats = {

272

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

273

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

274

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

275

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

276

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

277

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

278

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

279

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

280

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

281

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

282

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

283

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

284

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

285

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

286

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

287

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

288

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

289

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

294

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

295

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

296

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

297

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

298

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

299

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

300

301

# Apple HTTP Live Streaming

302

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

303

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

304

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

305

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

306

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

307

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

308

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

309

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

310

311

# DASH mp4 video

312

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

313

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

314

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

315

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

316

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

317

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)

318

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

319

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

320

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

321

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},

322

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},

323

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

324

325

# Dash mp4 audio

326

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},

327

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},

328

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},

329

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},

330

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},

331

332

# Dash webm

333

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

334

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

335

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

336

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

337

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

338

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

339

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},

340

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

341

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

342

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

343

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

344

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

345

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

346

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

347

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

348

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

349

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

350

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

351

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

352

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

353

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

354

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

355

356

# Dash webm audio

357

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},

358

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

359

360

# Dash webm audio with opus inside

361

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},

362

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},

363

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

364

365

# RTMP (unnamed)

366

'_rtmp': {'protocol': 'rtmp'},

367

}

368

_SUBTITLE_FORMATS = ('ttml', 'vtt')

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

378

'uploader': 'Philipp Hagemeister',

379

'uploader_id': 'phihag',

380

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

381

'upload_date': '20121002',

382

'license': 'Standard YouTube License',

383

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

384

'categories': ['Science & Technology'],

385

'tags': ['youtube-dl'],

386

'like_count': int,

387

'dislike_count': int,

'start_time': 1,

'end_time': 9,

}

},

{

'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',

394

'note': 'Test generic use_cipher_signature video (#897)',

'info_dict': {

'id': 'UxxajLWwzqY',

'ext': 'mp4',

'upload_date': '20120506',

399

'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',

400

'alt_title': 'I Love It (feat. Charli XCX)',

401

'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',

402

'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',

403

'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',

404

'iconic ep', 'iconic', 'love', 'it'],

405

'uploader': 'Icona Pop',

406

'uploader_id': 'IconaPop',

407

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',

408

'license': 'Standard YouTube License',

409

'creator': 'Icona Pop',

}

},

{

'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',

414

'note': 'Test VEVO video with age protection (#956)',

'info_dict': {

'id': '07FYdnEawAQ',

'ext': 'mp4',

'upload_date': '20130703',

419

'title': 'Justin Timberlake - Tunnel Vision (Explicit)',

420

'alt_title': 'Tunnel Vision',

421

'description': 'md5:64249768eec3bc4276236606ea996373',

422

'uploader': 'justintimberlakeVEVO',

423

'uploader_id': 'justintimberlakeVEVO',

424

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',

425

'license': 'Standard YouTube License',

426

'creator': 'Justin Timberlake',

'age_limit': 18,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

432

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

437

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

438

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

439

'uploader': 'SET India',

440

'uploader_id': 'setindia',

441

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

442

'license': 'Standard YouTube License',

'age_limit': 18,

}

},

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',

448

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

453

'uploader': 'Philipp Hagemeister',

454

'uploader_id': 'phihag',

455

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

456

'upload_date': '20121002',

457

'license': 'Standard YouTube License',

458

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

459

'categories': ['Science & Technology'],

460

'tags': ['youtube-dl'],

461

'like_count': int,

462

'dislike_count': int,

463

},

464

'params': {

465

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

470

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

475

'uploader_id': '8KVIDEO',

476

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

477

'description': '',

478

'uploader': '8KVIDEO',

479

'license': 'Standard YouTube License',

480

'title': 'UHDTV TEST 8K VIDEO.mp4'

481

},

482

'params': {

483

'youtube_include_dash_manifest': True,

484

'format': '141',

485

},

486

'skip': 'format 141 not served anymore',

487

},

488

# DASH manifest with encrypted signature

489

{

490

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',

495

'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',

496

'uploader': 'AfrojackVEVO',

497

'uploader_id': 'AfrojackVEVO',

498

'upload_date': '20131011',

499

'license': 'Standard YouTube License',

500

},

501

'params': {

502

'youtube_include_dash_manifest': True,

503

'format': '141/bestaudio[ext=m4a]',

504

},

505

},

506

# JS player signature function name containing $

507

{

508

'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',

'info_dict': {

'id': 'nfWlot6h_JM',

'ext': 'm4a',

'title': 'Taylor Swift - Shake It Off',

513

'alt_title': 'Shake It Off',

514

'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',

515

'uploader': 'TaylorSwiftVEVO',

516

'uploader_id': 'TaylorSwiftVEVO',

517

'upload_date': '20140818',

518

'license': 'Standard YouTube License',

519

'creator': 'Taylor Swift',

520

},

521

'params': {

522

'youtube_include_dash_manifest': True,

523

'format': '141/bestaudio[ext=m4a]',

},

},

# Controversy video

{

'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',

'info_dict': {

'id': 'T4XJQO3qol8',

'ext': 'mp4',

'upload_date': '20100909',

533

'uploader': 'The Amazing Atheist',

534

'uploader_id': 'TheAmazingAtheist',

535

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',

536

'license': 'Standard YouTube License',

537

'title': 'Burning Everyone\'s Koran',

538

'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',

539

}

540

},

541

# Normal age-gate video (No vevo, embed allowed)

542

{

543

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

548

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

549

'uploader': 'The Witcher',

550

'uploader_id': 'WitcherGame',

551

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

552

'upload_date': '20140605',

553

'license': 'Standard YouTube License',

'age_limit': 18,

},

},

# Age-gate video with encrypted signature

558

{

559

'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',

'info_dict': {

'id': '6kLq3WMV1nU',

'ext': 'mp4',

'title': 'Dedication To My Ex (Miss That) (Lyric Video)',

564

'description': 'md5:33765bb339e1b47e7e72b5490139bb41',

565

'uploader': 'LloydVEVO',

566

'uploader_id': 'LloydVEVO',

567

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',

568

'upload_date': '20110629',

569

'license': 'Standard YouTube License',

'age_limit': 18,

},

},

# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)

574

{

575

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'upload_date': '20100430',

580

'uploader_id': 'deadmau5',

581

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

582

'creator': 'deadmau5',

583

'description': 'md5:12c56784b8032162bb936a5f76d55360',

584

'uploader': 'deadmau5',

585

'license': 'Standard YouTube License',

586

'title': 'Deadmau5 - Some Chords (HD)',

587

'alt_title': 'Some Chords',

588

},

589

'expected_warnings': [

590

'DASH manifest missing',

591

]

592

},

593

# Olympics (https://github.com/rg3/youtube-dl/issues/4431)

594

{

595

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'upload_date': '20150827',

600

'uploader_id': 'olympic',

601

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

602

'license': 'Standard YouTube License',

603

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

604

'uploader': 'Olympic',

605

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

606

},

607

'params': {

608

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

618

'upload_date': '20110310',

619

'uploader_id': 'AllenMeow',

620

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

621

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

622

'uploader': '孫艾倫',

623

'license': 'Standard YouTube License',

624

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

625

},

626

},

627

# url_encoded_fmt_stream_map is empty string

628

{

629

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

634

'description': '',

635

'upload_date': '20150404',

636

'uploader_id': 'spbelect',

637

'uploader': 'Наблюдатели Петербурга',

638

},

639

'params': {

640

'skip_download': 'requires avconv',

641

},

642

'skip': 'This live event has ended.',

643

},

644

# Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)

645

{

646

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'mp4',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

651

'description': 'md5:116377fd2963b81ec4ce64b542173306',

652

'upload_date': '20150625',

653

'uploader_id': 'dorappi2000',

654

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

655

'uploader': 'dorappi2000',

656

'license': 'Standard YouTube License',

657

'formats': 'mincount:32',

658

},

659

},

660

# DASH manifest with segment_list

661

{

662

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

663

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

668

'uploader': 'Airtek',

669

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

670

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

671

'license': 'Standard YouTube License',

672

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

673

},

674

'params': {

675

'youtube_include_dash_manifest': True,

676

'format': '135', # bestvideo

677

},

678

'skip': 'This live event has ended.',

679

},

680

{

681

# Multifeed videos (multiple cameras), URL is for Main Camera

682

'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',

683

'info_dict': {

684

'id': 'jqWvoWXjCVs',

685

'title': 'teamPGP: Rocket League Noob Stream',

686

'description': 'md5:dc7872fb300e143831327f1bae3af010',

},

'playlist': [{

'info_dict': {

'id': 'jqWvoWXjCVs',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',

693

'description': 'md5:dc7872fb300e143831327f1bae3af010',

694

'upload_date': '20150721',

695

'uploader': 'Beer Games Beer',

696

'uploader_id': 'beergamesbeer',

697

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

698

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': '6h8e8xoXJzg',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',

705

'description': 'md5:dc7872fb300e143831327f1bae3af010',

706

'upload_date': '20150721',

707

'uploader': 'Beer Games Beer',

708

'uploader_id': 'beergamesbeer',

709

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

710

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': 'PUOgX5z9xZw',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (grizzle)',

717

'description': 'md5:dc7872fb300e143831327f1bae3af010',

718

'upload_date': '20150721',

719

'uploader': 'Beer Games Beer',

720

'uploader_id': 'beergamesbeer',

721

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

722

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': 'teuwxikvS5k',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (zim)',

729

'description': 'md5:dc7872fb300e143831327f1bae3af010',

730

'upload_date': '20150721',

731

'uploader': 'Beer Games Beer',

732

'uploader_id': 'beergamesbeer',

733

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

734

'license': 'Standard YouTube License',

},

}],

'params': {

'skip_download': True,

},

},

{

# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)

743

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

744

'info_dict': {

745

'id': 'gVfLd0zydlo',

746

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

747

},

748

'playlist_count': 2,

749

'skip': 'Not multifeed anymore',

750

},

751

{

752

'url': 'https://vid.plus/FlRa-iH7PGw',

753

'only_matching': True,

754

},

755

{

756

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

757

'only_matching': True,

758

},

759

{

760

# Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)

761

# Also tests cut-off URL expansion in video description (see

762

# https://github.com/rg3/youtube-dl/issues/1892,

763

# https://github.com/rg3/youtube-dl/issues/8164)

764

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

769

'alt_title': 'Dark Walk',

770

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

771

'upload_date': '20151119',

772

'uploader_id': 'IronSoulElf',

773

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

774

'uploader': 'IronSoulElf',

775

'license': 'Standard YouTube License',

776

'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',

777

},

778

'params': {

779

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)

784

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

785

'only_matching': True,

786

},

787

{

788

# Video with yt:stretch=17:0

789

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

794

'description': 'md5:ee18a25c350637c8faff806845bddee9',

795

'upload_date': '20151107',

796

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

797

'uploader': 'CH GAMER DROID',

798

},

799

'params': {

800

'skip_download': True,

801

},

802

'skip': 'This video does not exist.',

803

},

804

{

805

# Video licensed under Creative Commons

806

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

811

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

812

'upload_date': '20150127',

813

'uploader_id': 'BerkmanCenter',

814

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

815

'uploader': 'BerkmanCenter',

816

'license': 'Creative Commons Attribution license (reuse allowed)',

817

},

818

'params': {

819

'skip_download': True,

},

},

{

# Channel-like uploader_url

824

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

829

'description': 'md5:dda0d780d5a6e120758d1711d062a867',

830

'upload_date': '20151119',

831

'uploader': 'Bernie 2016',

832

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

833

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

834

'license': 'Creative Commons Attribution license (reuse allowed)',

835

},

836

'params': {

837

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

842

'only_matching': True,

843

},

844

{

845

# YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)

846

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

847

'only_matching': True,

848

},

849

{

850

# Rental video preview

851

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

856

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

857

'upload_date': '20150811',

858

'uploader': 'FlixMatrix',

859

'uploader_id': 'FlixMatrixKaravan',

860

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

861

'license': 'Standard YouTube License',

862

},

863

'params': {

864

'skip_download': True,

},

},

{

# YouTube Red video with episode data

869

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

874

'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822',

875

'upload_date': '20170118',

876

'uploader': 'Vsauce',

877

'uploader_id': 'Vsauce',

878

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

879

'license': 'Standard YouTube License',

880

'series': 'Mind Field',

'season_number': 1,

'episode_number': 1,

},

'params': {

'skip_download': True,

886

},

887

'expected_warnings': [

888

'Skipping DASH manifest',

],

},

{

# itag 212

'url': '1t24XAntNCY',

894

'only_matching': True,

}

]

def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then start with an empty player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Starts out empty on every new instance
    self._player_cache = dict()

901

902

def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage for *video_id* is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)

905

906

def report_information_extraction(self, video_id):
    """Announce that information for *video_id* is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)

909

910

def report_unavailable_format(self, video_id, format):
    """Announce that the requested *format* is not available for *video_id*."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)

913

914

def report_rtmp_download(self):
    """Announce that the download will go over the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)

917

918

def _signature_cache_id(self, example_sig):

919

""" Return a string representation of a signature """

920

return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))

921

922

def _extract_signature_function(self, video_id, player_url, example_sig):

923

id_m = re.match(

924

r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',

925

player_url)

926

if not id_m:

927

raise ExtractorError('Cannot identify player %r' % player_url)

928

player_type = id_m.group('ext')

929

player_id = id_m.group('id')

930

931

# Read from filesystem cache

932

func_id = '%s_%s_%s' % (

933

player_type, player_id, self._signature_cache_id(example_sig))

934

assert os.path.basename(func_id) == func_id

935

936

cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)

937

if cache_spec is not None:

938

return lambda s: ''.join(s[i] for i in cache_spec)

939

940

download_note = (

941

'Downloading player %s' % player_url

942

if self._downloader.params.get('verbose') else

943

'Downloading %s player %s' % (player_type, player_id)

944

)

945

if player_type == 'js':

946

code = self._download_webpage(

947

player_url, video_id,

948

note=download_note,

949

errnote='Download of %s failed' % player_url)

950

res = self._parse_sig_js(code)

951

elif player_type == 'swf':

952

urlh = self._request_webpage(

953

player_url, video_id,

954

note=download_note,

955

errnote='Download of %s failed' % player_url)

956

code = urlh.read()

957

res = self._parse_sig_swf(code)

958

else:

959

assert False, 'Invalid player type %r' % player_type

960

961

test_string = ''.join(map(compat_chr, range(len(example_sig))))

962

cache_res = res(test_string)

963

cache_spec = [ord(c) for c in cache_res]

964

965

self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)

966

return res

967

968

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is probed with a string of the same length as *example_sig*
    and the observed permutation is rendered as a sum of indexing and
    slicing expressions on ``s``, which is sent to the screen.
    """
    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive, walking by stride) as a slice
        head = '' if first == 0 else str(first)
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (head, tail, stride_part)

    def gen_sig_code(idxs):
        stride = None
        # Placeholder only - run_start is always assigned together with stride
        run_start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if stride is None:
                if delta in [-1, 1]:
                    # A new ascending or descending run begins at prev
                    stride = delta
                    run_start = prev
                else:
                    yield 's[%d]' % prev
            elif delta != stride:
                # The current run ended at prev
                yield _slice_expr(run_start, prev, stride)
                stride = None
        if stride is None:
            yield 's[%d]' % i
        else:
            yield _slice_expr(run_start, i, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % (part_lengths)
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

1006

1007

def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player code.

    The name of the function is looked up in *jscode* and the function is
    extracted with JSInterpreter; the returned callable passes its single
    argument to it wrapped in a list.
    """
    sig_func_name = self._search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
        'Initial JS player signature function name')
    decipher = JSInterpreter(jscode).extract_function(sig_func_name)

    def _apply(s):
        return decipher([s])

    return _apply

1015

1016

def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter; the returned callable passes its single argument
    to it wrapped in a list.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def _apply(s):
        return decipher([s])

    return _apply

1022

1023

def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Protocol-relative player URLs are fetched over https
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    try:
        # One function is kept per (player, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s
            )
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the traceback
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)

1046

1047

def _get_subtitles(self, video_id, webpage):
    """Return a dict mapping language codes to lists of subtitle formats.

    The track list is downloaded from the timedtext service; for every
    language the first listed track is used and one entry is produced per
    extension in self._SUBTITLE_FORMATS. An empty dict is returned (after
    a warning) when the list cannot be downloaded or contains no tracks.
    """
    try:
        subs_doc = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    tracks_by_lang = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        # Only the first track of each language is kept
        if lang not in tracks_by_lang:
            tracks_by_lang[lang] = [{
                'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                }),
                'ext': ext,
            } for ext in self._SUBTITLE_FORMATS]

    if tracks_by_lang:
        return tracks_by_lang
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}

def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed ytplayer.config JSON found in *webpage*.

    None is returned when no config is found; parsing is non-fatal.
    """
    # User data may contain arbitrary character sequences that can break
    # JSON extraction by regex: e.g. when '};' occurs inside the data the
    # second, looser pattern does not capture the whole JSON. The more
    # specific pattern is therefore tried first. This is a workaround until
    # proper quoted string handling is implemented (see
    # https://github.com/rg3/youtube-dl/issues/7468,
    # https://github.com/rg3/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)

1096

1097

def _get_automatic_captions(self, video_id, webpage):
    """Return a dict mapping language codes to automatic caption formats.

    The webpage is needed to find the captions URL; it is passed in as an
    argument so it does not have to be downloaded again. An empty dict is
    returned (after a warning) when no automatic captions are found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # Ask for the list of available tracks and translation targets
            list_query = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            caption_list = self._download_xml(
                caption_url + '&' + list_query, video_id)
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            translations = {}
            for target in caption_list.findall('target'):
                target_lang = target.attrib['lang_code']
                translations[target_lang] = [{
                    'url': caption_url + '&' + compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return translations

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        first_track_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        url_parts = compat_urllib_parse_urlparse(first_track_url)
        base_qs = compat_parse_qs(url_parts.query)

        translations = {}
        for lang_spec in caption_translation_languages.split(','):
            lang_fields = compat_parse_qs(compat_urllib_parse_unquote_plus(lang_spec))
            lang_code = lang_fields.get('lc', [None])[0]
            if not lang_code:
                continue
            formats_for_lang = []
            for ext in self._SUBTITLE_FORMATS:
                # Reuse the query of the first track, overriding only the
                # target language and the format
                base_qs['tlang'] = [lang_code]
                base_qs['fmt'] = [ext]
                new_query = compat_urllib_parse_urlencode(base_qs, True)
                formats_for_lang.append({
                    'url': compat_urlparse.urlunparse(
                        url_parts._replace(query=new_query)),
                    'ext': ext,
                })
            translations[lang_code] = formats_for_lang
        return translations
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}

1178

1179

def _mark_watched(self, video_id, video_info):
    """Request the playback stats URL of the video to mark it as watched.

    Nothing is done when *video_info* has no
    'videostats_playback_base_url'; a failed request is not fatal.
    """
    base_url = video_info.get('videostats_playback_base_url', [None])[0]
    if not base_url:
        return
    url_parts = compat_urlparse.urlparse(base_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    query.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

1201

1202

@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of cls._VALID_URL matched against *url*.

    The pattern is matched in verbose mode. Raises ExtractorError when the
    URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)

1209

1210

def _extract_from_m3u8(self, manifest_url, video_id):
    """Download the manifest and return a dict mapping itags to format URLs.

    Every non-empty manifest line that does not start with '#' is treated
    as a format URL; its itag is taken from the 'itag/<digits>/' part.
    """
    manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
    url_map = {}
    for line in manifest.split('\n'):
        # Skip blank lines and '#'-prefixed lines
        if not line or line.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', line, 'itag')
        url_map[itag] = line
    return url_map

1224

1225

def _extract_annotations(self, video_id):
    """Download and return the annotations document of the video."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')

1228

1229

def _real_extract(self, url):
    """Extract the metadata and formats of a single YouTube video.

    Steps, in order: parse start/end times from the URL, download the
    watch page, obtain video_info (from the embed page for age-gated
    videos, otherwise from ytplayer.config and get_video_info), scrape the
    metadata from the watch page, build the list of formats (rtmp,
    url_encoded_fmt_stream_map/adaptive_fmts, or HLS) and merge in the
    formats of the DASH manifests.

    Returns an info dict, or a playlist result for multifeed videos, or a
    url result for rental previews. Raises ExtractorError when the video
    is unavailable or required information is missing.
    """
    url, smuggled_data = unsmuggle_url(url, {})

    proto = (
        'http' if self._downloader.params.get('prefer_insecure', False)
        else 'https')

    # The fragment takes precedence over the query, 't' over 'start'
    start_time = None
    end_time = None
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        if start_time is None and 't' in query:
            start_time = parse_duration(query['t'][0])
        if start_time is None and 'start' in query:
            start_time = parse_duration(query['start'][0])
        if end_time is None and 'end' in query:
            end_time = parse_duration(query['end'][0])

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    dash_mpds = []

    def add_dash_mpd(video_info):
        # Collect each distinct DASH manifest URL once
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    # Get video info
    embed_webpage = None
    is_live = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse_urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        })
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
    else:
        age_gate = False
        video_info = None
        # Try looking directly into the video webpage
        ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
        if ytplayer_config:
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            # Rental video is not rented but preview is available (e.g.
            # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
            # https://github.com/rg3/youtube-dl/issues/10532)
            if not video_info and args.get('ypc_vid'):
                return self.url_result(
                    args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                is_live = True
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = (
                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (proto, video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_info_url,
                    video_id, note=False,
                    errnote='unable to download video info webpage')
                get_video_info = compat_parse_qs(video_info_webpage)
                if get_video_info.get('use_cipher_signature') != ['True']:
                    add_dash_mpd(get_video_info)
                if not video_info:
                    video_info = get_video_info
                if 'token' in get_video_info:
                    # Different get_video_info requests may report different results, e.g.
                    # some may report video unavailability, but some may serve it without
                    # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
                    # the original webpage as well as el=info and el=embedded get_video_info
                    # requests report video unavailability due to geo restriction while
                    # el=detailpage succeeds and returns valid data). This is probably
                    # due to YouTube measures against IP ranges of hosting providers.
                    # Working around by preferring the first succeeded video_info containing
                    # the token if no such video_info yet was found.
                    if 'token' not in video_info:
                        video_info = get_video_info
                    break
    if 'token' not in video_info:
        if 'reason' in video_info:
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
                if regions_allowed:
                    raise ExtractorError('YouTube said: This video is available in %s only' % (
                        ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
                        expected=True)
            raise ExtractorError(
                'YouTube said: %s' % video_info['reason'][0],
                expected=True, video_id=video_id)
        else:
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',
                video_id=video_id)

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
    else:
        self._downloader.report_warning('Unable to extract video title')
        video_title = '_'

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        # Replace shortened links (text ending in '...') with their full
        # title/href value before stripping the markup
        video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                (?:title|href)="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                class="[^"]*"[^>]*>
            [^<]+\.{3}\s*
            </a>
        ''', r'\1', video_description)
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = ''

    if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            entries = []
            feed_ids = []
            multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/rg3/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                })
                feed_ids.append(feed_data['id'][0])
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(entries, video_id, video_title, video_description)
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if 'view_count' in video_info:
        view_count = int(video_info['view_count'][0])
    else:
        view_count = None

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError('Unable to extract uploader name')
    video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    video_uploader_url = None
    mobj = re.search(
        r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
        video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group('uploader_id')
        video_uploader_url = mobj.group('uploader_url')
    else:
        self._downloader.report_warning('unable to extract uploader nickname')

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
    if not upload_date:
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
            video_webpage, 'upload date', default=None)
        if upload_date:
            # Normalise the separators of the date text that was just
            # captured. This must use upload_date itself: mobj still refers
            # to the uploader link match above (or is None).
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', upload_date).split())
    upload_date = unified_strdate(upload_date)

    video_license = self._html_search_regex(
        r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
        video_webpage, 'license', default=None)

    m_music = re.search(
        r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:$.+?$)?</li',
        video_webpage)
    if m_music:
        video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
        video_creator = clean_html(m_music.group('creator'))
    else:
        video_alt_title = video_creator = None

    m_episode = re.search(
        r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
        video_webpage)
    if m_episode:
        series = m_episode.group('series')
        season_number = int(m_episode.group('season'))
        episode_number = int(m_episode.group('episode'))
    else:
        series = season_number = episode_number = None

    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
    if m_cat_container:
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            default=None)
        video_categories = None if category is None else [category]
    else:
        video_categories = None

    video_tags = [
        unescapeHTML(m.group('content'))
        for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

    def _extract_count(count_name):
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    if 'length_seconds' not in video_info:
        self._downloader.report_warning('unable to extract video duration')
        video_duration = None
    else:
        video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0]))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    def _map_to_format_list(urlmap):
        formats = []
        for itag, video_real_url in urlmap.items():
            dct = {
                'format_id': itag,
                'url': video_real_url,
                'player_url': player_url,
            }
            if itag in self._formats:
                dct.update(self._formats[itag])
            formats.append(dct)
        return formats

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        formats = [{
            'format_id': '_rtmp',
            'protocol': 'rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
        }]
    elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        # fmt_list entries look like '<itag>/<width>x<height>/...'
        formats_spec = {}
        fmt_list = video_info.get('fmt_list', [''])[0]
        if fmt_list:
            for fmt in fmt_list.split(','):
                spec = fmt.split('/')
                if len(spec) > 1:
                    width_height = spec[1].split('x')
                    if len(width_height) == 2:
                        formats_spec[spec[0]] = {
                            'resolution': spec[1],
                            'width': int_or_none(width_height[0]),
                            'height': int_or_none(width_height[1]),
                        }
        formats = []
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
                continue
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 'sig' in url_data:
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                encrypted_sig = url_data['s'][0]
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                jsplayer_url_json = self._search_regex(
                    ASSETS_RE,
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                player_url = json.loads(jsplayer_url_json)
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                    else:
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
                                player_url,
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'

            dct = {
                'format_id': format_id,
                'url': url,
                'player_url': player_url,
            }
            if format_id in self._formats:
                dct.update(self._formats[format_id])
            if format_id in formats_spec:
                dct.update(formats_spec[format_id])

            # Some itags are not included in DASH manifest thus corresponding formats will
            # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
            # Trying to extract metadata from url_encoded_fmt_stream_map entry.
            mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
            width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

            more_fields = {
                'filesize': int_or_none(url_data.get('clen', [None])[0]),
                'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                'width': width,
                'height': height,
                'fps': int_or_none(url_data.get('fps', [None])[0]),
                'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
            }
            # Only truthy values override what is already known
            for key, value in more_fields.items():
                if value:
                    dct[key] = value
            type_ = url_data.get('type', [None])[0]
            if type_:
                type_split = type_.split(';')
                kind_ext = type_split[0].split('/')
                if len(kind_ext) == 2:
                    kind, _ = kind_ext
                    dct['ext'] = mimetype2ext(type_split[0])
                    if kind in ('audio', 'video'):
                        codecs = None
                        for mobj in re.finditer(
                                r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                            if mobj.group('key') == 'codecs':
                                codecs = mobj.group('val')
                                break
                        if codecs:
                            codecs = codecs.split(',')
                            if len(codecs) == 2:
                                acodec, vcodec = codecs[1], codecs[0]
                            else:
                                acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
                            dct.update({
                                'acodec': acodec,
                                'vcodec': vcodec,
                            })
            formats.append(dct)
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        formats = _map_to_format_list(url_map)
        # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
        for a_format in formats:
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
    else:
        unavailable_message = self._html_search_regex(
            r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
            video_webpage, 'unavailable message', default=None)
        if unavailable_message:
            raise ExtractorError(unavailable_message, expected=True)
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for mpd_url in dash_mpds:
            dash_formats = {}
            try:
                def decrypt_sig(mobj):
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s

                mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

                for df in self._extract_mpd_formats(
                        mpd_url, video_id, fatal=dash_mpd_fatal,
                        formats_dict=self._formats):
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                    # Additional DASH manifests may end up in HTTP Error 403 therefore
                    # allow them to fail without bug report message if we already have
                    # some DASH manifest succeeded. This is temporary workaround to reduce
                    # burst of bug reports until we figure out the reason and whether it
                    # can be fixed at all.
                    dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            if dash_formats:
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                # example.
                formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
                formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        video_webpage)
    if stretched_m:
        w = float(stretched_m.group('w'))
        h = float(stretched_m.group('h'))
        # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
        # We will only process correct ratios.
        if w > 0 and h > 0:
            ratio = w / h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    self.mark_watched(video_id, video_info)

    return {
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'uploader_url': video_uploader_url,
        'upload_date': upload_date,
        'license': video_license,
        'creator': video_creator,
        'title': video_title,
        'alt_title': video_alt_title,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'tags': video_tags,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'formats': formats,
        'is_live': is_live,
        'start_time': start_time,
        'end_time': end_time,
        'series': series,
        'season_number': season_number,
        'episode_number': episode_number,
    }

class YoutubeSharedVideoIE(InfoExtractor):
    # Resolves "shared" links (youtube.com/shared?ci=...) to the real video.
    IE_NAME = 'youtube:shared'
    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Look up the real video id in the shared page and delegate to YoutubeIE.

        The id captured from the URL (the ``ci`` parameter) is only used to
        label the download; the id handed on is the page's ``videoId`` meta
        value, whose absence is fatal.
        """
        shared_id = self._match_id(url)
        shared_page = self._download_webpage(url, shared_id)
        target_id = self._html_search_meta(
            'videoId', shared_page, 'YouTube video id', fatal=True)
        return self.url_result(target_id, YoutubeIE.ie_key())

1817

1818

1819

class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for playlists, mixes and playlist-bearing watch URLs.
    IE_DESC = 'YouTube.com playlists'
    # Group 1 / group 2 capture the playlist id (URL form / bare id form).
    # NOTE(review): as visible here, the youtube.com branch requires '?' or
    # 'p/' directly after the host, which would not match the
    # '/playlist?list=', '/watch?...&list=' or '/embed/videoseries?list='
    # URLs listed in _TESTS below. A line of path alternatives may have been
    # lost from this view of the file -- confirm against the upstream source.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            | p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    # Page fetched by _extract_playlist; '%s' receives the playlist id.
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
    # Pattern with named groups id / index / title for one playlist entry
    # (presumably consumed by the base class page parser -- not visible here).
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Attempt to log in before any extraction."""
        self._login()

    def _extract_mix(self, playlist_id):
        """Return a playlist result for a mix.

        Watch pages are downloaded repeatedly, each time seeded with the last
        video id collected so far, until a page contributes no video id that
        has not been seen already. The title is taken from the last page
        downloaded.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        # Seed: the trailing 11 characters of the playlist id.
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Try progressively less specific class names for the title element.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build a playlist result from it.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when no title was found and ``_entries`` yields nothing for the
        page.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or that the parameters are
        invalid. Other alerts, except the language chooser, are reported as
        warnings.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        # A fresh _entries() generator is passed here; the one probed above
        # has already been advanced.
        return has_videos, self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether only the single video named in the URL is wanted.

        Returns ``(video_id, url_result)`` when the URL carries a video id and
        the ``noplaylist`` parameter is set, ``(video_id, None)`` when it
        carries a video id but the playlist should be downloaded, and
        ``(None, None)`` when no video id is present.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        # Prefer the 'v' query parameter, fall back to a youtu.be/<id> path.
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        """Extract a single video, a mix, or a regular playlist from ``url``.

        Raises ExtractorError if ``url`` does not match ``_VALID_URL``.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

2080

2081

2082

class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for /channel/<id> URLs.
    IE_DESC = 'YouTube.com channels'
    IE_NAME = 'youtube:channel'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the URL of the channel's videos page."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos.

        If a 'UC...' channel id can be read from the channel page, the result
        is a redirect to the matching 'UU...' playlist. Otherwise the videos
        page itself is parsed.

        Raises ExtractorError (expected) when the videos page has no entries
        but carries an alert message.
        """
        channel_id = self._match_id(url)
        videos_url = self._build_template_url(url, channel_id)

        # Page-by-page channel listing is capped at 35 pages of 30 items,
        # i.e. 1050 videos (see #5778). Prefer the channel's playlist when
        # its id can be obtained, and only fall back to paging otherwise.
        listing = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        owner_id = False
        if listing is not False:
            owner_id = self._html_search_meta(
                'channelId', listing, 'channel id', default=None)
            if not owner_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    listing, 'channel url', default=None)
                if app_url:
                    owner_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if owner_id and owner_id.startswith('UC'):
            uploads_id = 'UU' + owner_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_label = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_label is not None:
            # Everything is on this one page; the ajax pages are empty and
            # therefore useless here.
            entries = []
            for vid, vid_title in self.extract_videos_from_page(first_page):
                entries.append(self.url_result(
                    vid, 'Youtube', video_id=vid, video_title=vid_title))
            return self.playlist_result(entries, channel_id)

        # Probe for at least one entry so an alert can be surfaced instead of
        # silently returning an empty playlist.
        try:
            next(self._entries(first_page, channel_id))
        except StopIteration:
            alert = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                first_page, 'alert', default=None, group='alert')
            if alert:
                raise ExtractorError('Youtube said: %s' % alert, expected=True)

        return self.playlist_result(self._entries(first_page, channel_id), channel_id)

2171

2172

2173

class YoutubeUserIE(YoutubeChannelIE):
    # Extractor for user pages, reusing the channel extraction logic.
    # NOTE(review): no _VALID_URL is visible for this class in this view of
    # the file, yet _build_template_url() reads a 'user' group that the
    # parent's pattern does not define -- confirm the pattern against the
    # upstream source before relying on this class.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'

    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available.
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE in this module does.

        This extractor's pattern is too permissive, so any module-level name
        starting with 'Youtube' and ending with 'IE' (other than this class)
        gets the first chance to claim the URL.
        """
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for the user named in ``url``.

        The path prefix is the matched 'user' group, or 'user' when that
        group is empty. The id comes from the match, not from ``channel_id``.
        """
        match = re.match(self._VALID_URL, url)
        prefix = match.group('user') or 'user'
        return self._TEMPLATE_URL % (prefix, match.group('id'))

2223

2224

2225

class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    # Extractor for '<channel or user>/live' URLs.
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a '/live' URL to the video it shows, or to its base URL.

        The video is returned only when the page declares og type 'video'
        and exposes an 11-character ``videoId`` meta value. In every other
        case, including a failed download, the URL without '/live' is
        returned for another extractor to handle.
        """
        match = re.match(self._VALID_URL, url)
        owner = match.group('id')
        fallback = self.url_result(match.group('base_url'))

        page = self._download_webpage(url, owner, fatal=False)
        if not page:
            return fallback

        og_type = self._og_search_property(
            'type', page, 'page type', default=None)
        live_id = self._html_search_meta(
            'videoId', page, 'video id', default=None)
        if og_type != 'video' or not live_id:
            return fallback
        if not re.match(r'^[0-9A-Za-z_-]{11}$', live_id):
            return fallback
        return self.url_result(live_id, YoutubeIE.ie_key())

2274

2275

2276

class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for the '/playlists' page of a user or channel.
    # Only metadata is declared here; no method is overridden, so extraction
    # comes from YoutubePlaylistsBaseInfoExtractor (defined outside this view).
    IE_DESC = 'YouTube.com user/channel playlists'
    # 'id' captures the user or channel name that precedes '/playlists'.
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]

class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    # Extractor for the 'ytsearch' search key.
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into every results request;
    # subclasses override this to alter the search (e.g. the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until a page yields no
        videos or at least ``n`` videos have been collected. The list is then
        cut to ``n`` entries and returned as a playlist result whose id is
        ``query``.

        Raises ExtractorError (expected) when a downloaded page carries a
        'search-message' element.
        """
        videos = []

        for pagenum in itertools.count(1):
            url_query = {
                'search_query': query.encode('utf-8'),
                'page': pagenum,
                'spf': 'navigate',
            }
            url_query.update(self._EXTRA_QUERY_ARGS)
            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page')
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are available. Using '>' here
            # would fetch one more page when exactly n results are in hand,
            # and that page could raise a spurious "No video results" error.
            if not new_videos or len(videos) >= n:
                break

        # Guarded slice: n may be float('inf') (see _MAX_RESULTS), which is
        # not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)

2349

2350

2351

class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE under its own search key. The only
    # functional difference is the extra 'search_sort' query argument, which
    # YoutubeSearchIE._get_n_results merges into each results request.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}

2356

2357

2358

class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    # Extractor for literal search-results URLs (youtube.com/results?...).
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos of a results page as a playlist.

        The playlist title is the decoded search query from the URL.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        search_terms = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, search_terms)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=search_terms)

2379

2380

2381

class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for /show/<id> URLs.
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base class using the show's '/playlists' URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)

2398

2399

2400

class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before extraction."""
        self._login()

    def _real_extract(self, url):
        """Collect the videos of the feed page and its "load more" pages.

        ``url`` itself is not used: the page fetched is built from
        ``_FEED_NAME``. Paging stops when a portion contributes no unseen
        video id or when no "load more" link is found.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            # This must be a real list: on Python 3, filter() returns an
            # iterator that is always truthy, so the emptiness test below
            # would never trigger and the loop would not terminate.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)

2447

2448

2449

class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # Extractor for the "watch later" list, handled as playlist 'WL'.
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is requested, else the WL playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        # Only the playlist half of the (has_videos, playlist) pair is needed.
        return self._extract_playlist('WL')[1]

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Extractor for the logged-in user's favourites.
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to the playlist extractor.

        ``url`` is not used: the favourites page is always fetched from its
        fixed address.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')

2480

2481

2482

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for recommended videos. Only configuration is declared
    # here; YoutubeFeedsInfoExtractor uses _FEED_NAME to build the feed URL
    # and extractor name, and _PLAYLIST_TITLE as the playlist title.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

2487

2488

2489

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the subscriptions feed. Only configuration is
    # declared here; YoutubeFeedsInfoExtractor uses _FEED_NAME to build the
    # feed URL and extractor name, and _PLAYLIST_TITLE as the playlist title.
    # The description names the shortcut with its leading colon because
    # _VALID_URL only accepts ':ytsubs' / ':ytsubscriptions'.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'

2494

2495

2496

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the watch history. Only configuration is declared
    # here; YoutubeFeedsInfoExtractor uses _FEED_NAME to build the feed URL
    # and extractor name, and _PLAYLIST_TITLE as the playlist title.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'

2501

2502

2503

class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch / attribution_link URLs that carry no video id and
    # turns them into a helpful error.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising to quote the URL."""
        advice = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(advice, expected=True)

class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than 11 characters.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        complaint = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(complaint, expected=True)