jfr.im git - yt-dlp.git/blame_incremental - youtube

Commit	Line	Data
	1	# coding: utf-8
	2
	3	from __future__ import unicode_literals
	4
	5
	6	import itertools
	7	import json
	8	import os.path
	9	import random
	10	import re
	11	import time
	12	import traceback
	13
	14	from .common import InfoExtractor, SearchInfoExtractor
	15	from ..jsinterp import JSInterpreter
	16	from ..swfinterp import SWFInterpreter
	17	from ..compat import (
	18	compat_chr,
	19	compat_parse_qs,
	20	compat_urllib_parse_unquote,
	21	compat_urllib_parse_unquote_plus,
	22	compat_urllib_parse_urlencode,
	23	compat_urllib_parse_urlparse,
	24	compat_urlparse,
	25	compat_str,
	26	)
	27	from ..utils import (
	28	clean_html,
	29	error_to_compat_str,
	30	ExtractorError,
	31	float_or_none,
	32	get_element_by_attribute,
	33	get_element_by_id,
	34	int_or_none,
	35	mimetype2ext,
	36	orderedSet,
	37	parse_duration,
	38	remove_quotes,
	39	remove_start,
	40	sanitized_Request,
	41	smuggle_url,
	42	str_to_int,
	43	try_get,
	44	unescapeHTML,
	45	unified_strdate,
	46	unsmuggle_url,
	47	uppercase_escape,
	48	urlencode_postdata,
	49	ISO3166Utils,
	50	)
	51
	52
	53	class YoutubeBaseInfoExtractor(InfoExtractor):
	54	"""Provide base functions for Youtube extractors"""
	55	_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
	56	_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
	57	_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
	58	_NETRC_MACHINE = 'youtube'
	59	# If True it will raise an error if no login info is provided
	60	_LOGIN_REQUIRED = False
	61
	62	def _set_language(self):
	63	self._set_cookie(
	64	'.youtube.com', 'PREF', 'f1=50000000&hl=en',
	65	# YouTube sets the expire time to about two months
	66	expire_time=time.time() + 2 * 30 * 24 * 3600)
	67
	68	def _ids_to_results(self, ids):
	69	return [
	70	self.url_result(vid_id, 'Youtube', video_id=vid_id)
	71	for vid_id in ids]
	72
	73	def _login(self):
	74	"""
	75	Attempt to log in to YouTube.
	76	True is returned if successful or skipped.
	77	False is returned if login failed.
	78
	79	If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
	80	"""
	81	(username, password) = self._get_login_info()
	82	# No authentication to be performed
	83	if username is None:
	84	if self._LOGIN_REQUIRED:
	85	raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
	86	return True
	87
	88	login_page = self._download_webpage(
	89	self._LOGIN_URL, None,
	90	note='Downloading login page',
	91	errnote='unable to fetch login page', fatal=False)
	92	if login_page is False:
	93	return
	94
	95	login_form = self._hidden_inputs(login_page)
	96
	97	login_form.update({
	98	'checkConnection': 'youtube',
	99	'Email': username,
	100	'Passwd': password,
	101	})
	102
	103	login_results = self._download_webpage(
	104	self._PASSWORD_CHALLENGE_URL, None,
	105	note='Logging in', errnote='unable to log in', fatal=False,
	106	data=urlencode_postdata(login_form))
	107	if login_results is False:
	108	return False
	109
	110	error_msg = self._html_search_regex(
	111	r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
	112	login_results, 'error message', default=None)
	113	if error_msg:
	114	raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
	115
	116	if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
	117	raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
	118
	119	# Two-Factor
	120	# TODO add SMS and phone call support - these require making a request and then prompting the user
	121
	122	if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
	123	tfa_code = self._get_tfa_info('2-step verification code')
	124
	125	if not tfa_code:
	126	self._downloader.report_warning(
	127	'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
	128	'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
	129	return False
	130
	131	tfa_code = remove_start(tfa_code, 'G-')
	132
	133	tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
	134
	135	tfa_form_strs.update({
	136	'Pin': tfa_code,
	137	'TrustDevice': 'on',
	138	})
	139
	140	tfa_data = urlencode_postdata(tfa_form_strs)
	141
	142	tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
	143	tfa_results = self._download_webpage(
	144	tfa_req, None,
	145	note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
	146
	147	if tfa_results is False:
	148	return False
	149
	150	if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
	151	self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
	152	return False
	153	if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
	154	self._downloader.report_warning('unable to log in - did the page structure change?')
	155	return False
	156	if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
	157	self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
	158	return False
	159
	160	if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
	161	self._downloader.report_warning('unable to log in: bad username or password')
	162	return False
	163	return True
	164
	165	def _real_initialize(self):
	166	if self._downloader is None:
	167	return
	168	self._set_language()
	169	if not self._login():
	170	return
	171
	172
	173	class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
	174	# Extract entries from page with "Load more" button
	175	def _entries(self, page, playlist_id):
	176	more_widget_html = content_html = page
	177	for page_num in itertools.count(1):
	178	for entry in self._process_page(content_html):
	179	yield entry
	180
	181	mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
	182	if not mobj:
	183	break
	184
	185	more = self._download_json(
	186	'https://youtube.com/%s' % mobj.group('more'), playlist_id,
	187	'Downloading page #%s' % page_num,
	188	transform_source=uppercase_escape)
	189	content_html = more['content_html']
	190	if not content_html.strip():
	191	# Some webpages show a "Load more" button but they don't
	192	# have more videos
	193	break
	194	more_widget_html = more['load_more_widget_html']
	195
	196
	197	class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	198	def _process_page(self, content):
	199	for video_id, video_title in self.extract_videos_from_page(content):
	200	yield self.url_result(video_id, 'Youtube', video_id, video_title)
	201
	202	def extract_videos_from_page(self, page):
	203	ids_in_page = []
	204	titles_in_page = []
	205	for mobj in re.finditer(self._VIDEO_RE, page):
	206	# The link with index 0 is not the first video of the playlist (not sure if still actual)
	207	if 'index' in mobj.groupdict() and mobj.group('id') == '0':
	208	continue
	209	video_id = mobj.group('id')
	210	video_title = unescapeHTML(mobj.group('title'))
	211	if video_title:
	212	video_title = video_title.strip()
	213	try:
	214	idx = ids_in_page.index(video_id)
	215	if video_title and not titles_in_page[idx]:
	216	titles_in_page[idx] = video_title
	217	except ValueError:
	218	ids_in_page.append(video_id)
	219	titles_in_page.append(video_title)
	220	return zip(ids_in_page, titles_in_page)
	221
	222
	223	class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
	224	def _process_page(self, content):
	225	for playlist_id in orderedSet(re.findall(
	226	r'<h3[^>]+class="[^"]yt-lockup-title[^"]"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
	227	content)):
	228	yield self.url_result(
	229	'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
	230
	231	def _real_extract(self, url):
	232	playlist_id = self._match_id(url)
	233	webpage = self._download_webpage(url, playlist_id)
	234	title = self._og_search_title(webpage, fatal=False)
	235	return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
	236
	237
	238	class YoutubeIE(YoutubeBaseInfoExtractor):
	239	IE_DESC = 'YouTube.com'
	240	_VALID_URL = r"""(?x)^
	241	(
	242	(?:https?://\|//) # http(s):// or protocol-independent URL
	243	(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/\|
	244	(?:www\.)?deturl\.com/www\.youtube\.com/\|
	245	(?:www\.)?pwnyoutube\.com/\|
	246	(?:www\.)?yourepeat\.com/\|
	247	tube\.majestyc\.net/\|
	248	youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
	249	(?:.*?\#/)? # handle anchor (#/) redirect urls
	250	(?: # the various things that can precede the ID:
	251	(?:(?:v\|embed\|e)/(?!videoseries)) # v/ or embed/ or e/
	252	\|(?: # or the v= param in all its forms
	253	(?:(?:watch\|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup\|.php) or nothing (like /?v=xxxx)
	254	(?:\?\|\#!?) # the params delimiter ? or # or #!
	255	(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
	256	v=
	257	)
	258	))
	259	\|(?:
	260	youtu\.be\| # just youtu.be/xxxx
	261	vid\.plus\| # or vid.plus/xxxx
	262	zwearz\.com/watch\| # or zwearz.com/watch/xxxx
	263	)/
	264	\|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
	265	)
	266	)? # all until now is optional -> you can pass the naked ID
	267	([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
	268	(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
	269	(?(1).+)? # if we found the ID, everything can follow
	270	$"""
	271	_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
	272	_formats = {
	273	'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	274	'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
	275	'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
	276	'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
	277	'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
	278	'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	279	'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	280	'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	281	# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
	282	'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
	283	'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	284	'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
	285	'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	286	'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
	287	'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	288	'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
	289	'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	290	'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
	291
	292
	293	# 3D videos
	294	'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	295	'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
	296	'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	297	'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
	298	'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
	299	'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	300	'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
	301
	302	# Apple HTTP Live Streaming
	303	'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	304	'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	305	'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	306	'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
	307	'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	308	'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
	309	'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
	310	'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
	311
	312	# DASH mp4 video
	313	'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	314	'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	315	'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	316	'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	317	'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	318	'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
	319	'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	320	'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	321	'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	322	'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
	323	'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
	324	'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
	325
	326	# Dash mp4 audio
	327	'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
	328	'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
	329	'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
	330	'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
	331	'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
	332	'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
	333	'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
	334
	335	# Dash webm
	336	'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	337	'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	338	'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	339	'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	340	'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	341	'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
	342	'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
	343	'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	344	'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	345	'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	346	'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	347	'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	348	'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	349	'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	350	'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	351	# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
	352	'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	353	'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	354	'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	355	'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	356	'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
	357	'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
	358
	359	# Dash webm audio
	360	'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
	361	'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
	362
	363	# Dash webm audio with opus inside
	364	'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
	365	'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
	366	'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
	367
	368	# RTMP (unnamed)
	369	'_rtmp': {'protocol': 'rtmp'},
	370	}
	371	_SUBTITLE_FORMATS = ('ttml', 'vtt')
	372
	373	IE_NAME = 'youtube'
	374	_TESTS = [
	375	{
	376	'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
	377	'info_dict': {
	378	'id': 'BaW_jenozKc',
	379	'ext': 'mp4',
	380	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	381	'uploader': 'Philipp Hagemeister',
	382	'uploader_id': 'phihag',
	383	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
	384	'upload_date': '20121002',
	385	'license': 'Standard YouTube License',
	386	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	387	'categories': ['Science & Technology'],
	388	'tags': ['youtube-dl'],
	389	'duration': 10,
	390	'like_count': int,
	391	'dislike_count': int,
	392	'start_time': 1,
	393	'end_time': 9,
	394	}
	395	},
	396	{
	397	'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
	398	'note': 'Test generic use_cipher_signature video (#897)',
	399	'info_dict': {
	400	'id': 'UxxajLWwzqY',
	401	'ext': 'mp4',
	402	'upload_date': '20120506',
	403	'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
	404	'alt_title': 'I Love It (feat. Charli XCX)',
	405	'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
	406	'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
	407	'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
	408	'iconic ep', 'iconic', 'love', 'it'],
	409	'duration': 180,
	410	'uploader': 'Icona Pop',
	411	'uploader_id': 'IconaPop',
	412	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
	413	'license': 'Standard YouTube License',
	414	'creator': 'Icona Pop',
	415	}
	416	},
	417	{
	418	'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
	419	'note': 'Test VEVO video with age protection (#956)',
	420	'info_dict': {
	421	'id': '07FYdnEawAQ',
	422	'ext': 'mp4',
	423	'upload_date': '20130703',
	424	'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
	425	'alt_title': 'Tunnel Vision',
	426	'description': 'md5:64249768eec3bc4276236606ea996373',
	427	'duration': 419,
	428	'uploader': 'justintimberlakeVEVO',
	429	'uploader_id': 'justintimberlakeVEVO',
	430	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
	431	'license': 'Standard YouTube License',
	432	'creator': 'Justin Timberlake',
	433	'age_limit': 18,
	434	}
	435	},
	436	{
	437	'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
	438	'note': 'Embed-only video (#1746)',
	439	'info_dict': {
	440	'id': 'yZIXLfi8CZQ',
	441	'ext': 'mp4',
	442	'upload_date': '20120608',
	443	'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
	444	'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
	445	'uploader': 'SET India',
	446	'uploader_id': 'setindia',
	447	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
	448	'license': 'Standard YouTube License',
	449	'age_limit': 18,
	450	}
	451	},
	452	{
	453	'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
	454	'note': 'Use the first video ID in the URL',
	455	'info_dict': {
	456	'id': 'BaW_jenozKc',
	457	'ext': 'mp4',
	458	'title': 'youtube-dl test video "\'/\\ä↭𝕐',
	459	'uploader': 'Philipp Hagemeister',
	460	'uploader_id': 'phihag',
	461	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
	462	'upload_date': '20121002',
	463	'license': 'Standard YouTube License',
	464	'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
	465	'categories': ['Science & Technology'],
	466	'tags': ['youtube-dl'],
	467	'duration': 10,
	468	'like_count': int,
	469	'dislike_count': int,
	470	},
	471	'params': {
	472	'skip_download': True,
	473	},
	474	},
	475	{
	476	'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
	477	'note': '256k DASH audio (format 141) via DASH manifest',
	478	'info_dict': {
	479	'id': 'a9LDPn-MO4I',
	480	'ext': 'm4a',
	481	'upload_date': '20121002',
	482	'uploader_id': '8KVIDEO',
	483	'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
	484	'description': '',
	485	'uploader': '8KVIDEO',
	486	'license': 'Standard YouTube License',
	487	'title': 'UHDTV TEST 8K VIDEO.mp4'
	488	},
	489	'params': {
	490	'youtube_include_dash_manifest': True,
	491	'format': '141',
	492	},
	493	'skip': 'format 141 not served anymore',
	494	},
	495	# DASH manifest with encrypted signature
	496	{
	497	'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
	498	'info_dict': {
	499	'id': 'IB3lcPjvWLA',
	500	'ext': 'm4a',

1

# coding: utf-8

2

3

from __future__ import unicode_literals

import itertools

import json

import os.path

import random

import re

import time

import traceback

from .common import InfoExtractor, SearchInfoExtractor

15

from ..jsinterp import JSInterpreter

16

from ..swfinterp import SWFInterpreter

17

from ..compat import (

18

compat_chr,

19

compat_parse_qs,

20

compat_urllib_parse_unquote,

21

compat_urllib_parse_unquote_plus,

22

compat_urllib_parse_urlencode,

23

compat_urllib_parse_urlparse,

compat_urlparse,

compat_str,

)

from ..utils import (

clean_html,

error_to_compat_str,

ExtractorError,

float_or_none,

get_element_by_attribute,

get_element_by_id,

int_or_none,

mimetype2ext,

orderedSet,

parse_duration,

remove_quotes,

remove_start,

sanitized_Request,

smuggle_url,

str_to_int,

try_get,

unescapeHTML,

unified_strdate,

unsmuggle_url,

uppercase_escape,

urlencode_postdata,

ISO3166Utils,

)

class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
    _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def _set_language(self):
        """Set the ``PREF`` cookie (containing ``hl=en``) for ``.youtube.com``."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list holding one ``url_result`` per video id in ``ids``."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        An ExtractorError is also raised when the login response contains
        the password error element.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Return an explicit False (not None) so every failure path
            # matches the documented contract.
            return False

        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'checkConnection': 'youtube',
            'Email': username,
            'Passwd': password,
        })

        login_results = self._download_webpage(
            self._PASSWORD_CHALLENGE_URL, None,
            note='Logging in', errnote='unable to log in', fatal=False,
            data=urlencode_postdata(login_form))
        if login_results is False:
            return False

        error_msg = self._html_search_regex(
            r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
            login_results, 'error message', default=None)
        if error_msg:
            raise ExtractorError('Unable to login: %s' % error_msg, expected=True)

        # The error element is present but no message text was captured above.
        if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
            raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)

        # Two-Factor
        # TODO add SMS and phone call support - these require making a request and then prompting the user

        if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                # The trailing space keeps the two concatenated literals
                # from running together in the emitted warning.
                self._downloader.report_warning(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

            tfa_form_strs.update({
                'Pin': tfa_code,
                'TrustDevice': 'on',
            })

            tfa_data = urlencode_postdata(tfa_form_strs)

            tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
            tfa_results = self._download_webpage(
                tfa_req, None,
                note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)

            if tfa_results is False:
                return False

            # The challenge form being served again means the code was rejected.
            if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
                self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
                return False
            if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
                self._downloader.report_warning('unable to log in - did the page structure change?')
                return False
            if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
                self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
                return False

        # The login form still being present in the password response is
        # treated as rejected credentials.
        if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning('unable to log in: bad username or password')
            return False
        return True

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; no-op without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # A falsy login result is ignored; initialization simply ends here.
        if not self._login():
            return

class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of ``page`` and of every "Load more" continuation.

        Each chunk of HTML is handed to ``self._process_page``; the
        continuation URL is read from the ``data-uix-load-more-href``
        attribute of the most recent widget HTML.
        """
        current_html = page
        widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(current_html):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            more_url = 'https://youtube.com/%s' % load_more.group('more')
            progress_note = 'Downloading page #%s' % page_num
            more = self._download_json(
                more_url, playlist_id, progress_note,
                transform_source=uppercase_escape)

            current_html = more['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']

195

196

197

class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a ``url_result`` for every (id, title) pair found in ``content``."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return the (video id, title) pairs matched by ``self._VIDEO_RE`` in ``page``.

        Ids are reported once, in order of first appearance.  When an id is
        matched again, its title is filled in only if the earlier match had
        no (non-empty) title.
        """
        ids_in_page = []
        titles_in_page = []
        # Maps a video id to its position in the two lists above, replacing
        # a linear ``list.index`` scan per match with a constant-time lookup.
        position_of = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', to '0'
            # -- left unchanged; confirm the intent against _VIDEO_RE.
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_of.get(video_id)
            if idx is None:
                position_of[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)

221

222

223

class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a ``url_result`` for each distinct playlist id linked in ``content``."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download ``url`` and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # A missing og:title is tolerated (fatal=False).
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)

236

237

238

class YoutubeIE(YoutubeBaseInfoExtractor):

239

IE_DESC = 'YouTube.com'

240

_VALID_URL = r"""(?x)^

241

(

242

(?:https?://|//) # http(s):// or protocol-independent URL

243

(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|

244

(?:www\.)?deturl\.com/www\.youtube\.com/|

245

(?:www\.)?pwnyoutube\.com/|

246

(?:www\.)?yourepeat\.com/|

247

tube\.majestyc\.net/|

248

youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains

249

(?:.*?\#/)? # handle anchor (#/) redirect urls

250

(?: # the various things that can precede the ID:

251

(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/

252

|(?: # or the v= param in all its forms

253

(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)

254

(?:\?|\#!?) # the params delimiter ? or # or #!

255

(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)

v=

)

))

|(?:

youtu\.be| # just youtu.be/xxxx

261

vid\.plus| # or vid.plus/xxxx

262

zwearz\.com/watch| # or zwearz.com/watch/xxxx

263

)/

264

|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=

265

)

266

)? # all until now is optional -> you can pass the naked ID

267

([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID

268

(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE

269

(?(1).+)? # if we found the ID, everything can follow

270

$"""

271

_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'

272

_formats = {

273

'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

274

'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},

275

'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},

276

'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},

277

'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},

278

'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

279

'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

280

'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

281

# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well

282

'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},

283

'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

284

'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},

285

'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

286

'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},

287

'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

288

'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},

289

'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

290

'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

# 3D videos

'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

295

'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},

296

'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

297

'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},

298

'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},

299

'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

300

'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

301

302

# Apple HTTP Live Streaming

303

'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

304

'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

305

'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

306

'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},

307

'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

308

'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},

309

'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},

310

'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

311

312

# DASH mp4 video

313

'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

314

'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

315

'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

316

'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

317

'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

318

'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)

319

'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

320

'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

321

'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

322

'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},

323

'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},

324

'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},

325

326

# Dash mp4 audio

327

'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},

328

'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},

329

'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},

330

'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},

331

'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},

332

'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},

333

'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},

334

335

# Dash webm

336

'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

337

'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

338

'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

339

'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

340

'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

341

'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},

342

'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},

343

'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

344

'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

345

'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

346

'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

347

'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

348

'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

349

'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

350

'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

351

# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)

352

'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

353

'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

354

'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

355

'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

356

'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},

357

'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},

358

359

# Dash webm audio

360

'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},

361

'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

362

363

# Dash webm audio with opus inside

364

'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},

365

'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},

366

'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},

367

368

# RTMP (unnamed)

369

'_rtmp': {'protocol': 'rtmp'},

370

}

371

_SUBTITLE_FORMATS = ('ttml', 'vtt')

IE_NAME = 'youtube'

_TESTS = [

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

381

'uploader': 'Philipp Hagemeister',

382

'uploader_id': 'phihag',

383

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

384

'upload_date': '20121002',

385

'license': 'Standard YouTube License',

386

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

387

'categories': ['Science & Technology'],

388

'tags': ['youtube-dl'],

389

'duration': 10,

390

'like_count': int,

391

'dislike_count': int,

'start_time': 1,

'end_time': 9,

}

},

{

'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',

398

'note': 'Test generic use_cipher_signature video (#897)',

'info_dict': {

'id': 'UxxajLWwzqY',

'ext': 'mp4',

'upload_date': '20120506',

403

'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',

404

'alt_title': 'I Love It (feat. Charli XCX)',

405

'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',

406

'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',

407

'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',

408

'iconic ep', 'iconic', 'love', 'it'],

409

'duration': 180,

410

'uploader': 'Icona Pop',

411

'uploader_id': 'IconaPop',

412

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',

413

'license': 'Standard YouTube License',

414

'creator': 'Icona Pop',

}

},

{

'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',

419

'note': 'Test VEVO video with age protection (#956)',

'info_dict': {

'id': '07FYdnEawAQ',

'ext': 'mp4',

'upload_date': '20130703',

424

'title': 'Justin Timberlake - Tunnel Vision (Explicit)',

425

'alt_title': 'Tunnel Vision',

426

'description': 'md5:64249768eec3bc4276236606ea996373',

427

'duration': 419,

428

'uploader': 'justintimberlakeVEVO',

429

'uploader_id': 'justintimberlakeVEVO',

430

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',

431

'license': 'Standard YouTube License',

432

'creator': 'Justin Timberlake',

'age_limit': 18,

}

},

{

'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',

438

'note': 'Embed-only video (#1746)',

'info_dict': {

'id': 'yZIXLfi8CZQ',

'ext': 'mp4',

'upload_date': '20120608',

443

'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',

444

'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',

445

'uploader': 'SET India',

446

'uploader_id': 'setindia',

447

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',

448

'license': 'Standard YouTube License',

'age_limit': 18,

}

},

{

'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',

454

'note': 'Use the first video ID in the URL',

'info_dict': {

'id': 'BaW_jenozKc',

'ext': 'mp4',

'title': 'youtube-dl test video "\'/\\ä↭𝕐',

459

'uploader': 'Philipp Hagemeister',

460

'uploader_id': 'phihag',

461

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',

462

'upload_date': '20121002',

463

'license': 'Standard YouTube License',

464

'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',

465

'categories': ['Science & Technology'],

466

'tags': ['youtube-dl'],

467

'duration': 10,

468

'like_count': int,

469

'dislike_count': int,

470

},

471

'params': {

472

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',

477

'note': '256k DASH audio (format 141) via DASH manifest',

'info_dict': {

'id': 'a9LDPn-MO4I',

'ext': 'm4a',

'upload_date': '20121002',

482

'uploader_id': '8KVIDEO',

483

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',

484

'description': '',

485

'uploader': '8KVIDEO',

486

'license': 'Standard YouTube License',

487

'title': 'UHDTV TEST 8K VIDEO.mp4'

488

},

489

'params': {

490

'youtube_include_dash_manifest': True,

491

'format': '141',

492

},

493

'skip': 'format 141 not served anymore',

494

},

495

# DASH manifest with encrypted signature

496

{

497

'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',

'info_dict': {

'id': 'IB3lcPjvWLA',

'ext': 'm4a',

'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',

502

'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',

503

'duration': 244,

504

'uploader': 'AfrojackVEVO',

505

'uploader_id': 'AfrojackVEVO',

506

'upload_date': '20131011',

507

'license': 'Standard YouTube License',

508

},

509

'params': {

510

'youtube_include_dash_manifest': True,

511

'format': '141/bestaudio[ext=m4a]',

512

},

513

},

514

# JS player signature function name containing $

515

{

516

'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',

'info_dict': {

'id': 'nfWlot6h_JM',

'ext': 'm4a',

'title': 'Taylor Swift - Shake It Off',

521

'alt_title': 'Shake It Off',

522

'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',

523

'duration': 242,

524

'uploader': 'TaylorSwiftVEVO',

525

'uploader_id': 'TaylorSwiftVEVO',

526

'upload_date': '20140818',

527

'license': 'Standard YouTube License',

528

'creator': 'Taylor Swift',

529

},

530

'params': {

531

'youtube_include_dash_manifest': True,

532

'format': '141/bestaudio[ext=m4a]',

},

},

# Controversy video

{

'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',

'info_dict': {

'id': 'T4XJQO3qol8',

'ext': 'mp4',

'duration': 219,

'upload_date': '20100909',

543

'uploader': 'The Amazing Atheist',

544

'uploader_id': 'TheAmazingAtheist',

545

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',

546

'license': 'Standard YouTube License',

547

'title': 'Burning Everyone\'s Koran',

548

'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',

549

}

550

},

551

# Normal age-gate video (No vevo, embed allowed)

552

{

553

'url': 'https://youtube.com/watch?v=HtVdAasjOgU',

'info_dict': {

'id': 'HtVdAasjOgU',

'ext': 'mp4',

'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',

558

'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',

559

'duration': 142,

560

'uploader': 'The Witcher',

561

'uploader_id': 'WitcherGame',

562

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',

563

'upload_date': '20140605',

564

'license': 'Standard YouTube License',

'age_limit': 18,

},

},

# Age-gate video with encrypted signature

569

{

570

'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',

'info_dict': {

'id': '6kLq3WMV1nU',

'ext': 'mp4',

'title': 'Dedication To My Ex (Miss That) (Lyric Video)',

575

'description': 'md5:33765bb339e1b47e7e72b5490139bb41',

576

'duration': 247,

577

'uploader': 'LloydVEVO',

578

'uploader_id': 'LloydVEVO',

579

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',

580

'upload_date': '20110629',

581

'license': 'Standard YouTube License',

'age_limit': 18,

},

},

# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)

586

{

587

'url': '__2ABJjxzNo',

'info_dict': {

'id': '__2ABJjxzNo',

'ext': 'mp4',

'duration': 266,

'upload_date': '20100430',

593

'uploader_id': 'deadmau5',

594

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',

595

'creator': 'deadmau5',

596

'description': 'md5:12c56784b8032162bb936a5f76d55360',

597

'uploader': 'deadmau5',

598

'license': 'Standard YouTube License',

599

'title': 'Deadmau5 - Some Chords (HD)',

600

'alt_title': 'Some Chords',

601

},

602

'expected_warnings': [

603

'DASH manifest missing',

604

]

605

},

606

# Olympics (https://github.com/rg3/youtube-dl/issues/4431)

607

{

608

'url': 'lqQg6PlCWgI',

'info_dict': {

'id': 'lqQg6PlCWgI',

'ext': 'mp4',

'duration': 6085,

'upload_date': '20150827',

614

'uploader_id': 'olympic',

615

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',

616

'license': 'Standard YouTube License',

617

'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',

618

'uploader': 'Olympic',

619

'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',

620

},

621

'params': {

622

'skip_download': 'requires avconv',

}

},

# Non-square pixels

{

'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',

'info_dict': {

'id': '_b-2C3KPAM0',

'ext': 'mp4',

'stretched_ratio': 16 / 9.,

632

'duration': 85,

633

'upload_date': '20110310',

634

'uploader_id': 'AllenMeow',

635

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',

636

'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',

637

'uploader': '孫艾倫',

638

'license': 'Standard YouTube License',

639

'title': '[A-made] 變態妍字幕版太妍我就是這樣的人',

640

},

641

},

642

# url_encoded_fmt_stream_map is empty string

643

{

644

'url': 'qEJwOuvDf7I',

'info_dict': {

'id': 'qEJwOuvDf7I',

'ext': 'webm',

'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',

649

'description': '',

650

'upload_date': '20150404',

651

'uploader_id': 'spbelect',

652

'uploader': 'Наблюдатели Петербурга',

653

},

654

'params': {

655

'skip_download': 'requires avconv',

656

},

657

'skip': 'This live event has ended.',

658

},

659

# Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)

660

{

661

'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',

'info_dict': {

'id': 'FIl7x6_3R5Y',

'ext': 'mp4',

'title': 'md5:7b81415841e02ecd4313668cde88737a',

666

'description': 'md5:116377fd2963b81ec4ce64b542173306',

667

'duration': 220,

668

'upload_date': '20150625',

669

'uploader_id': 'dorappi2000',

670

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',

671

'uploader': 'dorappi2000',

672

'license': 'Standard YouTube License',

673

'formats': 'mincount:32',

674

},

675

},

676

# DASH manifest with segment_list

677

{

678

'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',

679

'md5': '8ce563a1d667b599d21064e982ab9e31',

'info_dict': {

'id': 'CsmdDsKjzN8',

'ext': 'mp4',

'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510

684

'uploader': 'Airtek',

685

'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',

686

'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',

687

'license': 'Standard YouTube License',

688

'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',

689

},

690

'params': {

691

'youtube_include_dash_manifest': True,

692

'format': '135', # bestvideo

693

},

694

'skip': 'This live event has ended.',

695

},

696

{

697

# Multifeed videos (multiple cameras), URL is for Main Camera

698

'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',

699

'info_dict': {

700

'id': 'jqWvoWXjCVs',

701

'title': 'teamPGP: Rocket League Noob Stream',

702

'description': 'md5:dc7872fb300e143831327f1bae3af010',

},

'playlist': [{

'info_dict': {

'id': 'jqWvoWXjCVs',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',

709

'description': 'md5:dc7872fb300e143831327f1bae3af010',

710

'duration': 7335,

711

'upload_date': '20150721',

712

'uploader': 'Beer Games Beer',

713

'uploader_id': 'beergamesbeer',

714

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

715

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': '6h8e8xoXJzg',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',

722

'description': 'md5:dc7872fb300e143831327f1bae3af010',

723

'duration': 7337,

724

'upload_date': '20150721',

725

'uploader': 'Beer Games Beer',

726

'uploader_id': 'beergamesbeer',

727

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

728

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': 'PUOgX5z9xZw',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (grizzle)',

735

'description': 'md5:dc7872fb300e143831327f1bae3af010',

736

'duration': 7337,

737

'upload_date': '20150721',

738

'uploader': 'Beer Games Beer',

739

'uploader_id': 'beergamesbeer',

740

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

741

'license': 'Standard YouTube License',

},

}, {

'info_dict': {

'id': 'teuwxikvS5k',

'ext': 'mp4',

'title': 'teamPGP: Rocket League Noob Stream (zim)',

748

'description': 'md5:dc7872fb300e143831327f1bae3af010',

749

'duration': 7334,

750

'upload_date': '20150721',

751

'uploader': 'Beer Games Beer',

752

'uploader_id': 'beergamesbeer',

753

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',

754

'license': 'Standard YouTube License',

},

}],

'params': {

'skip_download': True,

},

},

{

# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)

763

'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',

764

'info_dict': {

765

'id': 'gVfLd0zydlo',

766

'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',

767

},

768

'playlist_count': 2,

769

'skip': 'Not multifeed anymore',

770

},

771

{

772

'url': 'https://vid.plus/FlRa-iH7PGw',

773

'only_matching': True,

774

},

775

{

776

'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',

777

'only_matching': True,

778

},

779

{

780

# Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)

781

# Also tests cut-off URL expansion in video description (see

782

# https://github.com/rg3/youtube-dl/issues/1892,

783

# https://github.com/rg3/youtube-dl/issues/8164)

784

'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',

'info_dict': {

'id': 'lsguqyKfVQg',

'ext': 'mp4',

'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',

789

'alt_title': 'Dark Walk',

790

'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',

791

'duration': 133,

792

'upload_date': '20151119',

793

'uploader_id': 'IronSoulElf',

794

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',

795

'uploader': 'IronSoulElf',

796

'license': 'Standard YouTube License',

797

'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',

798

},

799

'params': {

800

'skip_download': True,

},

},

{

# Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)

805

'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',

806

'only_matching': True,

807

},

808

{

809

# Video with yt:stretch=17:0

810

'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',

'info_dict': {

'id': 'Q39EVAstoRM',

'ext': 'mp4',

'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',

815

'description': 'md5:ee18a25c350637c8faff806845bddee9',

816

'upload_date': '20151107',

817

'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',

818

'uploader': 'CH GAMER DROID',

819

},

820

'params': {

821

'skip_download': True,

822

},

823

'skip': 'This video does not exist.',

824

},

825

{

826

# Video licensed under Creative Commons

827

'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',

'info_dict': {

'id': 'M4gD1WSo5mA',

'ext': 'mp4',

'title': 'md5:e41008789470fc2533a3252216f1c1d1',

832

'description': 'md5:a677553cf0840649b731a3024aeff4cc',

833

'duration': 721,

834

'upload_date': '20150127',

835

'uploader_id': 'BerkmanCenter',

836

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',

837

'uploader': 'The Berkman Klein Center for Internet & Society',

838

'license': 'Creative Commons Attribution license (reuse allowed)',

839

},

840

'params': {

841

'skip_download': True,

},

},

{

# Channel-like uploader_url

846

'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',

'info_dict': {

'id': 'eQcmzGIKrzg',

'ext': 'mp4',

'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',

851

'description': 'md5:dda0d780d5a6e120758d1711d062a867',

852

'duration': 4060,

853

'upload_date': '20151119',

854

'uploader': 'Bernie 2016',

855

'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',

856

'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',

857

'license': 'Creative Commons Attribution license (reuse allowed)',

858

},

859

'params': {

860

'skip_download': True,

},

},

{

'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',

865

'only_matching': True,

866

},

867

{

868

# YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)

869

'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',

870

'only_matching': True,

871

},

872

{

873

# Rental video preview

874

'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',

'info_dict': {

'id': 'uGpuVWrhIzE',

'ext': 'mp4',

'title': 'Piku - Trailer',

879

'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',

880

'upload_date': '20150811',

881

'uploader': 'FlixMatrix',

882

'uploader_id': 'FlixMatrixKaravan',

883

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',

884

'license': 'Standard YouTube License',

885

},

886

'params': {

887

'skip_download': True,

},

},

{

# YouTube Red video with episode data

892

'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',

'info_dict': {

'id': 'iqKdEhx-dD4',

'ext': 'mp4',

'title': 'Isolation - Mind Field (Ep 1)',

897

'description': 'md5:8013b7ddea787342608f63a13ddc9492',

898

'duration': 2085,

899

'upload_date': '20170118',

900

'uploader': 'Vsauce',

901

'uploader_id': 'Vsauce',

902

'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',

903

'license': 'Standard YouTube License',

904

'series': 'Mind Field',

'season_number': 1,

'episode_number': 1,

},

'params': {

'skip_download': True,

910

},

911

'expected_warnings': [

912

'Skipping DASH manifest',

],

},

{

# itag 212

'url': '1t24XAntNCY',

918

'only_matching': True,

}

]

def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent initialiser and create an
    empty ``_player_cache`` dict on the instance."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Starts out empty; nothing in this method fills it
    self._player_cache = dict()

925

926

def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage for *video_id* is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)

929

930

def report_information_extraction(self, video_id):
    """Announce that video information for *video_id* is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)

933

934

def report_unavailable_format(self, video_id, format):
    """Announce that the requested *format* is not available for *video_id*."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)

937

938

def report_rtmp_download(self):
    """Announce that an RTMP download was detected."""
    message = 'RTMP download detected'
    self.to_screen(message)

941

942

def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of *example_sig*,
    themselves joined with dots (so only the shape of the signature is kept)."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)

945

946

def _extract_signature_function(self, video_id, player_url, example_sig):

947

id_m = re.match(

948

r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',

949

player_url)

950

if not id_m:

951

raise ExtractorError('Cannot identify player %r' % player_url)

952

player_type = id_m.group('ext')

953

player_id = id_m.group('id')

954

955

# Read from filesystem cache

956

func_id = '%s_%s_%s' % (

957

player_type, player_id, self._signature_cache_id(example_sig))

958

assert os.path.basename(func_id) == func_id

959

960

cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)

961

if cache_spec is not None:

962

return lambda s: ''.join(s[i] for i in cache_spec)

963

964

download_note = (

965

'Downloading player %s' % player_url

966

if self._downloader.params.get('verbose') else

967

'Downloading %s player %s' % (player_type, player_id)

968

)

969

if player_type == 'js':

970

code = self._download_webpage(

971

player_url, video_id,

972

note=download_note,

973

errnote='Download of %s failed' % player_url)

974

res = self._parse_sig_js(code)

975

elif player_type == 'swf':

976

urlh = self._request_webpage(

977

player_url, video_id,

978

note=download_note,

979

errnote='Download of %s failed' % player_url)

980

code = urlh.read()

981

res = self._parse_sig_swf(code)

982

else:

983

assert False, 'Invalid player type %r' % player_type

984

985

test_string = ''.join(map(compat_chr, range(len(example_sig))))

986

cache_res = res(test_string)

987

cache_spec = [ord(c) for c in cache_res]

988

989

self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)

990

return res

991

992

def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is run on a probe string whose k-th character has code point k,
    which reveals the index permutation it applies to a signature shaped
    like example_sig. The permutation is rendered as a sum of index and
    slice expressions on `s` and written out with self.to_screen().
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' index/slice expressions that together cover idxs
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to emit for an empty permutation
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-bind i so the code after the loop also works when idxs has a
        # single element and the loop body never runs.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new ascending/descending run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)

1030

1031

def _parse_sig_js(self, jscode):
    """Build a signature function from the JS player source jscode.

    The name of the entry function is located with a regex, the function
    is extracted with JSInterpreter, and a one-argument wrapper around it
    is returned.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    decipher = JSInterpreter(jscode).extract_function(funcname)

    def run(s):
        # The interpreted function takes its arguments as a list
        return decipher([s])

    return run

1040

1041

def _parse_sig_swf(self, file_contents):
    """Build a signature function from the SWF player bytes file_contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped in a one-argument callable.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        # The interpreted function takes its arguments as a list
        return decipher([s])

    return run

1047

1048

def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Make the player URL absolute
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        # One deciphering function per (player, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        # Any failure is reported with its traceback attached
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)

1074

1075

def _get_subtitles(self, video_id, webpage):
    """Return a dict mapping language code to a list of subtitle formats.

    The track list is fetched from the timedtext endpoint; for each
    language (first track wins) one entry per self._SUBTITLE_FORMATS
    extension is built. An empty dict is returned, with a warning, when
    the list cannot be downloaded or contains no tracks.
    """
    try:
        track_list = self._download_xml(
            'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
            video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}

def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed ytplayer.config JSON found in webpage, or None.

    None is returned when no config assignment is found; JSON parsing
    is non-fatal.
    """
    config_regexes = (
        # User data may contain arbitrary character sequences that may affect
        # JSON extraction with regex, e.g. when '};' is contained the second
        # regex won't capture the whole JSON. Yet working around by trying more
        # concrete regex first keeping in mind proper quoted string handling
        # to be implemented in future that will replace this workaround (see
        # https://github.com/rg3/youtube-dl/issues/7468,
        # https://github.com/rg3/youtube-dl/pull/7599)
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_regexes, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)

1124

1125

def _get_automatic_captions(self, video_id, webpage):
    """We need the webpage for getting the captions url, pass it as an
    argument to speed up the process.

    Returns a dict mapping target language code to a list of caption
    formats, or an empty dict (after a warning) when none are found.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        tts_url = args.get('ttsurl')
        if tts_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_url = tts_url + '&' + compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            caption_list = self._download_xml(list_url, video_id)
            source_track = caption_list.find('track')
            if source_track is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = source_track.attrib['lang_code']
            caption_kind = source_track.attrib.get('kind', '')

            captions = {}
            for target in caption_list.findall('target'):
                target_lang = target.attrib['lang_code']
                captions[target_lang] = [{
                    'url': tts_url + '&' + compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': target_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    }),
                    'ext': ext,
                } for ext in self._SUBTITLE_FORMATS]
            return captions

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        caption_tracks = args['caption_tracks']
        translation_languages = args['caption_translation_languages']
        first_track = caption_tracks.split(',')[0]
        base_url = compat_parse_qs(first_track)['u'][0]
        parsed_base_url = compat_urllib_parse_urlparse(base_url)
        base_qs = compat_parse_qs(parsed_base_url.query)

        captions = {}
        for lang_entry in translation_languages.split(','):
            entry_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang_entry))
            target_lang = entry_qs.get('lc', [None])[0]
            if not target_lang:
                continue
            lang_formats = []
            for ext in self._SUBTITLE_FORMATS:
                # Reuse the base track query, overriding language and format
                base_qs.update({
                    'tlang': [target_lang],
                    'fmt': [ext],
                })
                new_query = compat_urllib_parse_urlencode(base_qs, True)
                lang_formats.append({
                    'url': compat_urlparse.urlunparse(
                        parsed_base_url._replace(query=new_query)),
                    'ext': ext,
                })
            captions[target_lang] = lang_formats
        return captions
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}

1206

1207

def _mark_watched(self, video_id, video_info):
    """Request the playback stats URL from video_info to mark the video watched.

    Does nothing when video_info has no 'videostats_playback_base_url'.
    The request is non-fatal.
    """
    playback_url = video_info.get('videostats_playback_base_url', [None])[0]
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.randint() includes both bounds: use 0..255 so that masking
    # with 63 selects each of the 64 alphabet characters equally often.
    cpn = ''.join((CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(0, 16)))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)

1229

1230

@classmethod
def extract_id(cls, url):
    """Return the video id (group 2 of cls._VALID_URL) matched in url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)

1237

1238

def _extract_from_m3u8(self, manifest_url, video_id):
    """Download the manifest at manifest_url and map itag -> format URL.

    Every non-empty manifest line that does not start with '#' is taken
    as a format URL; its itag is read from the 'itag/<digits>/' path part.
    """
    manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')

    url_map = {}
    for line in manifest.split('\n'):
        # Skip blank lines and '#'-prefixed lines
        if not line or line.startswith('#'):
            continue
        itag = self._search_regex(r'itag/(\d+?)/', line, 'itag')
        url_map[itag] = line
    return url_map

1252

1253

def _extract_annotations(self, video_id):
    """Download and return the annotations document for video_id."""
    annotations_url = (
        'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s'
        % video_id)
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')

1256

1257

def _real_extract(self, url):
    """Extract metadata and formats for a single YouTube video URL.

    Steps, in order: parse start/end times from the URL, download the
    watch page, obtain video_info (embed page + get_video_info for
    age-gated videos, otherwise ytplayer.config and get_video_info),
    scrape metadata from the watch page, build the format list (RTMP,
    url_encoded_fmt_stream_map/adaptive_fmts, or HLS), merge DASH
    manifest formats, and return the info dict.

    May instead return a url_result (rental preview) or a playlist_result
    (multifeed video). Raises ExtractorError when the video is
    unavailable or required information is missing.
    """
    url, smuggled_data = unsmuggle_url(url, {})

    proto = (
        'http' if self._downloader.params.get('prefer_insecure', False)
        else 'https')

    # Start/end offsets may be given in the fragment or in the query;
    # the fragment is inspected first and the first value found wins.
    start_time = None
    end_time = None
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        if start_time is None and 't' in query:
            start_time = parse_duration(query['t'][0])
        if start_time is None and 'start' in query:
            start_time = parse_duration(query['start'][0])
        if end_time is None and 'end' in query:
            end_time = parse_duration(query['end'][0])

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Drop the backslash escapes of the JSON-embedded URL
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    dash_mpds = []

    def add_dash_mpd(video_info):
        # Collect distinct DASH manifest URLs from every video_info seen
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    # Get video info
    embed_webpage = None
    is_live = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse_urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        })
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
    else:
        age_gate = False
        video_info = None
        # Try looking directly into the video webpage
        ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
        if ytplayer_config:
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            # Rental video is not rented but preview is available (e.g.
            # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
            # https://github.com/rg3/youtube-dl/issues/10532)
            if not video_info and args.get('ypc_vid'):
                return self.url_result(
                    args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                is_live = True
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = (
                    '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (proto, video_id, el_type))
                video_info_webpage = self._download_webpage(
                    video_info_url,
                    video_id, note=False,
                    errnote='unable to download video info webpage')
                get_video_info = compat_parse_qs(video_info_webpage)
                if get_video_info.get('use_cipher_signature') != ['True']:
                    add_dash_mpd(get_video_info)
                if not video_info:
                    video_info = get_video_info
                if 'token' in get_video_info:
                    # Different get_video_info requests may report different results, e.g.
                    # some may report video unavailability, but some may serve it without
                    # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
                    # the original webpage as well as el=info and el=embedded get_video_info
                    # requests report video unavailability due to geo restriction while
                    # el=detailpage succeeds and returns valid data). This is probably
                    # due to YouTube measures against IP ranges of hosting providers.
                    # Working around by preferring the first succeeded video_info containing
                    # the token if no such video_info yet was found.
                    if 'token' not in video_info:
                        video_info = get_video_info
                    break
    if 'token' not in video_info:
        if 'reason' in video_info:
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
                if regions_allowed:
                    raise ExtractorError('YouTube said: This video is available in %s only' % (
                        ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
                        expected=True)
            raise ExtractorError(
                'YouTube said: %s' % video_info['reason'][0],
                expected=True, video_id=video_id)
        else:
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',
                video_id=video_id)

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
    else:
        self._downloader.report_warning('Unable to extract video title')
        video_title = '_'

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        # Replace shortened ("...") link texts with the full target
        # taken from the title/href attribute
        video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                (?:title|href)="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                class="[^"]*"[^>]*>
            [^<]+\.{3}\s*
            </a>
        ''', r'\1', video_description)
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = ''

    if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            entries = []
            feed_ids = []
            multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/rg3/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                })
                feed_ids.append(feed_data['id'][0])
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(entries, video_id, video_title, video_description)
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if 'view_count' in video_info:
        view_count = int(video_info['view_count'][0])
    else:
        view_count = None

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError('Unable to extract uploader name')
    video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0])

    # uploader_id
    video_uploader_id = None
    video_uploader_url = None
    mobj = re.search(
        r'<link itemprop="url" href="(?P<uploader_url>https?://www.youtube.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
        video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group('uploader_id')
        video_uploader_url = mobj.group('uploader_url')
    else:
        self._downloader.report_warning('unable to extract uploader nickname')

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
    if not upload_date:
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
            video_webpage, 'upload date', default=None)
        if upload_date:
            # Normalize separators and whitespace of the scraped date text.
            # This must operate on the date itself; `mobj` at this point is
            # the (possibly None) uploader link match, not the date match.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', upload_date).split())
    upload_date = unified_strdate(upload_date)

    video_license = self._html_search_regex(
        r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
        video_webpage, 'license', default=None)

    # The optional trailing group skips a parenthesized suffix after the
    # creator name so that it does not end up in the creator field.
    m_music = re.search(
        r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
        video_webpage)
    if m_music:
        video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
        video_creator = clean_html(m_music.group('creator'))
    else:
        video_alt_title = video_creator = None

    m_episode = re.search(
        r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
        video_webpage)
    if m_episode:
        series = m_episode.group('series')
        season_number = int(m_episode.group('season'))
        episode_number = int(m_episode.group('episode'))
    else:
        series = season_number = episode_number = None

    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
    if m_cat_container:
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            default=None)
        video_categories = None if category is None else [category]
    else:
        video_categories = None

    video_tags = [
        unescapeHTML(m.group('content'))
        for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

    def _extract_count(count_name):
        # Read the number shown on the "<count_name>" button of the watch page
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    video_duration = try_get(
        video_info, lambda x: int_or_none(x['length_seconds'][0]))
    if not video_duration:
        video_duration = parse_duration(self._html_search_meta(
            'duration', video_webpage, 'video duration'))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    def _map_to_format_list(urlmap):
        # Turn an itag -> URL mapping into a list of format dicts
        formats = []
        for itag, video_real_url in urlmap.items():
            dct = {
                'format_id': itag,
                'url': video_real_url,
                'player_url': player_url,
            }
            if itag in self._formats:
                dct.update(self._formats[itag])
            formats.append(dct)
        return formats

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        formats = [{
            'format_id': '_rtmp',
            'protocol': 'rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
        }]
    elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        # Resolution per itag, parsed from "itag/WxH/..." entries of fmt_list
        formats_spec = {}
        fmt_list = video_info.get('fmt_list', [''])[0]
        if fmt_list:
            for fmt in fmt_list.split(','):
                spec = fmt.split('/')
                if len(spec) > 1:
                    width_height = spec[1].split('x')
                    if len(width_height) == 2:
                        formats_spec[spec[0]] = {
                            'resolution': spec[1],
                            'width': int_or_none(width_height[0]),
                            'height': int_or_none(width_height[1]),
                        }
        formats = []
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
                continue
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 'sig' in url_data:
                # Signature is given in clear
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                # Signature is encrypted: locate the player to decipher it
                encrypted_sig = url_data['s'][0]
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'

                jsplayer_url_json = self._search_regex(
                    ASSETS_RE,
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                player_url = json.loads(jsplayer_url_json)
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                    else:
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
                                player_url,
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'

            dct = {
                'format_id': format_id,
                'url': url,
                'player_url': player_url,
            }
            if format_id in self._formats:
                dct.update(self._formats[format_id])
            if format_id in formats_spec:
                dct.update(formats_spec[format_id])

            # Some itags are not included in DASH manifest thus corresponding formats will
            # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
            # Trying to extract metadata from url_encoded_fmt_stream_map entry.
            mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
            width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

            more_fields = {
                'filesize': int_or_none(url_data.get('clen', [None])[0]),
                'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                'width': width,
                'height': height,
                'fps': int_or_none(url_data.get('fps', [None])[0]),
                'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
            }
            # Only truthy values override what is already known
            for key, value in more_fields.items():
                if value:
                    dct[key] = value
            type_ = url_data.get('type', [None])[0]
            if type_:
                type_split = type_.split(';')
                kind_ext = type_split[0].split('/')
                if len(kind_ext) == 2:
                    kind, _ = kind_ext
                    dct['ext'] = mimetype2ext(type_split[0])
                    if kind in ('audio', 'video'):
                        codecs = None
                        for mobj in re.finditer(
                                r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                            if mobj.group('key') == 'codecs':
                                codecs = mobj.group('val')
                                break
                        if codecs:
                            codecs = codecs.split(',')
                            if len(codecs) == 2:
                                # Two codecs: video first, audio second
                                acodec, vcodec = codecs[1], codecs[0]
                            else:
                                acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
                            dct.update({
                                'acodec': acodec,
                                'vcodec': vcodec,
                            })
            formats.append(dct)
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        formats = _map_to_format_list(url_map)
        # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
        for a_format in formats:
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
    else:
        unavailable_message = self._html_search_regex(
            r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
            video_webpage, 'unavailable message', default=None)
        if unavailable_message:
            raise ExtractorError(unavailable_message, expected=True)
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for mpd_url in dash_mpds:
            dash_formats = {}
            try:
                def decrypt_sig(mobj):
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s

                # Replace an encrypted /s/<sig> path part with its deciphered form
                mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

                for df in self._extract_mpd_formats(
                        mpd_url, video_id, fatal=dash_mpd_fatal,
                        formats_dict=self._formats):
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                    # Additional DASH manifests may end up in HTTP Error 403 therefore
                    # allow them to fail without bug report message if we already have
                    # some DASH manifest succeeded. This is temporary workaround to reduce
                    # burst of bug reports until we figure out the reason and whether it
                    # can be fixed at all.
                    dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            if dash_formats:
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                # example.
                formats = [f for f in formats if f['format_id'] not in dash_formats]
                formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        video_webpage)
    if stretched_m:
        w = float(stretched_m.group('w'))
        h = float(stretched_m.group('h'))
        # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
        # We will only process correct ratios.
        if w > 0 and h > 0:
            ratio = w / h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    self.mark_watched(video_id, video_info)

    return {
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'uploader_url': video_uploader_url,
        'upload_date': upload_date,
        'license': video_license,
        'creator': video_creator,
        'title': video_title,
        'alt_title': video_alt_title,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'tags': video_tags,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'formats': formats,
        'is_live': is_live,
        'start_time': start_time,
        'end_time': end_time,
        'series': series,
        'season_number': season_number,
        'episode_number': episode_number,
    }

class YoutubeSharedVideoIE(InfoExtractor):
    """Hand youtube.com/shared?ci=... links over to the video extractor."""
    IE_NAME = 'youtube:shared'
    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'

    _TEST = {
        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
        'info_dict': {
            'id': 'uPDB5I9wfp8',
            'ext': 'webm',
            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
            'upload_date': '20160219',
            'uploader': 'Pocoyo - Português (BR)',
            'uploader_id': 'PocoyoBrazil',
        },
        'add_ie': ['Youtube'],
        'params': {
            # There are already too many Youtube downloads
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result for the video referenced by the share page.

        The id matched from the URL only labels the page download; the id
        that is handed on is looked up in the page under the name
        'videoId', and the lookup is fatal when it finds nothing.
        """
        share_id = self._match_id(url)
        share_page = self._download_webpage(url, share_id)
        target_video_id = self._html_search_meta(
            'videoId', share_page, 'YouTube video id', fatal=True)
        return self.url_result(target_video_id, YoutubeIE.ie_key())

1845

1846

1847

class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, mixes and bare playlist ids."""
    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the playlist id of a full URL, group 2 a bare playlist
    # id.  The path alternation in front of the query string is what lets the
    # pattern accept the URLs listed in _TESTS (/playlist?list=...,
    # /watch?v=...&list=..., /embed/videoseries?list=...); without it only
    # "youtube.com/?..." and "youtube.com/p/..." would match.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]

    def _real_initialize(self):
        self._login()

    def _extract_mix(self, playlist_id):
        """Collect the videos of a mix by walking its watch pages.

        Each request asks for the watch page of the last video found so far
        within the mix; paging stops as soon as a page contributes no video
        id that is not already known.  The title is taken from the last
        page downloaded.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title') or
            search_title('title long-title') or
            search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download a playlist page and build its playlist result.

        Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is only
        False when the page has no title *and* yields no entry at all.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or the parameters are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        return has_videos, self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether only the video named in ``url`` should be fetched.

        Returns ``(video_id, url_result)`` when the URL names a video and the
        'noplaylist' parameter is set, ``(video_id, None)`` when it names a
        video but the playlist is wanted, and ``(None, None)`` otherwise.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        """Dispatch to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

2111

2112

2113

class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel URL."""
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for ``channel_id`` (``url`` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its uploads playlist when one is found.

        Falls back to scraping the channel's videos page when no 'UC...'
        id can be obtained from the probe page.
        """
        channel_id = self._match_id(url)
        videos_url = self._build_template_url(url, channel_id)

        # Listing a channel page by page is capped at 35 pages of 30 items,
        # i.e. 1050 videos in total (see #5778).  When the uploads playlist
        # can be worked out, extract that instead; otherwise fall back to the
        # page-by-page listing below.
        probe_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        owner_id = None
        if probe_page is not False:
            owner_id = self._html_search_meta(
                'channelId', probe_page, 'channel id', default=None)
            if not owner_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    probe_page, 'channel url', default=None)
                if app_url:
                    owner_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if owner_id and owner_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to get the playlist id.
            uploads_id = 'UU' + owner_id[2:]
            uploads_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')

        if re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page) is not None:
            # Autogenerated channel: every video sits on this one page and
            # the ajax pages cannot be used because they are empty.
            single_page_entries = [
                self.url_result(
                    vid, 'Youtube', video_id=vid, video_title=vid_title)
                for vid, vid_title in self.extract_videos_from_page(first_page)]
            return self.playlist_result(single_page_entries, channel_id)

        try:
            next(self._entries(first_page, channel_id))
        except StopIteration:
            # No entry at all: surface the page's alert text, if it has one.
            alert_text = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                first_page, 'alert', default=None, group='alert')
            if alert_text:
                raise ExtractorError('Youtube said: %s' % alert_text, expected=True)

        return self.playlist_result(self._entries(first_page, channel_id), channel_id)

2202

2203

2204

class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user pages (/user/NAME, /c/NAME, /NAME, ytuser:NAME)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    # Must define the named groups 'user' and 'id' read by
    # _build_template_url(); the pattern inherited from YoutubeChannelIE has
    # no 'user' group and matches none of the URLs in _TESTS.
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available.
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: the regex is too permissive and would match anyway.
        other_yt_ies = (
            klass for (name, klass) in globals().items()
            if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the 'user' or 'c' path prefix.

        URLs without an explicit prefix (bare name or ``ytuser:``) fall back
        to 'user'.  ``channel_id`` is not used; the id is re-read from ``url``.
        """
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))

2254

2255

2256

class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for .../live URLs of a user or channel."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the video behind the /live page, else the base URL.

        The video is returned only when the page could be downloaded, its
        og 'type' is 'video' and its 'videoId' value is an 11-character id.
        In every other case the URL without the trailing '/live' is handed
        back for another extractor to process.
        """
        url_match = re.match(self._VALID_URL, url)
        base_url = url_match.group('base_url')
        channel_id = url_match.group('id')

        live_page = self._download_webpage(url, channel_id, fatal=False)
        if not live_page:
            return self.url_result(base_url)

        og_type = self._og_search_property(
            'type', live_page, 'page type', default=None)
        candidate_id = self._html_search_meta(
            'videoId', live_page, 'video id', default=None)
        if og_type == 'video' and candidate_id and re.match(r'^[0-9A-Za-z_-]{11}$', candidate_id):
            return self.url_result(candidate_id, YoutubeIE.ie_key())
        return self.url_result(base_url)

2305

2306

2307

class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Matches the /playlists page of a user or channel.

    Only declares the URL pattern, names and tests; no method is overridden
    here.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]

class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
    """Search extractor driven by the 'ytsearch' key."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the search request;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are followed through their "Next" link until at least
        ``n`` videos have been collected, a page adds no video, or there is
        no next link.  At most ``n`` entries are returned.

        Raises ExtractorError (expected) when a page carries a
        "search-message" element.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = self._ids_to_results(orderedSet(re.findall(
                r'href="/watch\?v=(.{11})', html_content)))
            videos += new_videos
            # Stop as soon as enough results are in hand; fetching one more
            # page when exactly n were collected would only be thrown away.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guarded slice: n is presumably allowed to be float('inf') (see
        # _MAX_RESULTS), which is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)

2386

2387

2388

class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that sorts by upload date."""
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Merged into the search request by the inherited _get_n_results().
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}

2393

2394

2395

class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for youtube.com/results?... search result URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos found on the results page as a playlist.

        The playlist is titled with the decoded search query taken from the
        URL; the same string labels the page download.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        search_terms = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, search_terms)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=search_terms)

2416

2417

2418

class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show/NAME pages."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Run the parent extraction on the show's /playlists page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)

2435

2436

2437

class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Collect the video ids of the feed page and its "load more" pages.

        ``url`` itself is not used; the feed URL is built from _FEED_NAME.
        Paging ends when a portion contributes no unseen video id or when
        no further "load more" link is present.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            #
            # This has to be a real list: on Python 3 a filter() object is
            # always truthy, so the emptiness test below would never fire.
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in ids]
            if not new_ids:
                break

            ids.extend(new_ids)

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return self.playlist_result(
            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)

2484

2485

2486

class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the 'WL' (watch later) playlist."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one is requested, else the WL playlist.

        The ``has_videos`` flag reported by _extract_playlist() is ignored.
        """
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        return self._extract_playlist('WL')[1]

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to YoutubePlaylist.

        ``url`` is not used: the my_favorites page is always downloaded and
        the first ``list=`` value found in it is taken as the playlist id.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')

2517

2518

2519

class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'

2524

2525

2526

class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'

2531

2532

2533

class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'history' feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'

2538

2539

2540

class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that carry no video id.

    Extraction always fails with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(quoting_hint, expected=True)

class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose ``v`` value is 1 to 10 characters long.

    Extraction always fails and reports the incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id and URL."""
        partial_id = self._match_id(url)
        complaint = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(complaint, expected=True)