jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/tubetugraz.py

Commit	Line	Data
49afc1d8 FB	1	from .common import InfoExtractor
	2	from ..utils import (
	3	float_or_none,
	4	parse_resolution,
	5	traverse_obj,
	6	urlencode_postdata,
	7	variadic,
	8	)
	9
	10
	11	class TubeTuGrazBaseIE(InfoExtractor):
	12	_NETRC_MACHINE = 'tubetugraz'
	13
	14	_API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
	15	_FORMAT_TYPES = ('presentation', 'presenter')
	16
	17	def _perform_login(self, username, password):
	18	urlh = self._request_webpage(
	19	'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
	20	None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
	21	if not urlh:
	22	return
	23
6aaf96a3	24	response = self._download_webpage_handle(
3d2623a8	25	urlh.url, None, fatal=False, headers={'referer': urlh.url},
f44cb4e7 FB	26	note='logging in', errnote='unable to log in',
f44cb4e7 FB	27	data=urlencode_postdata({
49afc1d8 FB	28	'lang': 'de',
	29	'_eventId_proceed': '',
	30	'j_username': username,
add96eb9	31	'j_password': password,
49afc1d8	32	}))
6aaf96a3	33	if not response:
	34	return
	35
	36	content, urlh = response
	37	if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
f44cb4e7	38	return
49afc1d8	39
f44cb4e7 FB	40	if not self._html_search_regex(
	41	r'<p\b[^>]*>(Bitte geben Sie einen OTP-Wert ein:)</p>',
	42	content, 'TFA prompt', default=None):
49afc1d8	43	self.report_warning('unable to login: incorrect password')
f44cb4e7 FB	44	return
f44cb4e7 FB	45
6aaf96a3	46	urlh = self._request_webpage(
3d2623a8	47	urlh.url, None, fatal=False, headers={'referer': urlh.url},
f44cb4e7 FB	48	note='logging in with TFA', errnote='unable to log in with TFA',
	49	data=urlencode_postdata({
	50	'lang': 'de',
	51	'_eventId_proceed': '',
	52	'j_tokenNumber': self._get_tfa_info(),
	53	}))
3d2623a8	54	if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
f44cb4e7 FB	55	return
	56
	57	self.report_warning('unable to login: incorrect TFA code')
49afc1d8 FB	58
49afc1d8 FB	59	def _extract_episode(self, episode_info):
add96eb9	60	video_id = episode_info.get('id')
49afc1d8	61	formats = list(self._extract_formats(
add96eb9	62	traverse_obj(episode_info, ('mediapackage', 'media', 'track')), video_id))
49afc1d8 FB	63
	64	title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
	65	series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
	66	creator = ', '.join(variadic(traverse_obj(
	67	episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
	68	return {
add96eb9	69	'id': video_id,
49afc1d8 FB	70	'title': title,
	71	'creator': creator or None,
	72	'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
	73	'series': series_title,
	74	'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
	75	'episode': series_title and title,
add96eb9	76	'formats': formats,
49afc1d8 FB	77	}
49afc1d8 FB	78
add96eb9	79	def _set_format_type(self, formats, fmt_type):
49afc1d8	80	for f in formats:
add96eb9	81	f['format_note'] = fmt_type
add96eb9	82	if not fmt_type.startswith(self._FORMAT_TYPES[0]):
49afc1d8 FB	83	f['preference'] = -2
	84	return formats
	85
add96eb9	86	def _extract_formats(self, format_list, video_id):
49afc1d8 FB	87	has_hls, has_dash = False, False
	88
	89	for format_info in format_list or []:
	90	url = traverse_obj(format_info, ('tags', 'url'), 'url')
	91	if url is None:
	92	continue
	93
add96eb9	94	fmt_type = format_info.get('type') or 'unknown'
49afc1d8 FB	95	transport = (format_info.get('transport') or 'https').lower()
	96
	97	if transport == 'https':
	98	formats = [{
	99	'url': url,
	100	'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
	101	'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
	102	'fps': traverse_obj(format_info, ('video', 'framerate')),
	103	**parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
	104	}]
	105	elif transport == 'hls':
	106	has_hls, formats = True, self._extract_m3u8_formats(
add96eb9	107	url, video_id, 'mp4', fatal=False, note=f'downloading {fmt_type} HLS manifest')
49afc1d8 FB	108	elif transport == 'dash':
49afc1d8 FB	109	has_dash, formats = True, self._extract_mpd_formats(
add96eb9	110	url, video_id, fatal=False, note=f'downloading {fmt_type} DASH manifest')
49afc1d8 FB	111	else:
	112	# RTMP, HDS, SMOOTH, and unknown formats
	113	# - RTMP url fails on every tested entry until now
	114	# - HDS url 404's on every tested entry until now
	115	# - SMOOTH url 404's on every tested entry until now
	116	continue
	117
add96eb9	118	yield from self._set_format_type(formats, fmt_type)
49afc1d8 FB	119
49afc1d8 FB	120	# TODO: Add test for these
add96eb9	121	for fmt_type in self._FORMAT_TYPES:
49afc1d8 FB	122	if not has_hls:
49afc1d8 FB	123	hls_formats = self._extract_m3u8_formats(
add96eb9	124	f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/playlist.m3u8',
	125	video_id, 'mp4', fatal=False, note=f'Downloading {fmt_type} HLS manifest', errnote=False) or []
	126	yield from self._set_format_type(hls_formats, fmt_type)
49afc1d8 FB	127
	128	if not has_dash:
	129	dash_formats = self._extract_mpd_formats(
add96eb9	130	f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/manifest_mpm4sav_mvlist.mpd',
	131	video_id, fatal=False, note=f'Downloading {fmt_type} DASH manifest', errnote=False)
	132	yield from self._set_format_type(dash_formats, fmt_type)
49afc1d8 FB	133
	134
	135	class TubeTuGrazIE(TubeTuGrazBaseIE):
	136	IE_DESC = 'tube.tugraz.at'
	137
	138	_VALID_URL = r'''(?x)
	139	https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
	140	(?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
	141	'''
	142	_TESTS = [
	143	{
	144	'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
	145	'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
	146	'info_dict': {
	147	'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
	148	'ext': 'mp4',
	149	'title': '#6 (23.11.2017)',
	150	'episode': '#6 (23.11.2017)',
	151	'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
	152	'creator': 'Safran C',
	153	'duration': 3295818,
	154	'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
add96eb9	155	},
49afc1d8 FB	156	}, {
	157	'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
	158	'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
	159	'info_dict': {
	160	'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
	161	'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
	162	'ext': 'mp4',
	163	},
	164	'expected_warnings': ['Extractor failed to obtain "title"'],
add96eb9	165	},
49afc1d8 FB	166	]
	167
	168	def _real_extract(self, url):
	169	video_id = self._match_id(url)
	170	episode_data = self._download_json(
	171	self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
	172
	173	episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
	174	return self._extract_episode(episode_info)
	175
	176
	177	class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
	178	_VALID_URL = r'''(?x)
	179	https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
	180	(?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
	181	'''
	182	_TESTS = [{
	183	'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	184	'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	185	'info_dict': {
	186	'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	187	'title': '[209351] Strassenwesen',
	188	},
	189	'playlist': [
	190	{
	191	'info_dict': {
	192	'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
	193	'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	194	'ext': 'mp4',
	195	'title': '#4 Detailprojekt',
	196	'episode': '#4 Detailprojekt',
	197	'series': '[209351] Strassenwesen',
	198	'creator': 'Neuhold R',
	199	'duration': 6127024,
add96eb9	200	},
49afc1d8 FB	201	},
	202	{
	203	'info_dict': {
	204	'id': '87350498-799a-44d3-863f-d1518a98b114',
	205	'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	206	'ext': 'mp4',
	207	'title': '#3 Generelles Projekt',
	208	'episode': '#3 Generelles Projekt',
	209	'series': '[209351] Strassenwesen',
	210	'creator': 'Neuhold R',
	211	'duration': 5374422,
add96eb9	212	},
49afc1d8 FB	213	},
	214	{
	215	'info_dict': {
	216	'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
	217	'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	218	'ext': 'mp4',
	219	'title': '#2 Vorprojekt',
	220	'episode': '#2 Vorprojekt',
	221	'series': '[209351] Strassenwesen',
	222	'creator': 'Neuhold R',
	223	'duration': 5566404,
add96eb9	224	},
49afc1d8 FB	225	},
	226	{
	227	'info_dict': {
	228	'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
	229	'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
	230	'ext': 'mp4',
	231	'title': '#1 Variantenstudium',
	232	'episode': '#1 Variantenstudium',
	233	'series': '[209351] Strassenwesen',
	234	'creator': 'Neuhold R',
	235	'duration': 5420200,
add96eb9	236	},
add96eb9	237	},
49afc1d8	238	],
add96eb9	239	'min_playlist_count': 4,
49afc1d8 FB	240	}]
	241
	242	def _real_extract(self, url):
add96eb9	243	playlist_id = self._match_id(url)
	244	episodes_data = self._download_json(
	245	self._API_EPISODE, playlist_id, query={'sid': playlist_id}, note='Downloading episode list')
49afc1d8	246	series_data = self._download_json(
add96eb9	247	'https://tube.tugraz.at/series/series.json', playlist_id, fatal=False,
49afc1d8 FB	248	note='downloading series metadata', errnote='failed to download series metadata',
49afc1d8 FB	249	query={
add96eb9	250	'seriesId': playlist_id,
49afc1d8	251	'count': 1,
add96eb9	252	'sort': 'TITLE',
49afc1d8 FB	253	})
	254
	255	return self.playlist_result(
add96eb9	256	map(self._extract_episode, episodes_data['search-results']['result']), playlist_id,
49afc1d8	257	traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))