]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/tubetugraz.py
fix motherless
[yt-dlp.git] / yt_dlp / extractor / tubetugraz.py
CommitLineData
49afc1d8
FB
1from .common import InfoExtractor
2from ..utils import (
3 float_or_none,
4 parse_resolution,
5 traverse_obj,
6 urlencode_postdata,
7 variadic,
8)
9
10
class TubeTuGrazBaseIE(InfoExtractor):
    """Shared login, episode and format extraction logic for tube.tugraz.at extractors."""

    _NETRC_MACHINE = 'tubetugraz'

    _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
    _FORMAT_TYPES = ('presentation', 'presenter')

    def _perform_login(self, username, password):
        """Submit the credentials to the login form, answering the OTP prompt if one is shown.

        All requests are made with ``fatal=False``; a failed request makes the
        method return early, and a rejected password or TFA code is reported
        as a warning rather than raised.
        """
        # A response whose URL equals this one ends the login flow without a warning
        player_url = 'https://tube.tugraz.at/paella/ui/index.html'

        login_page = self._request_webpage(
            'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
            None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
        if not login_page:
            return

        credentials = urlencode_postdata({
            'lang': 'de',
            '_eventId_proceed': '',
            'j_username': username,
            'j_password': password,
        })
        result = self._download_webpage_handle(
            login_page.url, None, fatal=False, headers={'referer': login_page.url},
            note='logging in', errnote='unable to log in', data=credentials)
        if not result:
            return

        page, handle = result
        if handle.url == player_url:
            return

        # Without the OTP prompt in the returned page, the login is reported as failed
        otp_prompt = self._html_search_regex(
            r'<p\b[^>]*>(Bitte geben Sie einen OTP-Wert ein:)</p>',
            page, 'TFA prompt', default=None)
        if not otp_prompt:
            self.report_warning('unable to login: incorrect password')
            return

        otp_payload = urlencode_postdata({
            'lang': 'de',
            '_eventId_proceed': '',
            'j_tokenNumber': self._get_tfa_info(),
        })
        handle = self._request_webpage(
            handle.url, None, fatal=False, headers={'referer': handle.url},
            note='logging in with TFA', errnote='unable to log in with TFA', data=otp_payload)
        # Warn only when a response was received and it did not end up on the player page
        if handle and handle.url != player_url:
            self.report_warning('unable to login: incorrect TFA code')

    def _extract_episode(self, episode_info):
        """Build the info dict for one episode entry of the search API response.

        Most fields are read from ``episode_info['mediapackage']`` first, with
        a top-level ``dc*`` key as the alternative path.
        """
        video_id = episode_info.get('id')
        tracks = traverse_obj(episode_info, ('mediapackage', 'media', 'track'))
        formats = list(self._extract_formats(tracks, video_id))

        title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
        series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
        # variadic() lets a single creator and a list of creators be joined the same way
        creators = variadic(traverse_obj(
            episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default=''))
        creator = ', '.join(creators)

        return {
            'id': video_id,
            'title': title,
            'creator': creator or None,
            # NOTE(review): the value is passed through unconverted; the expected
            # durations in this file's tests (e.g. 3295818) look like milliseconds
            # - confirm the intended unit
            'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
            'series': series_title,
            'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
            # Only set an episode name when the video belongs to a series
            'episode': series_title and title,
            'formats': formats,
        }

    def _set_format_type(self, formats, fmt_type):
        """Tag every format with ``fmt_type`` in place and return the same iterable.

        Formats whose type does not start with the first entry of
        ``_FORMAT_TYPES`` additionally get ``preference = -2``.
        """
        for fmt in formats:
            fmt['format_note'] = fmt_type
            if fmt_type.startswith(self._FORMAT_TYPES[0]):
                continue
            fmt['preference'] = -2
        return formats

    def _extract_formats(self, format_list, video_id):
        """Yield format dicts for the listed tracks, then for constructed Wowza manifest URLs.

        The Wowza HLS/DASH URLs built from ``video_id`` are only requested when
        no listed track used the respective transport.
        """
        seen_hls = seen_dash = False

        for track in format_list or []:
            track_url = traverse_obj(track, ('tags', 'url'), 'url')
            if track_url is None:
                continue

            track_type = track.get('type') or 'unknown'
            transport = (track.get('transport') or 'https').lower()

            if transport == 'hls':
                formats = self._extract_m3u8_formats(
                    track_url, video_id, 'mp4', fatal=False, note=f'downloading {track_type} HLS manifest')
                seen_hls = True
            elif transport == 'dash':
                formats = self._extract_mpd_formats(
                    track_url, video_id, fatal=False, note=f'downloading {track_type} DASH manifest')
                seen_dash = True
            elif transport == 'https':
                formats = [{
                    'url': track_url,
                    'abr': float_or_none(traverse_obj(track, ('audio', 'bitrate')), 1000),
                    'vbr': float_or_none(traverse_obj(track, ('video', 'bitrate')), 1000),
                    'fps': traverse_obj(track, ('video', 'framerate')),
                    **parse_resolution(traverse_obj(track, ('video', 'resolution'))),
                }]
            else:
                # RTMP, HDS, SMOOTH, and unknown formats
                # - RTMP url fails on every tested entry until now
                # - HDS url 404's on every tested entry until now
                # - SMOOTH url 404's on every tested entry until now
                continue

            yield from self._set_format_type(formats, track_type)

        # TODO: Add test for these
        for fallback_type in self._FORMAT_TYPES:
            if not seen_hls:
                hls_formats = self._extract_m3u8_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fallback_type}.smil/playlist.m3u8',
                    video_id, 'mp4', fatal=False, note=f'Downloading {fallback_type} HLS manifest', errnote=False) or []
                yield from self._set_format_type(hls_formats, fallback_type)

            if not seen_dash:
                dash_formats = self._extract_mpd_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fallback_type}.smil/manifest_mpm4sav_mvlist.mpd',
                    video_id, fatal=False, note=f'Downloading {fallback_type} DASH manifest', errnote=False)
                yield from self._set_format_type(dash_formats, fallback_type)
49afc1d8
FB
133
134
class TubeTuGrazIE(TubeTuGrazBaseIE):
    """Extractor for a single tube.tugraz.at episode addressed by its UUID."""

    IE_DESC = 'tube.tugraz.at'

    # The dot in "watch.html" is escaped so that only the literal file name
    # matches, in line with the escaped "browse\.html" of the series pattern.
    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/watch\.html\?id=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [
        {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
            'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
            'info_dict': {
                'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
                'ext': 'mp4',
                'title': '#6 (23.11.2017)',
                'episode': '#6 (23.11.2017)',
                'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
                'creator': 'Safran C',
                'duration': 3295818,
                'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
            },
        }, {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
            'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
            'info_dict': {
                'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
                'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
                'ext': 'mp4',
            },
            'expected_warnings': ['Extractor failed to obtain "title"'],
        },
    ]

    def _real_extract(self, url):
        """Query the episode search API for the id in ``url`` and return its info dict."""
        video_id = self._match_id(url)
        episode_data = self._download_json(
            self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')

        # When the response carries no 'search-results' -> 'result' entry, a stub
        # holding only the id is handed to _extract_episode instead.
        episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
        return self._extract_episode(episode_info)
175
176
class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
    """Extractor for a tube.tugraz.at series page; returns its episodes as a playlist."""

    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [{
        'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'info_dict': {
            'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
            'title': '[209351] Strassenwesen',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#4 Detailprojekt',
                    'episode': '#4 Detailprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 6127024,
                },
            },
            {
                'info_dict': {
                    'id': '87350498-799a-44d3-863f-d1518a98b114',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#3 Generelles Projekt',
                    'episode': '#3 Generelles Projekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5374422,
                },
            },
            {
                'info_dict': {
                    'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#2 Vorprojekt',
                    'episode': '#2 Vorprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5566404,
                },
            },
            {
                'info_dict': {
                    'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#1 Variantenstudium',
                    'episode': '#1 Variantenstudium',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5420200,
                },
            },
        ],
        'min_playlist_count': 4,
    }]

    def _real_extract(self, url):
        """Download the episode list and series metadata and return a playlist result.

        The series metadata request is non-fatal; if it fails, the playlist
        title lookup simply yields nothing.
        """
        playlist_id = self._match_id(url)
        episodes_data = self._download_json(
            self._API_EPISODE, playlist_id, query={'sid': playlist_id}, note='Downloading episode list')
        series_data = self._download_json(
            'https://tube.tugraz.at/series/series.json', playlist_id, fatal=False,
            note='downloading series metadata', errnote='failed to download series metadata',
            query={
                'seriesId': playlist_id,
                'count': 1,
                'sort': 'TITLE',
            })

        # The single-episode extractor in this file reads the same
        # 'search-results' -> 'result' entry as one mapping, so the entry is
        # normalised here: a lone mapping is wrapped by variadic() instead of
        # being iterated key by key, and a missing entry gives an empty playlist.
        episodes = variadic(traverse_obj(episodes_data, ('search-results', 'result')) or [])
        playlist_title = traverse_obj(
            series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value'))

        return self.playlist_result(
            map(self._extract_episode, episodes), playlist_id, playlist_title)