]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/tubetugraz.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / tubetugraz.py
1 from .common import InfoExtractor
2 from ..utils import (
3 float_or_none,
4 parse_resolution,
5 traverse_obj,
6 urlencode_postdata,
7 variadic,
8 )
9
10
class TubeTuGrazBaseIE(InfoExtractor):
    """Shared login, episode and format extraction logic for tube.tugraz.at extractors."""

    _NETRC_MACHINE = 'tubetugraz'

    _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
    # Stream types probed on the streaming server; the first entry is the preferred one
    # (see _set_format_type, which lowers the preference of everything else)
    _FORMAT_TYPES = ('presentation', 'presenter')

    def _perform_login(self, username, password):
        """Log in through the Shibboleth login page, including an optional TFA step.

        Every request is non-fatal: when a request fails, the method simply returns.
        Rejected credentials or a rejected TFA code are reported as warnings.
        """
        urlh = self._request_webpage(
            'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
            None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
        if not urlh:
            return

        # A failed non-fatal download yields a falsy value instead of a
        # (content, handle) pair, so substitute an empty pair before unpacking
        content, urlh = self._download_webpage_handle(
            urlh.url, None, fatal=False, headers={'referer': urlh.url},
            note='logging in', errnote='unable to log in',
            data=urlencode_postdata({
                'lang': 'de',
                '_eventId_proceed': '',
                'j_username': username,
                'j_password': password,
            })) or (None, None)
        # Ending up on the player page means the login succeeded
        if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
            return

        # Neither the player page nor the OTP prompt: treat it as rejected credentials
        if not self._html_search_regex(
                r'<p\b[^>]*>(Bitte geben Sie einen OTP-Wert ein:)</p>',
                content, 'TFA prompt', default=None):
            self.report_warning('unable to login: incorrect password')
            return

        content, urlh = self._download_webpage_handle(
            urlh.url, None, fatal=False, headers={'referer': urlh.url},
            note='logging in with TFA', errnote='unable to log in with TFA',
            data=urlencode_postdata({
                'lang': 'de',
                '_eventId_proceed': '',
                'j_tokenNumber': self._get_tfa_info(),
            })) or (None, None)
        if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
            return

        self.report_warning('unable to login: incorrect TFA code')

    def _extract_episode(self, episode_info):
        """Build an info dict from a single episode search result.

        Each metadata field is read from the 'mediapackage' object first, with the
        matching top-level 'dc*' key as fallback.
        """
        video_id = episode_info.get('id')
        formats = list(self._extract_formats(
            traverse_obj(episode_info, ('mediapackage', 'media', 'track')), video_id))

        title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
        series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
        # The creator field may hold a single value or a list; join into one string
        creator = ', '.join(variadic(traverse_obj(
            episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
        return {
            'id': video_id,
            'title': title,
            'creator': creator or None,
            # Passed through exactly as reported, without unit conversion.
            # NOTE(review): values look like milliseconds - confirm the expected unit
            'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
            'series': series_title,
            'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
            # Only report an episode name when the video belongs to a series
            'episode': series_title and title,
            'formats': formats,
        }

    def _set_format_type(self, formats, fmt_type):
        """Tag every format with its stream type and demote non-preferred types.

        The format dicts are modified in place; the same list is returned.
        """
        for f in formats:
            f['format_note'] = fmt_type
            if not fmt_type.startswith(self._FORMAT_TYPES[0]):
                f['preference'] = -2
        return formats

    def _extract_formats(self, format_list, video_id):
        """Yield format dicts for the given tracks, then for guessed manifests.

        Tracks using HTTPS, HLS or DASH transport are extracted; other transports
        are skipped. If no HLS (or no DASH) track was listed, conventional manifest
        URLs on the streaming server are tried for each known stream type.
        """
        has_hls, has_dash = False, False

        for format_info in format_list or []:
            url = traverse_obj(format_info, ('tags', 'url'), 'url')
            if url is None:
                continue

            fmt_type = format_info.get('type') or 'unknown'
            transport = (format_info.get('transport') or 'https').lower()

            if transport == 'https':
                formats = [{
                    'url': url,
                    'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
                    'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
                    'fps': traverse_obj(format_info, ('video', 'framerate')),
                    **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
                }]
            elif transport == 'hls':
                formats = self._extract_m3u8_formats(
                    url, video_id, 'mp4', fatal=False, note=f'downloading {fmt_type} HLS manifest')
                has_hls = True
            elif transport == 'dash':
                formats = self._extract_mpd_formats(
                    url, video_id, fatal=False, note=f'downloading {fmt_type} DASH manifest')
                has_dash = True
            else:
                # RTMP, HDS, SMOOTH, and unknown formats
                # - RTMP url fails on every tested entry until now
                # - HDS url 404's on every tested entry until now
                # - SMOOTH url 404's on every tested entry until now
                continue

            yield from self._set_format_type(formats, fmt_type)

        # TODO: Add test for these
        for fmt_type in self._FORMAT_TYPES:
            if not has_hls:
                hls_formats = self._extract_m3u8_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/playlist.m3u8',
                    video_id, 'mp4', fatal=False, note=f'Downloading {fmt_type} HLS manifest', errnote=False) or []
                yield from self._set_format_type(hls_formats, fmt_type)

            if not has_dash:
                # Guarded with `or []` like the HLS probe above so a failed
                # non-fatal probe can never break the iteration
                dash_formats = self._extract_mpd_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/manifest_mpm4sav_mvlist.mpd',
                    video_id, fatal=False, note=f'Downloading {fmt_type} DASH manifest', errnote=False) or []
                yield from self._set_format_type(dash_formats, fmt_type)
129
130
class TubeTuGrazIE(TubeTuGrazBaseIE):
    """Extractor for single episodes on tube.tugraz.at (watch.html?id=<uuid>)."""

    IE_DESC = 'tube.tugraz.at'

    # The dot in "watch.html" is escaped so that it only matches a literal dot
    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/watch\.html\?id=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [
        {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
            'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
            'info_dict': {
                'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
                'ext': 'mp4',
                'title': '#6 (23.11.2017)',
                'episode': '#6 (23.11.2017)',
                'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
                'creator': 'Safran C',
                'duration': 3295818,
                'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
            },
        }, {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
            'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
            'info_dict': {
                'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
                'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
                'ext': 'mp4',
            },
            'expected_warnings': ['Extractor failed to obtain "title"'],
        },
    ]

    def _real_extract(self, url):
        """Fetch the episode's metadata and turn it into an info dict."""
        video_id = self._match_id(url)
        episode_data = self._download_json(
            self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')

        # Without a search result, continue with just the ID so that
        # _extract_episode can still probe the guessed manifest URLs
        episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
        return self._extract_episode(episode_info)
171
172
class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
    """Extractor for whole series on tube.tugraz.at (browse.html?series=<uuid>)."""

    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [{
        'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'info_dict': {
            'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
            'title': '[209351] Strassenwesen',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#4 Detailprojekt',
                    'episode': '#4 Detailprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 6127024,
                },
            },
            {
                'info_dict': {
                    'id': '87350498-799a-44d3-863f-d1518a98b114',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#3 Generelles Projekt',
                    'episode': '#3 Generelles Projekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5374422,
                },
            },
            {
                'info_dict': {
                    'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#2 Vorprojekt',
                    'episode': '#2 Vorprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5566404,
                },
            },
            {
                'info_dict': {
                    'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#1 Variantenstudium',
                    'episode': '#1 Variantenstudium',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5420200,
                },
            },
        ],
        'min_playlist_count': 4,
    }]

    def _real_extract(self, url):
        """Return a playlist made of all episodes that belong to the series."""
        playlist_id = self._match_id(url)
        episodes_data = self._download_json(
            self._API_EPISODE, playlist_id, query={'sid': playlist_id}, note='Downloading episode list')
        # Series metadata is only used for the playlist title, hence non-fatal
        series_data = self._download_json(
            'https://tube.tugraz.at/series/series.json', playlist_id, fatal=False,
            note='downloading series metadata', errnote='failed to download series metadata',
            query={
                'seriesId': playlist_id,
                'count': 1,
                'sort': 'TITLE',
            })

        # 'result' may be missing (no episodes) or be a single object rather
        # than a list; normalise both cases to an iterable of episode dicts
        episodes = variadic(traverse_obj(
            episodes_data, ('search-results', 'result'), default=[]))

        return self.playlist_result(
            map(self._extract_episode, episodes), playlist_id,
            traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))