]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/tubetugraz.py
[extractor/youtube] Fix `live_status` extraction for playlist videos
[yt-dlp.git] / yt_dlp / extractor / tubetugraz.py
CommitLineData
49afc1d8
FB
1from .common import InfoExtractor
2from ..utils import (
3 float_or_none,
4 parse_resolution,
5 traverse_obj,
6 urlencode_postdata,
7 variadic,
8)
9
10
class TubeTuGrazBaseIE(InfoExtractor):
    """Shared login, episode and format extraction for tube.tugraz.at extractors."""

    _NETRC_MACHINE = 'tubetugraz'

    # Endpoint queried for episode metadata.
    _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
    # Stream flavours probed on the fallback streaming server; the first
    # entry is the one that keeps the default format preference.
    _FORMAT_TYPES = ('presentation', 'presenter')

    def _perform_login(self, username, password):
        """Log in by posting the credentials to the page the login URL resolves to.

        Both requests are non-fatal. A warning is reported when the login
        request succeeds but does not end up on the player index page.
        """
        urlh = self._request_webpage(
            'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
            None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
        if not urlh:
            return

        # The credentials are posted to wherever the first request ended up.
        login_url = urlh.geturl()
        urlh = self._request_webpage(
            login_url, None, fatal=False, headers={'referer': login_url},
            note='logging in', errnote='unable to log in', data=urlencode_postdata({
                'lang': 'de',
                '_eventId_proceed': '',
                'j_username': username,
                'j_password': password
            }))

        if urlh and urlh.geturl() != 'https://tube.tugraz.at/paella/ui/index.html':
            self.report_warning('unable to login: incorrect password')

    def _extract_episode(self, episode_info):
        """Build an info dict from a single episode search result.

        Metadata is read from the `mediapackage` entry first, with the
        top-level `dc*` keys as fallback.
        """
        video_id = episode_info.get('id')
        formats = list(self._extract_formats(
            traverse_obj(episode_info, ('mediapackage', 'media', 'track')), video_id))
        self._sort_formats(formats)

        title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
        series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
        # The creator entry may be a single value or a list; variadic handles both.
        creator = ', '.join(variadic(traverse_obj(
            episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
        return {
            'id': video_id,
            'title': title,
            'creator': creator or None,
            # NOTE(review): passed through unconverted; looks like the API
            # reports milliseconds here - confirm whether seconds are wanted.
            'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
            'series': series_title,
            'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
            # Only reported as an episode when the video belongs to a series.
            'episode': series_title and title,
            'formats': formats
        }

    def _set_format_type(self, formats, type):
        """Tag each format with `type` as its note and return the same list.

        Formats whose type does not start with the first entry of
        `_FORMAT_TYPES` get a lowered preference.
        """
        is_primary = type.startswith(self._FORMAT_TYPES[0])
        for f in formats:
            f['format_note'] = type
            if not is_primary:
                f['preference'] = -2
        return formats

    def _extract_formats(self, format_list, id):
        """Yield format dicts for the tracks in `format_list`.

        Tracks without a URL or with an unsupported transport are skipped.
        Afterwards, for every entry of `_FORMAT_TYPES`, the fallback HLS
        and/or DASH manifests are probed if no track of that transport was
        listed.
        """
        has_hls, has_dash = False, False

        # A lone track may arrive as a bare mapping instead of a list
        # (assumption, same single-or-list shape handled for creators);
        # variadic wraps it so it is not iterated key by key and dropped.
        for format_info in variadic(format_list or []):
            url = traverse_obj(format_info, ('tags', 'url'), 'url')
            if url is None:
                continue

            track_type = format_info.get('type') or 'unknown'
            transport = (format_info.get('transport') or 'https').lower()

            if transport == 'https':
                formats = [{
                    'url': url,
                    'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
                    'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
                    'fps': traverse_obj(format_info, ('video', 'framerate')),
                    **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
                }]
            elif transport == 'hls':
                has_hls, formats = True, self._extract_m3u8_formats(
                    url, id, 'mp4', fatal=False, note=f'downloading {track_type} HLS manifest')
            elif transport == 'dash':
                has_dash, formats = True, self._extract_mpd_formats(
                    url, id, fatal=False, note=f'downloading {track_type} DASH manifest')
            else:
                # RTMP, HDS, SMOOTH, and unknown formats
                # - RTMP url fails on every tested entry until now
                # - HDS url 404's on every tested entry until now
                # - SMOOTH url 404's on every tested entry until now
                continue

            yield from self._set_format_type(formats, track_type)

        # TODO: Add test for these
        for fallback_type in self._FORMAT_TYPES:
            if not has_hls:
                hls_formats = self._extract_m3u8_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{fallback_type}.smil/playlist.m3u8',
                    id, 'mp4', fatal=False, note=f'Downloading {fallback_type} HLS manifest', errnote=False) or []
                yield from self._set_format_type(hls_formats, fallback_type)

            if not has_dash:
                dash_formats = self._extract_mpd_formats(
                    f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{fallback_type}.smil/manifest_mpm4sav_mvlist.mpd',
                    id, fatal=False, note=f'Downloading {fallback_type} DASH manifest', errnote=False)
                yield from self._set_format_type(dash_formats, fallback_type)
111
112
class TubeTuGrazIE(TubeTuGrazBaseIE):
    """Extractor for a single tube.tugraz.at watch page."""

    IE_DESC = 'tube.tugraz.at'

    # The dot in `watch\.html` is escaped so it only matches a literal '.'.
    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/watch\.html\?id=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [
        {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
            'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
            'info_dict': {
                'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
                'ext': 'mp4',
                'title': '#6 (23.11.2017)',
                'episode': '#6 (23.11.2017)',
                'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
                'creator': 'Safran C',
                'duration': 3295818,
                'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
            }
        }, {
            'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
            'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
            'info_dict': {
                'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
                'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
                'ext': 'mp4',
            },
            'expected_warnings': ['Extractor failed to obtain "title"'],
        }
    ]

    def _real_extract(self, url):
        """Download the episode metadata for the id in `url` and build its info dict."""
        video_id = self._match_id(url)
        episode_data = self._download_json(
            self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')

        # Without a search result only the id is known; the format
        # extraction can still probe the fallback manifests with it.
        episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
        return self._extract_episode(episode_info)
153
154
class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
    """Extractor for a tube.tugraz.at series page, returned as a playlist of episodes."""

    _VALID_URL = r'''(?x)
        https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
        (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
    '''
    _TESTS = [{
        'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
        'info_dict': {
            'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
            'title': '[209351] Strassenwesen',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#4 Detailprojekt',
                    'episode': '#4 Detailprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 6127024,
                }
            },
            {
                'info_dict': {
                    'id': '87350498-799a-44d3-863f-d1518a98b114',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#3 Generelles Projekt',
                    'episode': '#3 Generelles Projekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5374422,
                }
            },
            {
                'info_dict': {
                    'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#2 Vorprojekt',
                    'episode': '#2 Vorprojekt',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5566404,
                }
            },
            {
                'info_dict': {
                    'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
                    'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
                    'ext': 'mp4',
                    'title': '#1 Variantenstudium',
                    'episode': '#1 Variantenstudium',
                    'series': '[209351] Strassenwesen',
                    'creator': 'Neuhold R',
                    'duration': 5420200,
                }
            }
        ],
        'min_playlist_count': 4
    }]

    def _real_extract(self, url):
        """Download the episode list and series metadata, then build a playlist.

        The series metadata request is non-fatal; when it fails the playlist
        is returned without a title.
        """
        series_id = self._match_id(url)
        episodes_data = self._download_json(
            self._API_EPISODE, series_id, query={'sid': series_id}, note='Downloading episode list')
        series_data = self._download_json(
            'https://tube.tugraz.at/series/series.json', series_id, fatal=False,
            note='downloading series metadata', errnote='failed to download series metadata',
            query={
                'seriesId': series_id,
                'count': 1,
                'sort': 'TITLE'
            })

        # A missing result yields an empty playlist instead of a KeyError.
        # A single result may arrive as a bare mapping rather than a list
        # (assumption, based on how the single-episode lookup reads this
        # key); variadic wraps it so it is processed as one episode.
        episodes = variadic(traverse_obj(
            episodes_data, ('search-results', 'result'), default=[]))

        return self.playlist_result(
            map(self._extract_episode, episodes), series_id,
            traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))