yt_dlp/extractor/tubetugraz.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     float_or_none,
   4     parse_resolution,
   5     traverse_obj,
   6     urlencode_postdata,
   7     variadic,
   8 )
   9
  10
  11 class TubeTuGrazBaseIE(InfoExtractor):
  12     _NETRC_MACHINE = 'tubetugraz'
  13
  14     _API_EPISODE = 'https://tube.tugraz.at/search/episode.json'
  15     _FORMAT_TYPES = ('presentation', 'presenter')
  16
  17     def _perform_login(self, username, password):
  18         urlh = self._request_webpage(
  19             'https://tube.tugraz.at/Shibboleth.sso/Login?target=/paella/ui/index.html',
  20             None, fatal=False, note='downloading login page', errnote='unable to fetch login page')
  21         if not urlh:
  22             return
  23
  24         urlh = self._request_webpage(
  25             urlh.geturl(), None, fatal=False, headers={'referer': urlh.geturl()},
  26             note='logging in', errnote='unable to log in', data=urlencode_postdata({
  27                 'lang': 'de',
  28                 '_eventId_proceed': '',
  29                 'j_username': username,
  30                 'j_password': password
  31             }))
  32
  33         if urlh and urlh.geturl() != 'https://tube.tugraz.at/paella/ui/index.html':
  34             self.report_warning('unable to login: incorrect password')
  35
  36     def _extract_episode(self, episode_info):
  37         id = episode_info.get('id')
  38         formats = list(self._extract_formats(
  39             traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id))
  40
  41         title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
  42         series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
  43         creator = ', '.join(variadic(traverse_obj(
  44             episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default='')))
  45         return {
  46             'id': id,
  47             'title': title,
  48             'creator': creator or None,
  49             'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'),
  50             'series': series_title,
  51             'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'),
  52             'episode': series_title and title,
  53             'formats': formats
  54         }
  55
  56     def _set_format_type(self, formats, type):
  57         for f in formats:
  58             f['format_note'] = type
  59             if not type.startswith(self._FORMAT_TYPES[0]):
  60                 f['preference'] = -2
  61         return formats
  62
  63     def _extract_formats(self, format_list, id):
  64         has_hls, has_dash = False, False
  65
  66         for format_info in format_list or []:
  67             url = traverse_obj(format_info, ('tags', 'url'), 'url')
  68             if url is None:
  69                 continue
  70
  71             type = format_info.get('type') or 'unknown'
  72             transport = (format_info.get('transport') or 'https').lower()
  73
  74             if transport == 'https':
  75                 formats = [{
  76                     'url': url,
  77                     'abr': float_or_none(traverse_obj(format_info, ('audio', 'bitrate')), 1000),
  78                     'vbr': float_or_none(traverse_obj(format_info, ('video', 'bitrate')), 1000),
  79                     'fps': traverse_obj(format_info, ('video', 'framerate')),
  80                     **parse_resolution(traverse_obj(format_info, ('video', 'resolution'))),
  81                 }]
  82             elif transport == 'hls':
  83                 has_hls, formats = True, self._extract_m3u8_formats(
  84                     url, id, 'mp4', fatal=False, note=f'downloading {type} HLS manifest')
  85             elif transport == 'dash':
  86                 has_dash, formats = True, self._extract_mpd_formats(
  87                     url, id, fatal=False, note=f'downloading {type} DASH manifest')
  88             else:
  89                 # RTMP, HDS, SMOOTH, and unknown formats
  90                 # - RTMP url fails on every tested entry until now
  91                 # - HDS url 404's on every tested entry until now
  92                 # - SMOOTH url 404's on every tested entry until now
  93                 continue
  94
  95             yield from self._set_format_type(formats, type)
  96
  97         # TODO: Add test for these
  98         for type in self._FORMAT_TYPES:
  99             if not has_hls:
 100                 hls_formats = self._extract_m3u8_formats(
 101                     f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/playlist.m3u8',
 102                     id, 'mp4', fatal=False, note=f'Downloading {type} HLS manifest', errnote=False) or []
 103                 yield from self._set_format_type(hls_formats, type)
 104
 105             if not has_dash:
 106                 dash_formats = self._extract_mpd_formats(
 107                     f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/manifest_mpm4sav_mvlist.mpd',
 108                     id, fatal=False, note=f'Downloading {type} DASH manifest', errnote=False)
 109                 yield from self._set_format_type(dash_formats, type)
 110
 111
 112 class TubeTuGrazIE(TubeTuGrazBaseIE):
 113     IE_DESC = 'tube.tugraz.at'
 114
 115     _VALID_URL = r'''(?x)
 116         https?://tube\.tugraz\.at/paella/ui/watch.html\?id=
 117         (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
 118     '''
 119     _TESTS = [
 120         {
 121             'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=f2634392-e40e-4ac7-9ddc-47764aa23d40',
 122             'md5': 'a23a3d5c9aaca2b84932fdba66e17145',
 123             'info_dict': {
 124                 'id': 'f2634392-e40e-4ac7-9ddc-47764aa23d40',
 125                 'ext': 'mp4',
 126                 'title': '#6 (23.11.2017)',
 127                 'episode': '#6 (23.11.2017)',
 128                 'series': '[INB03001UF] Einführung in die strukturierte Programmierung',
 129                 'creator': 'Safran C',
 130                 'duration': 3295818,
 131                 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34',
 132             }
 133         }, {
 134             'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238',
 135             'md5': 'de0d854a56bf7318d2b693fe1adb89a5',
 136             'info_dict': {
 137                 'id': '2df6d787-e56a-428d-8ef4-d57f07eef238',
 138                 'title': 'TubeTuGraz video #2df6d787-e56a-428d-8ef4-d57f07eef238',
 139                 'ext': 'mp4',
 140             },
 141             'expected_warnings': ['Extractor failed to obtain "title"'],
 142         }
 143     ]
 144
 145     def _real_extract(self, url):
 146         video_id = self._match_id(url)
 147         episode_data = self._download_json(
 148             self._API_EPISODE, video_id, query={'id': video_id, 'limit': 1}, note='Downloading episode metadata')
 149
 150         episode_info = traverse_obj(episode_data, ('search-results', 'result'), default={'id': video_id})
 151         return self._extract_episode(episode_info)
 152
 153
 154 class TubeTuGrazSeriesIE(TubeTuGrazBaseIE):
 155     _VALID_URL = r'''(?x)
 156         https?://tube\.tugraz\.at/paella/ui/browse\.html\?series=
 157         (?P<id>[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12})
 158     '''
 159     _TESTS = [{
 160         'url': 'https://tube.tugraz.at/paella/ui/browse.html?series=0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 161         'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 162         'info_dict': {
 163             'id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 164             'title': '[209351] Strassenwesen',
 165         },
 166         'playlist': [
 167             {
 168                 'info_dict': {
 169                     'id': 'ee17ce5d-34e2-48b7-a76a-fed148614e11',
 170                     'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 171                     'ext': 'mp4',
 172                     'title': '#4 Detailprojekt',
 173                     'episode': '#4 Detailprojekt',
 174                     'series': '[209351] Strassenwesen',
 175                     'creator': 'Neuhold R',
 176                     'duration': 6127024,
 177                 }
 178             },
 179             {
 180                 'info_dict': {
 181                     'id': '87350498-799a-44d3-863f-d1518a98b114',
 182                     'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 183                     'ext': 'mp4',
 184                     'title': '#3 Generelles Projekt',
 185                     'episode': '#3 Generelles Projekt',
 186                     'series': '[209351] Strassenwesen',
 187                     'creator': 'Neuhold R',
 188                     'duration': 5374422,
 189                 }
 190             },
 191             {
 192                 'info_dict': {
 193                     'id': '778599ea-489e-4189-9e05-3b4888e19bcd',
 194                     'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 195                     'ext': 'mp4',
 196                     'title': '#2 Vorprojekt',
 197                     'episode': '#2 Vorprojekt',
 198                     'series': '[209351] Strassenwesen',
 199                     'creator': 'Neuhold R',
 200                     'duration': 5566404,
 201                 }
 202             },
 203             {
 204                 'info_dict': {
 205                     'id': '75e4c71c-d99d-4e56-b0e6-4f2bcdf11f29',
 206                     'series_id': '0e6351b7-c372-491e-8a49-2c9b7e21c5a6',
 207                     'ext': 'mp4',
 208                     'title': '#1 Variantenstudium',
 209                     'episode': '#1 Variantenstudium',
 210                     'series': '[209351] Strassenwesen',
 211                     'creator': 'Neuhold R',
 212                     'duration': 5420200,
 213                 }
 214             }
 215         ],
 216         'min_playlist_count': 4
 217     }]
 218
 219     def _real_extract(self, url):
 220         id = self._match_id(url)
 221         episodes_data = self._download_json(self._API_EPISODE, id, query={'sid': id}, note='Downloading episode list')
 222         series_data = self._download_json(
 223             'https://tube.tugraz.at/series/series.json', id, fatal=False,
 224             note='downloading series metadata', errnote='failed to download series metadata',
 225             query={
 226                 'seriesId': id,
 227                 'count': 1,
 228                 'sort': 'TITLE'
 229             })
 230
 231         return self.playlist_result(
 232             map(self._extract_episode, episodes_data['search-results']['result']), id,
 233             traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value')))