]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/opencast.py
[youtube] Prefer UTC upload date for videos (#2223)
[yt-dlp.git] / yt_dlp / extractor / opencast.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 determine_ext,
9 ExtractorError,
10 int_or_none,
11 parse_iso8601,
12 traverse_obj,
13 variadic,
14 )
15
16
class OpencastBaseIE(InfoExtractor):
    """Shared helpers for the Opencast extractors.

    Subclasses must define ``_API_BASE``, a %-format string that takes
    ``(host, id)``; ``_call_api`` uses it to build the request URL.
    """

    # Alternation of known Opencast hosts. The literal spans several lines,
    # so it is meant to be interpolated into a verbose ``(?x)`` pattern where
    # the embedded whitespace is ignored.
    _INSTANCES_RE = r'''(?:
        opencast\.informatik\.kit\.edu|
        electures\.uni-muenster\.de|
        oc-presentation\.ltcc\.tuwien\.ac\.at|
        medien\.ph-noe\.ac\.at|
        oc-video\.ruhr-uni-bochum\.de|
        oc-video1\.ruhr-uni-bochum\.de|
        opencast\.informatik\.uni-goettingen\.de|
        heicast\.uni-heidelberg\.de|
        opencast\.hawk\.de:8080|
        opencast\.hs-osnabrueck\.de|
        video[0-9]+\.virtuos\.uni-osnabrueck\.de|
        opencast\.uni-koeln\.de|
        media\.opencast\.hochschule-rhein-waal\.de|
        matterhorn\.dce\.harvard\.edu|
        hs-harz\.opencast\.uni-halle\.de|
        videocampus\.urz\.uni-leipzig\.de|
        media\.uct\.ac\.za|
        vid\.igb\.illinois\.edu|
        cursosabertos\.c3sl\.ufpr\.br|
        mcmedia\.missioncollege\.org|
        clases\.odon\.edu\.uy
    )'''
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'

    def _call_api(self, host, video_id, **kwargs):
        """Download and return the JSON at ``_API_BASE % (host, video_id)``.

        Extra keyword arguments are forwarded to ``_download_json``.
        """
        return self._download_json(self._API_BASE % (host, video_id), video_id, **kwargs)

    def _parse_mediapackage(self, video):
        """Build an info dict from a single ``mediapackage`` object.

        Raises ExtractorError when the mediapackage has no ``id``.
        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``series``, ``season_id``, ``creator``, ``timestamp`` and
        ``thumbnail``.
        """
        video_id = video.get('id')
        if video_id is None:
            raise ExtractorError('Video id was not found')

        formats = []
        # ``media.track`` may be missing, a single object or a list;
        # ``variadic`` lets all three cases share one loop.
        for track in variadic(traverse_obj(video, ('media', 'track')) or []):
            href = track.get('url')
            if href is None:
                continue
            ext = determine_ext(href, None)

            transport = track.get('transport')

            # Use the formats-only manifest helpers here: the
            # ``*_formats_and_subtitles`` variants return a
            # ``(formats, subtitles)`` pair, and extending ``formats`` with
            # that pair would insert a list and a dict instead of formats.
            if transport == 'DASH' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(href, video_id, mpd_id='dash', fatal=False))
            elif transport == 'HLS' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    href, video_id, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False))
            elif transport == 'HDS' or ext == 'f4m':
                formats.extend(self._extract_f4m_formats(href, video_id, f4m_id='hds', fatal=False))
            elif transport == 'SMOOTH':
                formats.extend(self._extract_ism_formats(href, video_id, ism_id='smooth', fatal=False))
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(href, video_id, fatal=False))
            else:
                # Not a manifest: describe the track itself as one format.
                track_obj = {
                    'url': href,
                    'ext': ext,
                    'format_note': transport,
                    'resolution': traverse_obj(track, ('video', 'resolution')),
                    'fps': int_or_none(traverse_obj(track, ('video', 'framerate'))),
                    'vbr': int_or_none(traverse_obj(track, ('video', 'bitrate')), scale=1000),
                    'vcodec': traverse_obj(track, ('video', 'encoder', 'type')) if track.get('video') else 'none',
                    'abr': int_or_none(traverse_obj(track, ('audio', 'bitrate')), scale=1000),
                    'asr': int_or_none(traverse_obj(track, ('audio', 'samplingrate'))),
                    'acodec': traverse_obj(track, ('audio', 'encoder', 'type')) if track.get('audio') else 'none',
                }

                if transport == 'RTMP':
                    # Split ``rtmp://host/<app>/<ext>:<playpath>`` into its
                    # parts; tracks whose URL does not fit are skipped.
                    m_obj = re.search(r'(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<ext>.+):(?P<playpath>.+)', href)
                    if not m_obj:
                        continue
                    track_obj.update({
                        'app': m_obj.group('app'),
                        'ext': m_obj.group('ext'),
                        'play_path': m_obj.group('ext') + ':' + m_obj.group('playpath'),
                        'rtmp_live': True,
                        'preference': -2,
                    })
                formats.append(track_obj)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': video.get('title'),
            'series': video.get('seriestitle'),
            'season_id': video.get('series'),
            'creator': traverse_obj(video, ('creators', 'creator')),
            'timestamp': parse_iso8601(video.get('start')),
            # First attachment URL found, if any.
            'thumbnail': traverse_obj(video, ('attachments', 'attachment', ..., 'url'), get_all=False),
        }
110
111
class OpencastIE(OpencastBaseIE):
    """Extractor for a single episode shown in the Paella player page."""

    # The dot in ``watch\.html`` is escaped so it only matches a literal '.'.
    _VALID_URL = r'''(?x)
        https?://(?P<host>%s)/paella/ui/watch\.html\?.*?
        id=(?P<id>%s)
    ''' % (OpencastBaseIE._INSTANCES_RE, OpencastBaseIE._UUID_RE)

    # Queried with the episode id taken from the URL.
    _API_BASE = 'https://%s/search/episode.json?id=%s'

    _TESTS = [
        {
            'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
            'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
            'info_dict': {
                'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
                'ext': 'mp4',
                'title': '11 - Kryptographie - 24.11.2015',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1606208400,
                'upload_date': '20201124',
            },
        }
    ]

    def _real_extract(self, url):
        """Fetch the episode's search result and parse its mediapackage."""
        host, video_id = self._match_valid_url(url).group('host', 'id')
        return self._parse_mediapackage(
            self._call_api(host, video_id)['search-results']['result']['mediapackage'])
139
140
class OpencastPlaylistIE(OpencastBaseIE):
    """Extractor for a series listing in the Engage UI (``epFrom=<uuid>``)."""

    # The dot in ``index\.html`` is escaped so it only matches a literal '.'.
    _VALID_URL = r'''(?x)
        https?://(?P<host>%s)/engage/ui/index\.html\?.*?
        epFrom=(?P<id>%s)
    ''' % (OpencastBaseIE._INSTANCES_RE, OpencastBaseIE._UUID_RE)

    # Queried with the ``epFrom`` id taken from the URL, passed as ``sid``.
    _API_BASE = 'https://%s/search/episode.json?sid=%s'

    _TESTS = [
        {
            'url': 'https://oc-video1.ruhr-uni-bochum.de/engage/ui/index.html?epFrom=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
            'info_dict': {
                'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                'title': 'Kryptographie - WiSe 15/16',
            },
            'playlist_mincount': 28,
        },
        {
            'url': 'https://oc-video.ruhr-uni-bochum.de/engage/ui/index.html?e=1&p=1&epFrom=b1a54262-3684-403f-9731-8e77c3766f9a',
            'info_dict': {
                'id': 'b1a54262-3684-403f-9731-8e77c3766f9a',
                'title': 'inSTUDIES-Social movements and prefigurative politics in a global perspective',
            },
            'playlist_mincount': 6,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist with one entry per result that has a mediapackage.

        The playlist title is the ``series`` value of the first entry, if any.
        """
        host, video_id = self._match_valid_url(url).group('host', 'id')

        # ``result`` may be a single object or a list; ``variadic`` covers both.
        entries = [
            self._parse_mediapackage(episode['mediapackage'])
            for episode in variadic(self._call_api(host, video_id)['search-results']['result'])
            if episode.get('mediapackage')
        ]

        return self.playlist_result(entries, video_id, traverse_obj(entries, (0, 'series')))