]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/videocampus_sachsen.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / videocampus_sachsen.py
CommitLineData
1db14612 1import functools
10fa2471
F
2import re
3
72e995f1 4from .common import InfoExtractor
3d2623a8 5from ..networking.exceptions import HTTPError
1db14612 6from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
72e995f1
F
7
8
class VideocampusSachsenIE(InfoExtractor):
    """Extractor for single videos hosted on the ViMP portals listed in ``_INSTANCES``."""

    IE_NAME = 'ViMP'
    # Known hosts (optionally with a path prefix). Entries are regex-escaped
    # before being joined into _VALID_URL, so they are matched literally.
    _INSTANCES = (
        'bergauf.tv',
        'campus.demo.vimp.com',
        'corporate.demo.vimp.com',
        'dancehalldatabase.com',
        'drehzahl.tv',
        'educhannel.hs-gesundheit.de',
        'emedia.ls.haw-hamburg.de',
        'globale-evolution.net',
        'hohu.tv',
        'htvideos.hightechhigh.org',
        'k210039.vimp.mivitec.net',
        'media.cmslegal.com',
        'media.hs-furtwangen.de',
        'media.hwr-berlin.de',
        'mediathek.dkfz.de',
        'mediathek.htw-berlin.de',
        'mediathek.polizei-bw.de',
        'medien.hs-merseburg.de',
        'mportal.europa-uni.de',
        'pacific.demo.vimp.com',
        'slctv.com',
        'streaming.prairiesouth.ca',
        'tube.isbonline.cn',
        'univideo.uni-kassel.de',
        'ursula2.genetics.emory.edu',
        'ursulablicklevideoarchiv.com',
        'v.agrarumweltpaedagogik.at',
        'video.eplay-tv.de',
        'video.fh-dortmund.de',
        'video.hs-offenburg.de',
        'video.hs-pforzheim.de',
        'video.hspv.nrw.de',
        'video.irtshdf.fr',
        'video.pareygo.de',
        'video.tu-freiberg.de',
        'videocampus.sachsen.de',
        'videoportal.uni-freiburg.de',
        'videoportal.vm.uni-freiburg.de',
        'videos.duoc.cl',
        'videos.uni-paderborn.de',
        'vimp-bemus.udk-berlin.de',
        'vimp.aekwl.de',
        'vimp.hs-mittweida.de',
        'vimp.oth-regensburg.de',
        'vimp.ph-heidelberg.de',
        'vimp.sma-events.com',
        'vimp.weka-fachmedien.de',
        'webtv.univ-montp3.fr',
        'www.b-tu.de/media',
        'www.bergauf.tv',
        'www.bigcitytv.de',
        'www.cad-videos.de',
        'www.drehzahl.tv',
        'www.fh-bielefeld.de/medienportal',
        'www.hohu.tv',
        'www.orvovideo.com',
        'www.rwe.tv',
        'www.salzi.tv',
        'www.wenglor-media.com',
        'www2.univ-sba.dz',
    )
    # Three URL shapes are accepted:
    #   /m/<tmp_id>                          - hex token; the real id is read from the page
    #   /[category/]video/<display_id>/<id>  - regular watch page
    #   /media/embed...key=<embed_id>        - embed player
    # The embed_id group captures only the 32 hex digits, so a following
    # '&next=param' can never leak a trailing '&' into the video id.
    _VALID_URL = r'''(?x)https?://(?P<host>{})/(?:
        m/(?P<tmp_id>[0-9a-f]+)|
        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{{32}})|
        media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{{32}})
    )'''.format('|'.join(map(re.escape, _INSTANCES)))

    _TESTS = [
        {
            'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b',
            'info_dict': {
                'id': 'e6b9349905c1628631f175712250f2a1',
                'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
                'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
                'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262',
            'info_dict': {
                'id': 'fc99c527e4205b121cb7c74433469262',
                'title': 'Was ist selbstgesteuertes Lernen?',
                'description': 'md5:196aa3b0509a526db62f84679522a2f5',
                'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
                'display_id': 'Was-ist-selbstgesteuertes-Lernen',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100',
            'info_dict': {
                'id': '09d4ed029002eb1bdda610f1103dd54c',
                'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
                'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
                'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
                'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
            'info_dict': {
                'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
                'id': '0183356e41af7bfb83d7667b20d9b6a3',
                'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
                'description': 'md5:508958bd93e0ca002ac731d94182a54f',
                'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
                'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
            'info_dict': {
                'id': 'c8816f1cc942c12b6cce57c835cffd7c',
                'title': 'Preisverleihung »Produkte des Jahres 2022«',
                'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
                'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
                'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
            'info_dict': {
                'id': 'fc99c527e4205b121cb7c74433469262',
                'title': 'Was ist selbstgesteuertes Lernen?',
                'ext': 'mp4',
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for a single video URL matched by ``_VALID_URL``.

        The video id is taken from the URL when present (watch-page id or
        embed key); otherwise it is read from the embed iframe found in the
        downloaded page. Raises ExtractorError if no id can be found or if the
        HLS manifest request fails with anything other than HTTP 404/500.
        """
        host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
            'host', 'id', 'tmp_id', 'display_id', 'embed_id')
        # The page download is best-effort (fatal=False); a failure leaves an
        # empty string so the metadata lookups below simply find nothing.
        # embed_id is included so embed URLs are not logged with a None id.
        webpage = self._download_webpage(
            url, video_id or tmp_id or embed_id, fatal=False) or ''

        if not video_id:
            # host is escaped so that dots in the hostname match literally
            video_id = embed_id or self._html_search_regex(
                rf'src="https?://{re.escape(host)}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
                webpage, 'video_id')

        if not (display_id or tmp_id):
            # Title, description from embedded page's meta wouldn't be correct
            title = self._html_search_regex(
                r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
            description = None
            thumbnail = None
        else:
            title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
            description = self._html_search_meta(
                ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
            thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)

        formats, subtitles = [], {}
        try:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
                video_id, 'mp4', m3u8_id='hls', fatal=True)
        except ExtractorError as e:
            # Only an HTTP 404/500 from the HLS endpoint triggers the fallback;
            # every other failure is propagated unchanged.
            if not isinstance(e.cause, HTTPError) or e.cause.status not in (404, 500):
                raise

            # Fall back to the direct mp4 download URL
            formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
        }
1db14612
F
186
187
class ViMPPlaylistIE(InfoExtractor):
    """Playlist extractor for album, category and channel pages on the
    hosts listed in ``VideocampusSachsenIE._INSTANCES``."""

    IE_NAME = 'ViMP:Playlist'
    # Unlike the single-video extractor, the 'host' group here includes the
    # URL scheme, so it can be used directly as a prefix for further requests.
    _VALID_URL = r'''(?x)(?P<host>https?://(?:{}))/(?:
        album/view/aid/(?P<album_id>[0-9]+)|
        (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
    )'''.format('|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES)))

    _TESTS = [
        {
            'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
            'info_dict': {
                'id': 'channel-3',
                'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
            },
            'playlist_mincount': 9,
        },
        {
            'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
            'info_dict': {
                'id': 'album-208',
                'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
            },
            'playlist_mincount': 4,
        },
        {
            'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
            'info_dict': {
                'id': 'category-91',
                'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
            },
            'playlist_mincount': 7,
        },
    ]
    # Number of entries requested per listing page
    _PAGE_SIZE = 10

    def _fetch_page(self, host, url_part, playlist_id, data, page):
        """Yield url results for the video links found on one listing page.

        ``page`` is the page number supplied by OnDemandPagedList; ``data`` is
        sent as the url-encoded POST body of the boxList request.
        """
        listing = self._download_webpage(
            f'{host}/media/ajax/component/boxList/{url_part}', playlist_id,
            data=urlencode_postdata(data), query={'page': page, 'page_only': 1})

        # Every quoted string containing '/video/' is treated as a video link
        # and resolved against the (scheme-qualified) host.
        for video_path in re.findall(r'"([^"]+/video/[^"]+)"', listing):
            yield self.url_result(host + video_path, VideocampusSachsenIE)

    def _real_extract(self, url):
        """Return a lazily paged playlist for an album, category or channel URL."""
        mobj = self._match_valid_url(url)
        host = mobj.group('host')
        album_id = mobj.group('album_id')
        name = mobj.group('name')
        # Album URLs carry album_id, category/channel URLs carry id
        list_id = album_id or mobj.group('id')

        webpage = self._download_webpage(url, list_id, fatal=False) or ''
        title = self._html_search_meta('title', webpage, fatal=False)
        if not title:
            title = self._html_extract_title(webpage)

        # Each playlist kind has its own boxList path and 'context' value
        if album_id:
            mode, context = 'album', '4'
            url_part = f'aid/{album_id}'
        elif mobj.group('mode') == 'category':
            mode, context = 'category', '1'
            url_part = f'category/{name}/category_id/{list_id}'
        else:
            mode, context = 'channel', '3'
            url_part = f'title/{name}/channel/{list_id}'

        data = {
            'vars[mode]': mode,
            f'vars[{mode}]': list_id,
            'vars[context]': context,
            'vars[context_id]': list_id,
            'vars[layout]': 'thumb',
            'vars[per_page][thumb]': str(self._PAGE_SIZE),
        }

        page_fetcher = functools.partial(self._fetch_page, host, url_part, list_id, data)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE),
            playlist_title=title, id=f'{mode}-{list_id}')