]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/videocampus_sachsen.py
[extractor/youtube] Bring back `_extract_chapters_from_description`
[yt-dlp.git] / yt_dlp / extractor / videocampus_sachsen.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import compat_HTTPError
5 from ..utils import ExtractorError
6
7
class VideocampusSachsenIE(InfoExtractor):
    """Extractor for the video portals listed in ``_INSTANCES`` (IE_NAME ``Vimp``).

    Three URL shapes are accepted on every listed host:

    * ``/m/<tmp_id>``: the real media key is looked up in the downloaded page.
    * ``/video/<display_id>/<id>`` (optionally below ``/category/``).
    * ``/media/embed?...key=<embed_id>``: the key is taken from the query string.
    """
    IE_NAME = 'Vimp'
    # Host names (some with a path prefix) that are matched by _VALID_URL.
    _INSTANCES = (
        'campus.demo.vimp.com',
        'corporate.demo.vimp.com',
        'dancehalldatabase.com',
        'educhannel.hs-gesundheit.de',
        'emedia.ls.haw-hamburg.de',
        'globale-evolution.net',
        'k210039.vimp.mivitec.net',
        'media.cmslegal.com',
        'media.hs-furtwangen.de',
        'media.hwr-berlin.de',
        'mediathek.dkfz.de',
        'mediathek.htw-berlin.de',
        'mediathek.polizei-bw.de',
        'medien.hs-merseburg.de',
        'mportal.europa-uni.de',
        'pacific.demo.vimp.com',
        'slctv.com',
        'tube.isbonline.cn',
        'univideo.uni-kassel.de',
        'ursula2.genetics.emory.edu',
        'ursulablicklevideoarchiv.com',
        'v.agrarumweltpaedagogik.at',
        'video.eplay-tv.de',
        'video.fh-dortmund.de',
        'video.hs-offenburg.de',
        'video.hs-pforzheim.de',
        'video.hspv.nrw.de',
        'video.irtshdf.fr',
        'video.pareygo.de',
        'video.tu-freiberg.de',
        'videocampus.sachsen.de',
        'videoportal.uni-freiburg.de',
        'videoportal.vm.uni-freiburg.de',
        'videos.duoc.cl',
        'videos.uni-paderborn.de',
        'vimp-bemus.udk-berlin.de',
        'vimp.aekwl.de',
        'vimp.hs-mittweida.de',
        'vimp.oth-regensburg.de',
        'vimp.ph-heidelberg.de',
        'vimp.sma-events.com',
        'vimp.weka-fachmedien.de',
        'webtv.univ-montp3.fr',
        'www.b-tu.de/media',
        'www.bigcitytv.de',
        'www.cad-videos.de',
        'www.fh-bielefeld.de/medienportal',
        'www.orvovideo.com',
        'www.rwe.tv',
        'www.wenglor-media.com',
        'www2.univ-sba.dz',
    )
    # Verbose-mode pattern (whitespace inside is ignored).  The embed_id group
    # captures only the 32 hex digits: a following '&' that introduces further
    # query parameters must not become part of the ID.
    _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
        m/(?P<tmp_id>[0-9a-f]+)|
        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
        media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32})
    )''' % ('|'.join(map(re.escape, _INSTANCES)))

    _TESTS = [
        {
            'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b',
            'info_dict': {
                'id': 'e6b9349905c1628631f175712250f2a1',
                'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
                'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262',
            'info_dict': {
                'id': 'fc99c527e4205b121cb7c74433469262',
                'title': 'Was ist selbstgesteuertes Lernen?',
                'description': 'md5:196aa3b0509a526db62f84679522a2f5',
                'display_id': 'Was-ist-selbstgesteuertes-Lernen',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100',
            'info_dict': {
                'id': '09d4ed029002eb1bdda610f1103dd54c',
                'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
                'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
                'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
            'info_dict': {
                'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
                'id': '0183356e41af7bfb83d7667b20d9b6a3',
                'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
                'description': 'md5:508958bd93e0ca002ac731d94182a54f',
                'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
                'ext': 'mp4',
            }
        },
        {
            'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
            'info_dict': {
                'id': 'c8816f1cc942c12b6cce57c835cffd7c',
                'title': 'Preisverleihung »Produkte des Jahres 2022«',
                'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
                'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
            'info_dict': {
                'id': 'fc99c527e4205b121cb7c74433469262',
                'title': 'Was ist selbstgesteuertes Lernen?',
                'ext': 'mp4',
            },
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for a URL matched by ``_VALID_URL``.

        Returns a dict with ``id``, ``title``, ``description``, ``display_id``,
        ``formats`` and ``subtitles``.  Re-raises any ``ExtractorError`` from
        the HLS manifest request unless it was caused by an HTTP 404 or 500,
        in which case only the direct ``getMedium`` MP4 format is returned.
        """
        host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
            'host', 'id', 'tmp_id', 'display_id', 'embed_id')
        # Resolve the embed key first so the page download below is reported
        # under a real identifier instead of None for embed URLs.
        video_id = video_id or embed_id
        webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''

        if not video_id:
            # Only /m/<tmp_id> URLs get here: the media key has to be read from
            # the embed URL found in the page.  The host is escaped so that its
            # dots are matched literally.
            video_id = self._html_search_regex(
                rf'src="https?://{re.escape(host)}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
                webpage, 'video_id')

        if not (display_id or tmp_id):
            # Title, description from embedded page's meta wouldn't be correct
            title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False)
            description = None
        else:
            title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
            description = self._html_search_meta(
                ('og:description', 'twitter:description', 'description'), webpage, default=None)

        formats, subtitles = [], {}
        try:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
                video_id, 'mp4', m3u8_id='hls', fatal=True)
        except ExtractorError as e:
            # A 404/500 on the manifest is tolerated (the direct MP4 below is
            # still offered); every other failure is propagated.
            if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500):
                raise

        # Direct MP4 download is always added alongside any HLS formats.
        formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles
        }