jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/videocampus_sachsen.py
[ie/crunchyroll] Fix stream extraction (#10005)
[yt-dlp.git] / yt_dlp / extractor / videocampus_sachsen.py
index fe9e061ae2e4842e78e5bfbf76f040a20113df5e..37bc7d7181620dee1cf1f8a1a7881f7d6664fdab 100644 (file)
@@ -1,11 +1,80 @@
+import functools
+import re
+
 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
 
 
 class VideocampusSachsenIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?:
+    IE_NAME = 'ViMP'
+    _INSTANCES = (
+        'bergauf.tv',
+        'campus.demo.vimp.com',
+        'corporate.demo.vimp.com',
+        'dancehalldatabase.com',
+        'drehzahl.tv',
+        'educhannel.hs-gesundheit.de',
+        'emedia.ls.haw-hamburg.de',
+        'globale-evolution.net',
+        'hohu.tv',
+        'htvideos.hightechhigh.org',
+        'k210039.vimp.mivitec.net',
+        'media.cmslegal.com',
+        'media.hs-furtwangen.de',
+        'media.hwr-berlin.de',
+        'mediathek.dkfz.de',
+        'mediathek.htw-berlin.de',
+        'mediathek.polizei-bw.de',
+        'medien.hs-merseburg.de',
+        'mportal.europa-uni.de',
+        'pacific.demo.vimp.com',
+        'slctv.com',
+        'streaming.prairiesouth.ca',
+        'tube.isbonline.cn',
+        'univideo.uni-kassel.de',
+        'ursula2.genetics.emory.edu',
+        'ursulablicklevideoarchiv.com',
+        'v.agrarumweltpaedagogik.at',
+        'video.eplay-tv.de',
+        'video.fh-dortmund.de',
+        'video.hs-offenburg.de',
+        'video.hs-pforzheim.de',
+        'video.hspv.nrw.de',
+        'video.irtshdf.fr',
+        'video.pareygo.de',
+        'video.tu-freiberg.de',
+        'videocampus.sachsen.de',
+        'videoportal.uni-freiburg.de',
+        'videoportal.vm.uni-freiburg.de',
+        'videos.duoc.cl',
+        'videos.uni-paderborn.de',
+        'vimp-bemus.udk-berlin.de',
+        'vimp.aekwl.de',
+        'vimp.hs-mittweida.de',
+        'vimp.oth-regensburg.de',
+        'vimp.ph-heidelberg.de',
+        'vimp.sma-events.com',
+        'vimp.weka-fachmedien.de',
+        'webtv.univ-montp3.fr',
+        'www.b-tu.de/media',
+        'www.bergauf.tv',
+        'www.bigcitytv.de',
+        'www.cad-videos.de',
+        'www.drehzahl.tv',
+        'www.fh-bielefeld.de/medienportal',
+        'www.hohu.tv',
+        'www.orvovideo.com',
+        'www.rwe.tv',
+        'www.salzi.tv',
+        'www.wenglor-media.com',
+        'www2.univ-sba.dz',
+    )
+    _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
         m/(?P<tmp_id>[0-9a-f]+)|
-        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})
-    )'''
+        (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
+        media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?)
+    )''' % ('|'.join(map(re.escape, _INSTANCES)))
 
     _TESTS = [
         {
@@ -13,6 +82,8 @@ class VideocampusSachsenIE(InfoExtractor):
             'info_dict': {
                 'id': 'e6b9349905c1628631f175712250f2a1',
                 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+                'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+                'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
                 'ext': 'mp4',
             },
         },
@@ -21,6 +92,8 @@ class VideocampusSachsenIE(InfoExtractor):
             'info_dict': {
                 'id': 'fc99c527e4205b121cb7c74433469262',
                 'title': 'Was ist selbstgesteuertes Lernen?',
+                'description': 'md5:196aa3b0509a526db62f84679522a2f5',
+                'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
                 'display_id': 'Was-ist-selbstgesteuertes-Lernen',
                 'ext': 'mp4',
             },
@@ -30,66 +103,151 @@ class VideocampusSachsenIE(InfoExtractor):
             'info_dict': {
                 'id': '09d4ed029002eb1bdda610f1103dd54c',
                 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
+                'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
+                'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
                 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
                 'ext': 'mp4',
             },
         },
+        {
+            'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
+            'info_dict': {
+                'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
+                'id': '0183356e41af7bfb83d7667b20d9b6a3',
+                'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
+                'description': 'md5:508958bd93e0ca002ac731d94182a54f',
+                'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
+                'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
+                'ext': 'mp4',
+            }
+        },
+        {
+            'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
+            'info_dict': {
+                'id': 'c8816f1cc942c12b6cce57c835cffd7c',
+                'title': 'Preisverleihung »Produkte des Jahres 2022«',
+                'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
+                'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
+                'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
+                'ext': 'mp4',
+            },
+        },
+        {
+            'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
+            'info_dict': {
+                'id': 'fc99c527e4205b121cb7c74433469262',
+                'title': 'Was ist selbstgesteuertes Lernen?',
+                'ext': 'mp4',
+            },
+        },
     ]
 
     def _real_extract(self, url):
-        video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id')
+        host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
+            'host', 'id', 'tmp_id', 'display_id', 'embed_id')
         webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
 
-        if not tmp_id:
-            video_id = self._html_search_regex(
-                r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&',
+        if not video_id:
+            video_id = embed_id or self._html_search_regex(
+                rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
                 webpage, 'video_id')
 
-        title = self._html_search_regex(
-            (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')),
-            webpage, 'title', group='content', fatal=False)
+        if not (display_id or tmp_id):
+            # Title, description from embedded page's meta wouldn't be correct
+            title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
+            description = None
+            thumbnail = None
+        else:
+            title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
+            description = self._html_search_meta(
+                ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
+            thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
+
+        formats, subtitles = [], {}
+        try:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+                f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
+                video_id, 'mp4', m3u8_id='hls', fatal=True)
+        except ExtractorError as e:
+            if not isinstance(e.cause, HTTPError) or e.cause.status not in (404, 500):
+                raise
 
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
-            video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
+        formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
 
         return {
             'id': video_id,
             'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
             'display_id': display_id,
             'formats': formats,
-            'subtitles': subtitles
+            'subtitles': subtitles,
         }
 
 
-class VideocampusSachsenEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)'
+class ViMPPlaylistIE(InfoExtractor):
+    IE_NAME = 'ViMP:Playlist'
+    _VALID_URL = r'''(?x)(?P<host>https?://(?:%s))/(?:
+        album/view/aid/(?P<album_id>[0-9]+)|
+        (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
+    )''' % '|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES))
 
-    _TESTS = [
-        {
-            'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
-            'info_dict': {
-                'id': 'fc99c527e4205b121cb7c74433469262',
-                'title': 'Was ist selbstgesteuertes Lernen?',
-                'ext': 'mp4',
-            },
-        }
-    ]
+    _TESTS = [{
+        'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
+        'info_dict': {
+            'id': 'channel-3',
+            'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
+        },
+        'playlist_mincount': 9,
+    }, {
+        'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
+        'info_dict': {
+            'id': 'album-208',
+            'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
+        },
+        'playlist_mincount': 4,
+    }, {
+        'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
+        'info_dict': {
+            'id': 'category-91',
+            'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
+        },
+        'playlist_mincount': 7,
+    }]
+    _PAGE_SIZE = 10
+
+    def _fetch_page(self, host, url_part, id, data, page):
+        webpage = self._download_webpage(
+            f'{host}/media/ajax/component/boxList/{url_part}', id,
+            query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))
+        urls = re.findall(r'"([^"]+/video/[^"]+)"', webpage)
+
+        for url in urls:
+            yield self.url_result(host + url, VideocampusSachsenIE)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        host, album_id, mode, name, id = self._match_valid_url(url).group(
+            'host', 'album_id', 'mode', 'name', 'id')
 
-        webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False)
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
-            video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
-        self._sort_formats(formats)
+        webpage = self._download_webpage(url, album_id or id, fatal=False) or ''
+        title = (self._html_search_meta('title', webpage, fatal=False)
+                 or self._html_extract_title(webpage))
 
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'subtitles': subtitles,
+        url_part = (f'aid/{album_id}' if album_id
+                    else f'category/{name}/category_id/{id}' if mode == 'category'
+                    else f'title/{name}/channel/{id}')
+
+        mode = mode or 'album'
+        data = {
+            'vars[mode]': mode,
+            f'vars[{mode}]': album_id or id,
+            'vars[context]': '4' if album_id else '1' if mode == 'category' else '3',
+            'vars[context_id]': album_id or id,
+            'vars[layout]': 'thumb',
+            'vars[per_page][thumb]': str(self._PAGE_SIZE),
         }
+
+        return self.playlist_result(
+            OnDemandPagedList(functools.partial(
+                self._fetch_page, host, url_part, album_id or id, data), self._PAGE_SIZE),
+            playlist_title=title, id=f'{mode}-{album_id or id}')