]> jfr.im git - yt-dlp.git/commitdiff
[mediasite] Extract slides (#343)
authorfelix <redacted>
Sun, 23 May 2021 16:34:53 +0000 (18:34 +0200)
committerpukkandan <redacted>
Sun, 13 Jun 2021 15:06:40 +0000 (20:36 +0530)
Fixes:
https://github.com/ytdl-org/youtube-dl/issues/4974#issue-58006762
https://github.com/ytdl-org/youtube-dl/issues/4540#issuecomment-69574231
https://github.com/ytdl-org/youtube-dl/pull/11185#issuecomment-335554239

Authored by: fstirlitz

yt_dlp/extractor/mediasite.py

index 5d083a1cd1c9fc76c05f6824080d11dd23d8f904..c62233ab7c7c7ff157f81efcaf9f3b32c8102f93 100644 (file)
@@ -122,6 +122,52 @@ def _extract_urls(webpage):
                 r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
                 webpage)]
 
+    def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
+        slide_base_url = Stream['SlideBaseUrl']
+
+        fname_template = Stream['SlideImageFileNameTemplate']
+        if fname_template != 'slide_{0:D4}.jpg':
+            self.report_warning('Unusual slide file name template; report a bug if slide downloading fails')
+        fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template)
+
+        fragments = []
+        for i, slide in enumerate(Stream['Slides']):
+            if i == 0:
+                if slide['Time'] > 0:
+                    default_slide = images.get('DefaultSlide')
+                    if default_slide is None:
+                        default_slide = images.get('DefaultStreamImage')
+                    if default_slide is not None:
+                        default_slide = default_slide['ImageFilename']
+                    if default_slide is not None:
+                        fragments.append({
+                            'path': default_slide,
+                            'duration': slide['Time'] / 1000,
+                        })
+
+            next_time = try_get(None, [
+                lambda _: Stream['Slides'][i + 1]['Time'],
+                lambda _: duration,
+                lambda _: slide['Time'],
+            ], expected_type=(int, float))
+
+            fragments.append({
+                'path': fname_template.format(slide.get('Number', i + 1)),
+                'duration': (next_time - slide['Time']) / 1000
+            })
+
+        return {
+            'format_id': '%s-%u.slides' % (stream_id, snum),
+            'ext': 'mhtml',
+            'url': slide_base_url,
+            'protocol': 'mhtml',
+            'acodec': 'none',
+            'vcodec': 'none',
+            'format_note': 'Slides',
+            'fragments': fragments,
+            'fragment_base_url': slide_base_url,
+        }
+
     def _real_extract(self, url):
         url, data = unsmuggle_url(url, {})
         mobj = re.match(self._VALID_URL, url)
@@ -198,10 +244,15 @@ def _real_extract(self, url):
                         'ext': mimetype2ext(VideoUrl.get('MimeType')),
                     })
 
-            # TODO: if Stream['HasSlideContent']:
-            # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
-            # from Stream['Slides']
-            # this will require writing a custom downloader...
+            if Stream.get('HasSlideContent', False):
+                images = player_options['PlayerLayoutOptions']['Images']
+                stream_formats.append(self.__extract_slides(
+                    stream_id=stream_id,
+                    snum=snum,
+                    Stream=Stream,
+                    duration=presentation.get('Duration'),
+                    images=images,
+                ))
 
             # disprefer 'secondary' streams
             if stream_type != 0: