]> jfr.im git - yt-dlp.git/blobdiff - youtube_dlc/extractor/wdr.py
Update to ytdl-2021.01.03
[yt-dlp.git] / youtube_dlc / extractor / wdr.py
index 44d4a13cac006ac650c6a07b1a604729c25c5205..5cb5924f8041ac13fc97ee8f9f736617f6a627eb 100644 (file)
@@ -17,6 +17,7 @@
     unified_strdate,
     update_url_query,
     urlhandle_detect_ext,
+    url_or_none,
 )
 
 
@@ -42,15 +43,15 @@ def _real_extract(self, url):
         is_live = metadata.get('mediaType') == 'live'
 
         tracker_data = metadata['trackerData']
+        title = tracker_data['trackerClipTitle']
         media_resource = metadata['mediaResource']
 
         formats = []
-        subtitles = {}
 
         # check if the metadata contains a direct URL to a file
-        for kind, media_resource in media_resource.items():
+        for kind, media in media_resource.items():
             if kind == 'captionsHash':
-                for ext, url in media_resource.items():
+                for ext, url in media.items():
                     subtitles.setdefault('de', []).append({
                         'url': url,
                         'ext': ext,
@@ -59,8 +60,10 @@ def _real_extract(self, url):
 
             if kind not in ('dflt', 'alt'):
                 continue
+            if not isinstance(media, dict):
+                continue
 
-            for tag_name, medium_url in media_resource.items():
+            for tag_name, medium_url in media.items():
                 if tag_name not in ('videoURL', 'audioURL'):
                     continue
 
@@ -90,7 +93,23 @@ def _real_extract(self, url):
 
         self._sort_formats(formats)
 
-        title = tracker_data['trackerClipTitle']
+        subtitles = {}
+        caption_url = media_resource.get('captionURL')
+        if caption_url:
+            subtitles['de'] = [{
+                'url': caption_url,
+                'ext': 'ttml',
+            }]
+        captions_hash = media_resource.get('captionsHash')
+        if isinstance(captions_hash, dict):
+            for ext, format_url in captions_hash.items():
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                subtitles.setdefault('de', []).append({
+                    'url': format_url,
+                    'ext': determine_ext(format_url, None) or ext,
+                })
 
         return {
             'id': tracker_data.get('trackerClipId', video_id),
@@ -106,7 +125,7 @@ def _real_extract(self, url):
 class WDRPageIE(InfoExtractor):
     _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
     _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
-    _VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
+    _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
 
     _TESTS = [
         {
@@ -213,7 +232,11 @@ class WDRPageIE(InfoExtractor):
         {
             'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
             'only_matching': True,
-        }
+        },
+        {
+            'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):