Update to ytdl-2021.01.03

[yt-dlp.git] / youtube_dlc / extractor / wdr.py
diff --git a/youtube_dlc/extractor/wdr.py b/youtube_dlc/extractor/wdr.py

index 44d4a13cac006ac650c6a07b1a604729c25c5205..5cb5924f8041ac13fc97ee8f9f736617f6a627eb 100644 (file)
--- a/youtube_dlc/extractor/wdr.py
+++ b/youtube_dlc/extractor/wdr.py
@@ -17,6 +17,7 @@
      unified_strdate,
      update_url_query,
      urlhandle_detect_ext,
+    url_or_none,
  )
  
  
@@ -42,15 +43,15 @@ def _real_extract(self, url):
          is_live = metadata.get('mediaType') == 'live'
  
          tracker_data = metadata['trackerData']
+        title = tracker_data['trackerClipTitle']
          media_resource = metadata['mediaResource']
  
          formats = []
-        subtitles = {}
  
          # check if the metadata contains a direct URL to a file
-        for kind, media_resource in media_resource.items():
+        for kind, media in media_resource.items():
              if kind == 'captionsHash':
-                for ext, url in media_resource.items():
+                for ext, url in media.items():
                      subtitles.setdefault('de', []).append({
                          'url': url,
                          'ext': ext,
@@ -59,8 +60,10 @@ def _real_extract(self, url):
  
              if kind not in ('dflt', 'alt'):
                  continue
+            if not isinstance(media, dict):
+                continue
  
-            for tag_name, medium_url in media_resource.items():
+            for tag_name, medium_url in media.items():
                  if tag_name not in ('videoURL', 'audioURL'):
                      continue
  
@@ -90,7 +93,23 @@ def _real_extract(self, url):
  
          self._sort_formats(formats)
  
-        title = tracker_data['trackerClipTitle']
+        subtitles = {}
+        caption_url = media_resource.get('captionURL')
+        if caption_url:
+            subtitles['de'] = [{
+                'url': caption_url,
+                'ext': 'ttml',
+            }]
+        captions_hash = media_resource.get('captionsHash')
+        if isinstance(captions_hash, dict):
+            for ext, format_url in captions_hash.items():
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                subtitles.setdefault('de', []).append({
+                    'url': format_url,
+                    'ext': determine_ext(format_url, None) or ext,
+                })
  
          return {
              'id': tracker_data.get('trackerClipId', video_id),
@@ -106,7 +125,7 @@ def _real_extract(self, url):
  class WDRPageIE(InfoExtractor):
      _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
      _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
-    _VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
+    _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
  
      _TESTS = [
          {
@@ -213,7 +232,11 @@ class WDRPageIE(InfoExtractor):
          {
              'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
              'only_matching': True,
-        }
+        },
+        {
+            'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
+            'only_matching': True,
+        },
      ]
  
      def _real_extract(self, url):