[extractor/JWPlatform] Fix extractor (#5112)

[yt-dlp.git] / yt_dlp / extractor / jwplatform.py
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py

index d6b8420a87f4fd3a3e359c75c89ee03f8ce21f04..c9496894309245165e2b01baae390550aa3f27ab 100644 (file)
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
          'only_matching': True,
      }]
  
+    _WEBPAGE_TESTS = [{
+        # JWPlatform iframe
+        'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+        'info_dict': {
+            'id': 'AG26UQXM',
+            'ext': 'mp4',
+            'upload_date': '20160719',
+            'timestamp': 1468923808,
+            'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+            'description': '',
+            'duration': 294.0,
+        },
+    }, {
+        # Player url not surrounded by quotes
+        'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+        'info_dict': {
+            'id': 'R10NQdhY',
+            'title': 'Playgirl',
+            'ext': 'mp4',
+            'upload_date': '20220624',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
+            'timestamp': 1656064800,
+            'description': 'BRD 1966, Will Tremper',
+            'duration': 5146.0,
+        },
+        'params': {'allowed_extractors': ['generic', 'jwplatform']},
+    }]
+
      @classmethod
      def _extract_embed_urls(cls, url, webpage):
          for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
              # <input value=URL> is used by hyland.com
              # if we find <iframe>, dont look for <input>
              ret = re.findall(
-                r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+                r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
                  webpage)
              if ret:
                  return ret