jfr.im git - yt-dlp.git/commitdiff
[extractor/JWPlatform] Fix extractor (#5112)
author: coletdjnz <redacted>
Mon, 3 Oct 2022 19:37:48 +0000 (08:37 +1300)
committer: GitHub <redacted>
Mon, 3 Oct 2022 19:37:48 +0000 (19:37 +0000)
Fix bitrate and filesize extraction and support embeds with unquoted urls.

Related: #5106

Authored by: coletdjnz

yt_dlp/extractor/common.py
yt_dlp/extractor/generic.py
yt_dlp/extractor/jwplatform.py

index caec0ccf62bea3f4b1225c572de23f140ad27dd8..0700b4767b07b5eec9096e23c8a7d73645b03c3d 100644 (file)
@@ -3587,7 +3587,8 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                     'url': source_url,
                     'width': int_or_none(source.get('width')),
                     'height': height,
-                    'tbr': int_or_none(source.get('bitrate')),
+                    'tbr': int_or_none(source.get('bitrate'), scale=1000),
+                    'filesize': int_or_none(source.get('filesize')),
                     'ext': ext,
                 }
                 if source_url.startswith('rtmp'):
index 73aefc78297b1a916802be71972f40700787de56..73422f937c35f639244c440d0d63b90101961fde 100644 (file)
@@ -1071,18 +1071,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
-        {
-            # JWPlatform iframe
-            'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
-            'info_dict': {
-                'id': 'AG26UQXM',
-                'ext': 'mp4',
-                'upload_date': '20160719',
-                'timestamp': 468923808,
-                'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
-            },
-            'add_ie': ['JWPlatform'],
-        },
         {
             # Video.js embed, multiple formats
             'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
index d6b8420a87f4fd3a3e359c75c89ee03f8ce21f04..c9496894309245165e2b01baae390550aa3f27ab 100644 (file)
@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    _WEBPAGE_TESTS = [{
+        # JWPlatform iframe
+        'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+        'info_dict': {
+            'id': 'AG26UQXM',
+            'ext': 'mp4',
+            'upload_date': '20160719',
+            'timestamp': 1468923808,
+            'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+            'description': '',
+            'duration': 294.0,
+        },
+    }, {
+        # Player url not surrounded by quotes
+        'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+        'info_dict': {
+            'id': 'R10NQdhY',
+            'title': 'Playgirl',
+            'ext': 'mp4',
+            'upload_date': '20220624',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
+            'timestamp': 1656064800,
+            'description': 'BRD 1966, Will Tremper',
+            'duration': 5146.0,
+        },
+        'params': {'allowed_extractors': ['generic', 'jwplatform']},
+    }]
+
     @classmethod
     def _extract_embed_urls(cls, url, webpage):
         for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
             # <input value=URL> is used by hyland.com
             # if we find <iframe>, dont look for <input>
             ret = re.findall(
-                r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+                r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
                 webpage)
             if ret:
                 return ret