]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/wsj.py
[ie/generic] Add `key_query` extractor-arg
[yt-dlp.git] / yt_dlp / extractor / wsj.py
index 67236f377d2714b517b1f27e687258547c75ac62..7b3f6aa2ae0ee039e7433e03a9d2b578264eabc6 100644 (file)
@@ -1,10 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none,
     float_or_none,
+    int_or_none,
     unified_strdate,
 )
 
@@ -79,13 +76,12 @@ def _real_extract(self, url):
             tbr = int_or_none(v.get('bitrate'))
             formats.append({
                 'url': mp4_url,
-                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+                'format_id': 'http' + (f'-{tbr}' if tbr else ''),
                 'tbr': tbr,
                 'width': int_or_none(v.get('width')),
                 'height': int_or_none(v.get('height')),
                 'fps': float_or_none(v.get('fps')),
             })
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
@@ -112,12 +108,13 @@ class WSJArticleIE(InfoExtractor):
             'upload_date': '20170221',
             'uploader_id': 'ralcaraz',
             'title': 'Bao Bao the Panda Leaves for China',
-        }
+        },
     }
 
     def _real_extract(self, url):
         article_id = self._match_id(url)
         webpage = self._download_webpage(url, article_id)
         video_id = self._search_regex(
-            r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
-        return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
+            r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})',
+            webpage, 'video id')
+        return self.url_result(f'wsj:{video_id}', WSJIE.ie_key(), video_id)