jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/npr.py
[extractor/nebula] Add nebula.tv (#4918)
[yt-dlp.git] / yt_dlp / extractor / npr.py
index 9d1122f0c723524e250201643257085520a05743..e677e862d59cb7b7a0c424466800b937f2090c5c 100644 (file)
@@ -1,11 +1,5 @@
-from __future__ import unicode_literals
-
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    qualities,
-    url_or_none,
-)
+from ..utils import int_or_none, qualities, traverse_obj, url_or_none
 
 
 class NprIE(InfoExtractor):
@@ -53,6 +47,15 @@ class NprIE(InfoExtractor):
         # multimedia, no formats, stream
         'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
         'only_matching': True,
+    }, {
+        'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert',
+        'info_dict': {
+            'id': '1086468851',
+            'ext': 'mp4',
+            'title': 'Bonobo: Tiny Desk (Home) Concert',
+            'duration': 1061,
+            'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$',
+        },
     }]
 
     def _real_extract(self, url):
@@ -91,7 +94,8 @@ def _real_extract(self, url):
                     elif format_id == 'smil':
                         smil_formats = self._extract_smil_formats(
                             format_url, media_id, transform_source=lambda s: s.replace(
-                                'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
+                                'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'),
+                            fatal=False)
                         self._check_formats(smil_formats, media_id)
                         formats.extend(smil_formats)
                     else:
@@ -111,6 +115,12 @@ def _real_extract(self, url):
                 formats.extend(self._extract_m3u8_formats(
                     stream_url, stream_id, 'mp4', 'm3u8_native',
                     m3u8_id='hls', fatal=False))
+
+            if not formats:
+                raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False)
+                m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
+                formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
+
             self._sort_formats(formats)
 
             entries.append({