]>
Commit | Line | Data |
---|---|---|
76048b23 | 1 | from .common import InfoExtractor |
51d3045d S |
2 | from ..utils import ( |
3 | int_or_none, | |
4 | qualities, | |
7bf27721 | 5 | url_or_none, |
51d3045d | 6 | ) |
76048b23 | 7 | |
76048b23 | 8 | |
class NprIE(InfoExtractor):
    """Extractor for npr.org story pages.

    A story URL is resolved through the NPR query API and returned as a
    playlist containing one entry per item found in the story's ``audio``
    and ``multimedia`` lists.
    """

    _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
        'info_dict': {
            'id': '449974205',
            'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
        'info_dict': {
            'id': '446928052',
            'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '446929930',
                'ext': 'mp3',
                'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
                'duration': 402,
            },
        }],
    }, {
        # multimedia, not media title
        'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
        'info_dict': {
            'id': '533198237',
            'title': 'Tigers Jaw: Tiny Desk Concert',
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '533201718',
                'ext': 'mp4',
                'title': 'Tigers Jaw: Tiny Desk Concert',
                'duration': 402,
            },
        }],
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        # multimedia, no formats, stream
        'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
        'only_matching': True,
    }]

    @staticmethod
    def _text_of(node):
        """Return ``node['$text']`` when *node* is a dict, otherwise None.

        The API response wraps scalar values as ``{'$text': value}``; a key
        may also be absent or hold a non-dict value, which must not raise.
        """
        if isinstance(node, dict):
            return node.get('$text')
        return None

    def _real_extract(self, url):
        """Build a playlist result for the story identified by *url*.

        The numeric story id is taken from the URL, the story is fetched
        from the NPR query API, and each audio/multimedia item becomes one
        playlist entry with its own list of formats.
        """
        playlist_id = self._match_id(url)

        story = self._download_json(
            'http://api.npr.org/query', playlist_id, query={
                'id': playlist_id,
                'fields': 'audio,multimedia,title',
                'format': 'json',
                'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
            })['list']['story'][0]
        playlist_title = self._text_of(story.get('title'))

        # Format ids this extractor understands; the tuple is also handed to
        # qualities() to rank the direct-URL formats.
        # NOTE(review): presumably later entries rank higher - confirm
        # against utils.qualities.
        KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
        quality = qualities(KNOWN_FORMATS)

        entries = []
        # ``or []`` also covers keys that are present but null.
        media_items = (story.get('audio') or []) + (story.get('multimedia') or [])
        for media in media_items:
            media_id = media['id']

            formats = []
            for format_id, formats_entry in (media.get('format') or {}).items():
                if not formats_entry:
                    continue
                if isinstance(formats_entry, list):
                    # Only the first entry of a list-valued format is used.
                    formats_entry = formats_entry[0]
                format_url = self._text_of(formats_entry)
                if not format_url:
                    continue
                if format_id not in KNOWN_FORMATS:
                    continue
                if format_id == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, media_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
                elif format_id == 'smil':
                    # The SMIL document is rewritten before parsing so that
                    # RTMP base URLs become HTTPS on-demand URLs.
                    smil_formats = self._extract_smil_formats(
                        format_url, media_id, transform_source=lambda s: s.replace(
                            'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'),
                        fatal=False)
                    self._check_formats(smil_formats, media_id)
                    formats.extend(smil_formats)
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'quality': quality(format_id),
                    })

            # Some items expose an HLS URL under 'stream' -> 'hlsUrl' (see the
            # "multimedia, no formats, stream" test case above).
            stream = media.get('stream')
            if isinstance(stream, dict):
                stream_url = url_or_none(self._text_of(stream.get('hlsUrl')))
                if stream_url:
                    # Pass the media id (not the 'hlsUrl' key name) as the
                    # video id, consistent with the 'format' branch above.
                    formats.extend(self._extract_m3u8_formats(
                        stream_url, media_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
            self._sort_formats(formats)

            entries.append({
                'id': media_id,
                # Fall back to the story title when the item has none.
                'title': self._text_of(media.get('title')) or playlist_title,
                'thumbnail': self._text_of(media.get('altImageUrl')),
                'duration': int_or_none(self._text_of(media.get('duration'))),
                'formats': formats,
            })

        return self.playlist_result(entries, playlist_id, playlist_title)