]>
Commit | Line | Data |
---|---|---|
76048b23 | 1 | from __future__ import unicode_literals |
2 | ||
76048b23 | 3 | from .common import InfoExtractor |
51d3045d S |
4 | from ..utils import ( |
5 | int_or_none, | |
6 | qualities, | |
7bf27721 | 7 | url_or_none, |
51d3045d | 8 | ) |
76048b23 | 9 | |
76048b23 | 10 | |
class NprIE(InfoExtractor):
    """Extractor for npr.org story pages.

    The numeric story id is taken from the URL, the story is looked up
    through the NPR query API, and every ``audio`` and ``multimedia`` item
    attached to the story is returned as one entry of a playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
        'info_dict': {
            'id': '449974205',
            'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
        'info_dict': {
            'id': '446928052',
            'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '446929930',
                'ext': 'mp3',
                'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
                'duration': 402,
            },
        }],
    }, {
        # multimedia, not media title
        'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
        'info_dict': {
            'id': '533198237',
            'title': 'Tigers Jaw: Tiny Desk Concert',
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '533201718',
                'ext': 'mp4',
                'title': 'Tigers Jaw: Tiny Desk Concert',
                'duration': 402,
            },
        }],
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        # multimedia, no formats, stream
        'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the media items of the story at *url*.

        The story is fetched from ``api.npr.org/query`` restricted to the
        ``audio``, ``multimedia`` and ``title`` fields. Each media item
        becomes one playlist entry whose formats come from the item's
        ``format`` mapping and, when present, its ``stream.hlsUrl`` entry.

        Returns the value of ``self.playlist_result(...)`` for the
        collected entries, using the story id and story title.
        """
        playlist_id = self._match_id(url)

        story = self._download_json(
            'http://api.npr.org/query', playlist_id, query={
                'id': playlist_id,
                'fields': 'audio,multimedia,title',
                'format': 'json',
                'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
            })['list']['story'][0]
        playlist_title = story.get('title', {}).get('$text')

        # Only these format keys are handled; the same tuple is handed to
        # qualities() to derive the 'quality' value of direct-URL formats.
        KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
        quality = qualities(KNOWN_FORMATS)

        entries = []
        for media in story.get('audio', []) + story.get('multimedia', []):
            media_id = media['id']

            formats = []
            for format_id, formats_entry in media.get('format', {}).items():
                if not formats_entry:
                    continue
                # An entry may be a list of objects; only the first is used.
                if isinstance(formats_entry, list):
                    formats_entry = formats_entry[0]
                format_url = formats_entry.get('$text')
                if not format_url:
                    continue
                if format_id not in KNOWN_FORMATS:
                    continue
                if format_id == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, media_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
                elif format_id == 'smil':
                    # Rewrite the RTMP base found in the SMIL document to
                    # the HTTPS on-demand host before it is parsed, then
                    # run the resulting formats through _check_formats.
                    smil_formats = self._extract_smil_formats(
                        format_url, media_id, transform_source=lambda s: s.replace(
                            'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'),
                        fatal=False)
                    self._check_formats(smil_formats, media_id)
                    formats.extend(smil_formats)
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'quality': quality(format_id),
                    })

            # Of the 'stream' mapping only the 'hlsUrl' entry is used. The
            # manifest is requested under media_id (not the 'hlsUrl' key) so
            # that it is labelled the same way as the m3u8 branch above.
            stream_entry = media.get('stream', {}).get('hlsUrl')
            if isinstance(stream_entry, dict):
                stream_url = url_or_none(stream_entry.get('$text'))
                if stream_url:
                    formats.extend(self._extract_m3u8_formats(
                        stream_url, media_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
            self._sort_formats(formats)

            entries.append({
                'id': media_id,
                # Fall back to the story title when the item has none.
                'title': media.get('title', {}).get('$text') or playlist_title,
                'thumbnail': media.get('altImageUrl', {}).get('$text'),
                'duration': int_or_none(media.get('duration', {}).get('$text')),
                'formats': formats,
            })

        return self.playlist_result(entries, playlist_id, playlist_title)