]>
Commit | Line | Data |
---|---|---|
76048b23 | 1 | from __future__ import unicode_literals |
2 | ||
76048b23 | 3 | from .common import InfoExtractor |
51d3045d S |
4 | from ..utils import ( |
5 | int_or_none, | |
6 | qualities, | |
7 | ) | |
76048b23 | 8 | |
76048b23 | 9 | |
class NprIE(InfoExtractor):
    """Extractor for npr.org story pages.

    A story is returned as a playlist whose entries are the items listed in
    the story's ``audio`` and ``multimedia`` API fields.
    """

    # Dated story URLs, optionally under /sections/<name>/; the trailing
    # number is captured as the story id
    _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
        'info_dict': {
            'id': '449974205',
            'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
        'info_dict': {
            'id': '446928052',
            'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'",
        },
        'playlist': [{
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '446929930',
                'ext': 'mp3',
                'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
                'duration': 402,
            },
        }],
    }, {
        # multimedia, not media title: the expected entry title below is the
        # same as the story title
        'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
        'info_dict': {
            'id': '533198237',
            'title': 'Tigers Jaw: Tiny Desk Concert',
        },
        'playlist': [{
            # NOTE(review): md5 and duration are identical to the previous
            # test case - confirm they are correct for this video
            'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
            'info_dict': {
                'id': '533201718',
                'ext': 'mp4',
                'title': 'Tigers Jaw: Tiny Desk Concert',
                'duration': 402,
            },
        }],
        'expected_warnings': ['Failed to download m3u8 information'],
    }]

    def _real_extract(self, url):
        """Return a playlist built from the media attached to an NPR story.

        The story id matched from ``url`` is looked up through the NPR query
        API. Each item under the story's ``audio`` and ``multimedia`` fields
        becomes one playlist entry; the story title is used as the playlist
        title and as the fallback title of entries that have none.
        """
        story_id = self._match_id(url)

        api_query = {
            'id': story_id,
            'fields': 'audio,multimedia,title',
            'format': 'json',
            'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
        }
        response = self._download_json(
            'http://api.npr.org/query', story_id, query=api_query)
        # Only the first story of the response is used
        story = response['list']['story'][0]
        story_title = story.get('title', {}).get('$text')

        # Format ids this extractor handles; the same tuple is handed to
        # qualities() to derive the 'quality' value of direct formats
        known_formats = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
        rank_format = qualities(known_formats)

        def rtmp_to_https(smil_source):
            # Rewrite the RTMP base found in the SMIL document to its HTTPS
            # counterpart before the document is parsed
            return smil_source.replace(
                'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/')

        entries = []
        media_items = story.get('audio', []) + story.get('multimedia', [])
        for item in media_items:
            item_id = item['id']

            formats = []
            for kind, candidate in item.get('format', {}).items():
                if not candidate:
                    continue
                # A format value may be a list; only its first element is used
                if isinstance(candidate, list):
                    candidate = candidate[0]
                stream_url = candidate.get('$text')
                # Skip formats without a URL and format ids we do not know
                if not stream_url or kind not in known_formats:
                    continue

                if kind == 'm3u8':
                    hls_formats = self._extract_m3u8_formats(
                        stream_url, item_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False)
                    formats.extend(hls_formats)
                    continue

                if kind == 'smil':
                    smil_formats = self._extract_smil_formats(
                        stream_url, item_id, transform_source=rtmp_to_https)
                    self._check_formats(smil_formats, item_id)
                    formats.extend(smil_formats)
                    continue

                # Remaining known ids are plain direct links
                formats.append({
                    'url': stream_url,
                    'format_id': kind,
                    'quality': rank_format(kind),
                })
            self._sort_formats(formats)

            entry = {
                'id': item_id,
                'title': item.get('title', {}).get('$text') or story_title,
                'thumbnail': item.get('altImageUrl', {}).get('$text'),
                'duration': int_or_none(item.get('duration', {}).get('$text')),
                'formats': formats,
            }
            entries.append(entry)

        return self.playlist_result(entries, story_id, story_title)