]>
Commit | Line | Data |
---|---|---|
bc2ca1bb | 1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | clean_podcast_url, | |
9 | int_or_none, | |
10 | parse_iso8601, | |
11 | strip_or_none, | |
12 | try_get, | |
13 | urlencode_postdata, | |
14 | ) | |
15 | ||
16 | ||
class SimplecastBaseIE(InfoExtractor):
    """Shared API helpers and episode parsing for the Simplecast extractors."""

    # Lower-case hexadecimal UUID (8-4-4-4-12), reused by subclasses' patterns.
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _API_BASE = 'https://api.simplecast.com/'

    def _call_api(self, path_tmpl, video_id):
        """Download JSON from `_API_BASE + path_tmpl % video_id`.

        `video_id` is both interpolated into `path_tmpl` and passed to
        `_download_json` as the id of the item being fetched.
        """
        return self._download_json(
            self._API_BASE + path_tmpl % video_id, video_id)

    def _call_search_api(self, resource, resource_id, resource_url):
        """Look up a resource by its public URL via `<resource>s/search`.

        `resource` is the singular resource name (the callers in this file
        pass 'episode' and 'site'); `resource_url` is sent url-encoded as
        the `url` field of the request data.
        """
        # Build the endpoint from _API_BASE so both API helpers share one host.
        return self._download_json(
            self._API_BASE + '%ss/search' % resource, resource_id,
            data=urlencode_postdata({'url': resource_url}))

    def _parse_episode(self, episode):
        """Convert an episode JSON object into an info dict.

        `id` and `title` are mandatory, as is an audio URL under one of
        `audio_file.url`, `audio_file_url` or `enclosure_url`; a KeyError
        is raised when they are absent. All other fields are optional and
        end up as None when missing.
        """
        episode_id = episode['id']
        title = episode['title'].strip()
        audio_file = episode.get('audio_file') or {}
        audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']

        season = episode.get('season') or {}
        season_href = season.get('href')
        season_id = None
        if season_href:
            # Dots are escaped so that only the literal API host matches.
            season_id = self._search_regex(
                r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
                season_href, 'season id', default=None)

        webpage_url = episode.get('episode_url')
        channel_url = None
        if webpage_url:
            # The channel URL is the scheme + host part of the episode page.
            channel_url = self._search_regex(
                r'(https?://[^/]+\.simplecast\.com)',
                webpage_url, 'channel url', default=None)

        return {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'url': clean_podcast_url(audio_file_url),
            'webpage_url': webpage_url,
            'channel_url': channel_url,
            'series': try_get(episode, lambda x: x['podcast']['title']),
            'season_number': int_or_none(season.get('number')),
            'season_id': season_id,
            'thumbnail': episode.get('image_url'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode.get('number')),
            'description': strip_or_none(episode.get('description')),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
        }
69 | ||
70 | ||
class SimplecastIE(SimplecastBaseIE):
    """Extractor for episodes addressed directly by their UUID.

    Matches `api.simplecast.com/episodes/<uuid>` and
    `player.simplecast.com/<uuid>` URLs.
    """
    IE_NAME = 'simplecast'
    _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
    # Expected metadata, also referenced by SimplecastEpisodeIE's test.
    _COMMON_TEST_INFO = {
        'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
        'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'ext': 'mp3',
        'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
        'episode_number': 1,
        'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
        'season_number': 1,
        'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
        'series': 'The RE:BIND.io Podcast',
        'duration': 5343,
        'timestamp': 1580979475,
        'upload_date': '20200206',
        'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
    }
    _TESTS = [{
        'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': _COMMON_TEST_INFO,
    }, {
        'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the Simplecast player/embed iframe URLs found in `webpage`.

        Collects every `<iframe>` `src` that points at
        `embed.simplecast.com/<8 hex digits>` or
        `player.simplecast.com/<uuid>`.
        """
        iframe_src_re = r'''(?x)<iframe[^>]+src=["\']
                    (
                        https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
                        player\.simplecast\.com/%s
                    ))''' % SimplecastBaseIE._UUID_REGEX
        return re.findall(iframe_src_re, webpage)

    def _real_extract(self, url):
        """Fetch the episode by the UUID in `url` and return its info dict."""
        uuid = self._match_id(url)
        return self._parse_episode(self._call_api('episodes/%s', uuid))
113 | ||
114 | ||
class SimplecastEpisodeIE(SimplecastBaseIE):
    """Extractor for episode pages on a podcast's own simplecast.com subdomain.

    Matches `<subdomain>.simplecast.com/episodes/<slug>`, excluding the
    `api.` host.
    """
    IE_NAME = 'simplecast:episode'
    _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': SimplecastIE._COMMON_TEST_INFO,
    }

    def _real_extract(self, url):
        """Resolve the episode through the search API and return its info dict."""
        match = re.match(self._VALID_URL, url)
        slug = match.group('id')
        # Only the part of the URL covered by _VALID_URL is sent to the
        # search endpoint; anything after the slug is dropped.
        matched_url = match.group(0)
        found = self._call_search_api('episode', slug, matched_url)
        return self._parse_episode(found)
129 | ||
130 | ||
class SimplecastPodcastIE(SimplecastBaseIE):
    """Playlist extractor for a whole podcast hosted on simplecast.com.

    Matches `<subdomain>.simplecast.com` URLs, except for the `api`, `cdn`,
    `embed`, `feeds` and `player` hosts and for individual episode pages.
    """
    IE_NAME = 'simplecast:podcast'
    _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
    _TESTS = [{
        'url': 'https://the-re-bind-io-podcast.simplecast.com',
        'playlist_mincount': 33,
        'info_dict': {
            'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
            'title': 'The RE:BIND.io Podcast',
        },
    }, {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
        'only_matching': True,
    }]

    def _entries(self, podcast_id, podcast_title):
        """Yield an info dict for each episode in the podcast's listing.

        This is a generator, so the episode listing is only requested once
        the entries are actually iterated.
        """
        listing = self._call_api('podcasts/%s/episodes', podcast_id)
        for item in (listing.get('collection') or []):
            entry = self._parse_episode(item)
            # Use the podcast title from the site lookup as the series name.
            entry['series'] = podcast_title
            yield entry

    def _real_extract(self, url):
        """Look up the podcast behind `url` and return it as a playlist."""
        site = self._call_search_api('site', self._match_id(url), url)
        podcast = site['podcast']
        podcast_id = podcast['id']
        podcast_title = podcast.get('title')
        return self.playlist_result(
            self._entries(podcast_id, podcast_title),
            podcast_id, podcast_title)