]>
Commit | Line | Data |
---|---|---|
bc2ca1bb | 1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | clean_podcast_url, | |
4 | int_or_none, | |
5 | parse_iso8601, | |
6 | strip_or_none, | |
7 | try_get, | |
8 | urlencode_postdata, | |
9 | ) | |
10 | ||
11 | ||
class SimplecastBaseIE(InfoExtractor):
    # Shared plumbing for the Simplecast extractors: API access helpers and
    # conversion of an API episode object into an info dict.
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _API_BASE = 'https://api.simplecast.com/'

    def _call_api(self, path_tmpl, video_id):
        """Download JSON from ``_API_BASE + path_tmpl % video_id``.

        ``video_id`` is both interpolated into the path template and passed
        to ``_download_json`` as the id of the item being fetched.
        """
        return self._download_json(
            self._API_BASE + path_tmpl % video_id, video_id)

    def _call_search_api(self, resource, resource_id, resource_url):
        """Query ``<_API_BASE><resource>s/search`` for ``resource_url``.

        The public URL is passed as the ``url`` field of the request data
        (encoded with ``urlencode_postdata``); ``resource_id`` is handed to
        ``_download_json`` as the item id.
        """
        # Built from _API_BASE (like _call_api) so the host is defined once.
        return self._download_json(
            f'{self._API_BASE}{resource}s/search', resource_id,
            data=urlencode_postdata({'url': resource_url}))

    def _parse_episode(self, episode):
        """Turn an episode object (a dict) into an info dict.

        ``episode['id']`` and ``episode['title']`` are mandatory. The audio
        URL is taken from ``audio_file.url``, then ``audio_file_url``, and
        finally the mandatory fallback ``enclosure_url``; a ``KeyError`` is
        raised when a mandatory key is missing. Everything else is optional.
        """
        episode_id = episode['id']
        title = episode['title'].strip()
        audio_file = episode.get('audio_file') or {}
        audio_file_url = (
            audio_file.get('url')
            or episode.get('audio_file_url')
            or episode['enclosure_url'])

        # The season id is not a field of its own here; it is pulled out of
        # the season's API href when that href is present.
        season = episode.get('season') or {}
        season_href = season.get('href')
        season_id = None
        if season_href:
            # Dots are escaped so only the literal API host is accepted.
            season_id = self._search_regex(
                r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
                season_href, 'season id', default=None)

        # The channel URL is the scheme + "<subdomain>.simplecast.com" prefix
        # of the episode's public page URL, when one is given.
        webpage_url = episode.get('episode_url')
        channel_url = None
        if webpage_url:
            channel_url = self._search_regex(
                r'(https?://[^/]+\.simplecast\.com)',
                webpage_url, 'channel url', default=None)

        return {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'url': clean_podcast_url(audio_file_url),
            'webpage_url': webpage_url,
            'channel_url': channel_url,
            'series': try_get(episode, lambda x: x['podcast']['title']),
            'season_number': int_or_none(season.get('number')),
            'season_id': season_id,
            'thumbnail': episode.get('image_url'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode.get('number')),
            'description': strip_or_none(episode.get('description')),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
        }
64 | ||
65 | ||
class SimplecastIE(SimplecastBaseIE):
    # Extractor for single episodes addressed by UUID, either through the API
    # URL (api.simplecast.com/episodes/<uuid>) or the player URL
    # (player.simplecast.com/<uuid>).
    IE_NAME = 'simplecast'
    _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
    # NOTE: this is an rf-string, so literal regex braces must be doubled.
    # A bare {8} would be evaluated as the expression 8 and the pattern would
    # degrade to "[0-9a-f]8" instead of "eight hex digits".
    _EMBED_REGEX = [rf'''(?x)<iframe[^>]+src=["\']
        (?P<url>https?://(?:
            embed\.simplecast\.com/[0-9a-f]{{8}}|
            player\.simplecast\.com/{SimplecastBaseIE._UUID_REGEX}
        ))''']
    # Expected metadata shared with SimplecastEpisodeIE's test, which refers
    # to the same episode through its public page URL.
    _COMMON_TEST_INFO = {
        'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
        'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'ext': 'mp3',
        'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
        'episode_number': 1,
        'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
        'season_number': 1,
        'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
        'series': 'The RE:BIND.io Podcast',
        'duration': 5343,
        'timestamp': 1580979475,
        'upload_date': '20200206',
        'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
    }
    _TESTS = [{
        'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': _COMMON_TEST_INFO,
    }, {
        'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode named by the UUID in ``url`` and parse it."""
        episode_id = self._match_id(url)
        episode = self._call_api('episodes/%s', episode_id)
        return self._parse_episode(episode)
104 | ||
105 | ||
class SimplecastEpisodeIE(SimplecastBaseIE):
    # Extractor for episode pages on a podcast's own subdomain
    # (<subdomain>.simplecast.com/episodes/<slug>); the api. host is excluded.
    IE_NAME = 'simplecast:episode'
    _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': SimplecastIE._COMMON_TEST_INFO,
    }

    def _real_extract(self, url):
        """Look the episode up through the search API by its page URL."""
        match = self._match_valid_url(url)
        # 'id' is the only capture group (the slug); group(0) is the portion
        # of the URL matched by _VALID_URL, which is what gets searched for.
        slug = match.group('id')
        matched_url = match.group(0)
        return self._parse_episode(
            self._call_search_api('episode', slug, matched_url))
120 | ||
121 | ||
class SimplecastPodcastIE(SimplecastBaseIE):
    # Extractor for a whole podcast site (<subdomain>.simplecast.com), yielding
    # its episodes as a playlist. Service subdomains and individual episode
    # pages are excluded by the negative lookaheads in _VALID_URL.
    IE_NAME = 'simplecast:podcast'
    _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
    _TESTS = [{
        'url': 'https://the-re-bind-io-podcast.simplecast.com',
        'playlist_mincount': 33,
        'info_dict': {
            'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
            'title': 'The RE:BIND.io Podcast',
        },
    }, {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the site to its podcast and return the episodes as a playlist."""
        site_name = self._match_id(url)
        podcast_info = self._call_search_api('site', site_name, url)['podcast']
        playlist_id = podcast_info['id']
        playlist_title = podcast_info.get('title')

        def iter_episodes():
            # Kept as a generator so the episodes request is only issued once
            # the playlist entries are actually iterated.
            listing = self._call_api('podcasts/%s/episodes', playlist_id)
            for item in listing.get('collection') or []:
                # The series name always comes from the podcast lookup above,
                # replacing whatever _parse_episode derived from the episode.
                yield {**self._parse_episode(item), 'series': playlist_title}

        return self.playlist_result(iter_episodes(), playlist_id, playlist_title)