]>
Commit | Line | Data |
---|---|---|
2c98d998 | 1 | from .common import InfoExtractor |
2 | from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call | |
3 | ||
4 | ||
def result_from_props(props, episode_id=None):
    """Build an info dict for one episode from its ``props`` mapping.

    ``episode_id`` is only used as the ``id`` when ``props`` carries no truthy
    ``podcast_id``. ``mediaURL`` is looked up with ``[]``, so it is the one
    field that must be present; every other field falls back to ``None``.
    """
    info = {'id': props.get('podcast_id') or episode_id}
    info['title'] = props.get('title')
    info['url'] = props['mediaURL']
    info['ext'] = 'mp3'
    # The 'image' value is run through jwt_decode_hs256 and its 'url' entry is
    # taken; try_call is presumably there to absorb a missing or undecodable
    # image (NOTE(review): confirm against ..utils.try_call).
    info['thumbnail'] = try_call(lambda: jwt_decode_hs256(props['image'])['url'])
    info['timestamp'] = props.get('timestamp')
    info['duration'] = int_or_none(props.get('duration'))
    return info
15 | ||
16 | ||
class PodbayFMIE(InfoExtractor):
    """Extractor for a single episode page (``/p/<podcast>/e/<episode>``)."""

    # Path segments must stop at '?' and '#'. With a plain `[^/]*` the greedy
    # id group swallows a trailing query string or fragment (e.g. the id of
    # `.../e/1647338400?x=1` became `1647338400?x=1`), because the optional
    # `(?:[?#].*)?` tail is only tried after the id group has consumed
    # everything up to the end of the URL.
    _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>[^/?#]+)/?(?:[?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
        'md5': '98b41285dcf7989d105a4ed0404054cf',
        'info_dict': {
            'id': '1647338400',
            'title': 'Part One: Kissinger',
            'ext': 'mp3',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1647338400,
            'duration': 5001,
            'upload_date': '20220315',
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and build the result from its Next.js data.

        The episode props are read from ``props.pageProps.episode`` of the
        embedded Next.js payload; the id matched from the URL is passed on as
        the fallback ``id``.
        """
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)
        data = self._search_nextjs_data(webpage, episode_id)
        return result_from_props(data['props']['pageProps']['episode'], episode_id)
38 | ||
39 | ||
class PodbayFMChannelIE(InfoExtractor):
    """Extractor for a podcast page (``/p/<slug>``), returned as a playlist."""

    # The slug must stop at '?' and '#'. With a plain `[^/]*` the greedy group
    # captured a trailing query string or fragment as part of the slug, which
    # was then interpolated into the API URL and used as the playlist id.
    _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:[?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards',
        'info_dict': {
            'id': 'behind-the-bastards',
            'title': 'Behind the Bastards',
        },
    }]
    # Page size handed to OnDemandPagedList.
    _PAGE_SIZE = 10

    def _fetch_page(self, channel_id, pagenum):
        """Fetch one page from the podcast API and return its ``'podcast'`` object."""
        return self._download_json(
            f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
            channel_id)['podcast']

    @staticmethod
    def _results_from_page(channel_id, page):
        """Turn the ``'episodes'`` of an API page into a list of entry dicts.

        Each entry is the generic episode result, attributed to ``PodbayFMIE``
        and given the episode's own page URL as ``webpage_url``.
        """
        return [{
            **result_from_props(e),
            'extractor': PodbayFMIE.IE_NAME,
            'extractor_key': PodbayFMIE.ie_key(),
            # somehow they use timestamps as the episode identifier
            'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
        } for e in page['episodes']]

    def _real_extract(self, url):
        """Return a lazily paged playlist of all episodes of the podcast."""
        channel_id = self._match_id(url)

        # Page 0 is fetched eagerly because the playlist title comes from it;
        # the paged list reuses it instead of downloading it a second time.
        first_page = self._fetch_page(channel_id, 0)
        entries = OnDemandPagedList(
            lambda pagenum: self._results_from_page(
                channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
            self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id, first_page.get('title'))