]>
Commit | Line | Data |
---|---|---|
2c98d998 | 1 | from .common import InfoExtractor |
d4b52ce3 | 2 | from ..utils import ( |
3 | OnDemandPagedList, | |
4 | clean_html, | |
5 | int_or_none, | |
6 | jwt_decode_hs256, | |
7 | url_or_none, | |
8 | ) | |
9 | from ..utils.traversal import traverse_obj | |
2c98d998 | 10 | |
11 | ||
def result_from_props(props):
    """Build an info dict for one episode from its Podbay properties.

    `props` is the episode mapping as delivered by the site (either from the
    page's Next.js data or from one entry of the channel API response).
    Fields that are missing or fail their conversion are left out by
    `traverse_obj`; `ext` and `vcodec` are always set, marking the result
    as an audio-only MP3.
    """
    # Output field -> traversal path (source key followed by converters)
    field_paths = {
        'id': ('_id', {str}),
        'title': ('title', {str}),
        'url': ('mediaURL', {url_or_none}),
        'description': ('description', {clean_html}),
        # The image value is passed through jwt_decode_hs256 and the
        # thumbnail URL is read from the decoded payload
        'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
        'timestamp': ('timestamp', {int_or_none}),
        'duration': ('duration', {int_or_none}),
    }
    extracted = traverse_obj(props, field_paths)
    return {**extracted, 'ext': 'mp3', 'vcodec': 'none'}
26 | ||
27 | ||
class PodbayFMIE(InfoExtractor):
    """Extractor for a single podbay.fm episode page."""

    # The numeric URL component captured as `id` is only used to locate the
    # page; the reported episode id comes from the page data.
    _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
        'md5': '895ac8505de349515f5ee8a4a3195c93',
        'info_dict': {
            'id': '62306451f4a48e58d0c4d6a8',
            'title': 'Part One: Kissinger',
            'ext': 'mp3',
            'description': r're:^We begin our epic six part series on Henry Kissinger.+',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1647338400,
            'duration': 5001,
            'upload_date': '20220315',
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and convert its embedded Next.js
        episode properties into an info dict."""
        episode_id = self._match_id(url)
        nextjs_data = self._search_nextjs_data(
            self._download_webpage(url, episode_id), episode_id)
        # Direct indexing: a page without this structure raises KeyError
        episode_props = nextjs_data['props']['pageProps']['episode']
        return result_from_props(episode_props)
2c98d998 | 50 | |
51 | ||
class PodbayFMChannelIE(InfoExtractor):
    """Extractor for a podbay.fm podcast (channel) page.

    Produces a playlist whose entries are built from the paginated
    podcast API, one entry per episode.
    """

    _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards',
        'info_dict': {
            'id': 'behind-the-bastards',
            'title': 'Behind the Bastards',
        },
        'playlist_mincount': 21,
    }]
    # Page size handed to OnDemandPagedList
    _PAGE_SIZE = 10

    def _fetch_page(self, channel_id, pagenum):
        """Download page `pagenum` (0-based) of the channel's episode list.

        Returns the `podcast` object of the API response.
        """
        # _download_json takes the video id as its second argument and the
        # progress note after it; passing them in that order makes log and
        # error messages carry the channel id as their prefix.
        return self._download_json(
            f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
            channel_id, note=f'Downloading channel JSON page {pagenum + 1}')['podcast']

    @staticmethod
    def _results_from_page(channel_id, page):
        """Convert one API page into a list of episode info dicts.

        Each entry is attributed to PodbayFMIE and given the episode's
        canonical page as `webpage_url`.
        """
        return [{
            **result_from_props(e),
            'extractor': PodbayFMIE.IE_NAME,
            'extractor_key': PodbayFMIE.ie_key(),
            # somehow they use timestamps as the episode identifier
            'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
        } for e in page['episodes']]

    def _real_extract(self, url):
        """Return a playlist of all episodes of the channel in `url`."""
        channel_id = self._match_id(url)

        # The first page is fetched up front because it also supplies the
        # playlist title; it is reused for page 0 so it is not requested twice.
        first_page = self._fetch_page(channel_id, 0)
        entries = OnDemandPagedList(
            lambda pagenum: self._results_from_page(
                channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
            self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id, first_page.get('title'))