]>
Commit | Line | Data |
---|---|---|
4bf72cc1 C |
1 | import functools |
2 | import json | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | OnDemandPagedList, | |
7 | float_or_none, | |
8 | str_or_none, | |
9 | str_to_int, | |
10 | traverse_obj, | |
11 | unified_timestamp, | |
12 | ) | |
13 | ||
14 | ||
class PodchaserIE(InfoExtractor):
    """Extractor for podchaser.com podcast and episode pages.

    A URL that carries an episode id yields a single episode; a URL with only
    a podcast id yields a lazily paged playlist of that podcast's episodes.
    """

    _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?'
    # Number of episodes requested per call to the episode list endpoint.
    _PAGE_SIZE = 100
    _TESTS = [{
        'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585',
        'info_dict': {
            'id': '104365585',
            'title': 'Ep. 285 – freeze me off',
            'description': 'cam ahn',
            'thumbnail': r're:^https?://.*\.jpg$',
            'ext': 'mp3',
            'categories': ['Comedy'],
            'tags': ['comedy', 'dark humor'],
            'series': 'Cum Town',
            'duration': 3708,
            'timestamp': 1636531259,
            'upload_date': '20211110',
            'rating': 4.0,
        },
    }, {
        'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
        'info_dict': {
            'id': '28853',
            'title': 'The Bone Zone',
            'description': 'Podcast by The Bone Zone',
        },
        'playlist_count': 275,
    }, {
        'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
        'info_dict': {
            'id': '699349',
            'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas',
            'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1',
        },
        'playlist_mincount': 225,
    }]

    @staticmethod
    def _parse_episode(episode, podcast):
        """Build the info dict for one episode.

        :param episode: episode object as returned by the API (a mapping).
        :param podcast: podcast object the episode belongs to; supplies the
            series title, categories and tags.
        :return: dict with the episode fields plus the podcast-level
            ``categories``, ``tags`` and ``series``.
        """
        # Categories are looked up both under ``podcast['summary']`` and on the
        # podcast itself, so the same text may be collected twice.
        # ``or []`` covers the case where the lookup yields None instead of an
        # empty list when nothing matches.
        category_names = traverse_obj(
            podcast, (('summary', None), 'categories', ..., 'text')) or []

        return {
            # str_or_none rather than str(): a missing id must not turn into
            # the literal string 'None'.
            'id': str_or_none(episode.get('id')),
            'title': episode.get('title'),
            'description': episode.get('description'),
            'url': episode.get('audio_url'),
            'thumbnail': episode.get('image_url'),
            'duration': str_to_int(episode.get('length')),
            'timestamp': unified_timestamp(episode.get('air_date')),
            'rating': float_or_none(episode.get('rating')),
            # dict.fromkeys() removes duplicates while keeping first-seen
            # order; going through a set made the list order vary per run.
            'categories': list(dict.fromkeys(category_names)),
            'tags': traverse_obj(podcast, ('tags', ..., 'text')),
            'series': podcast.get('title'),
        }

    def _call_api(self, path, *args, **kwargs):
        """Download and decode JSON from ``https://api.podchaser.com/<path>``.

        All further positional and keyword arguments are forwarded unchanged
        to ``_download_json``.
        """
        return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)

    def _fetch_page(self, podcast_id, podcast, page):
        """Yield parsed episodes for one page of a podcast's episode list.

        :param podcast_id: podcast id, used both as the request filter and as
            the id passed along to ``_call_api``.
        :param podcast: podcast object handed to ``_parse_episode``.
        :param page: page index; the request starts at
            ``page * _PAGE_SIZE`` (presumably zero-based, as supplied by
            OnDemandPagedList -- confirm against that helper).
        :raises KeyError: if the response has no ``entities`` key.
        """
        query = {
            'start': page * self._PAGE_SIZE,
            'count': self._PAGE_SIZE,
            'sort_order': 'SORT_ORDER_RECENT',
            'filters': {
                'podcast_id': podcast_id,
            },
            'options': {},
        }
        json_response = self._call_api(
            'list/episode', podcast_id,
            headers={'Content-Type': 'application/json;charset=utf-8'},
            data=json.dumps(query).encode())

        for episode in json_response['entities']:
            yield self._parse_episode(episode, podcast)

    def _real_extract(self, url):
        """Return a playlist result for a podcast URL or an info dict for an episode URL."""
        podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id')
        # The podcast object is needed in both cases: it supplies the playlist
        # metadata as well as the series/categories/tags of every episode.
        podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id)

        if not episode_id:
            # Pages are fetched only as entries are consumed.
            return self.playlist_result(
                OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
                str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))

        episode = self._call_api(f'episodes/{episode_id}', episode_id)
        return self._parse_episode(episode, podcast)