]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | ExtractorError, | |
4 | clean_html, | |
5 | clean_podcast_url, | |
6 | int_or_none, | |
7 | str_or_none, | |
8 | try_get, | |
9 | url_or_none, | |
10 | ) | |
11 | ||
12 | ||
class StitcherBaseIE(InfoExtractor):
    """Helpers shared by the Stitcher episode and show extractors."""

    # Common URL prefix; subclasses append their own suffix pattern.
    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Request ``path`` from the Stitcher API and return its ``data`` member.

        Raises ExtractorError (flagged as expected) carrying the first
        error message when the response contains one.
        """
        api_url = 'https://api.prod.stitcher.com/' + path
        response = self._download_json(api_url, video_id, query=query)
        api_error = try_get(response, lambda x: x['errors'][0]['message'])
        if not api_error:
            return response['data']
        raise ExtractorError(api_error, expected=True)

    def _extract_description(self, data):
        """Return the cleaned description, preferring ``html_description``."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return the episode's ``audio_url`` (or ``guid`` as fallback) via url_or_none."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Build the show-level fields that are merged into each episode."""
        thumbnail = show.get('image_base_url')
        series = show.get('title')
        return {
            'thumbnail': thumbnail,
            'series': series,
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Assemble the info dict for one episode.

        ``episode`` must provide ``id`` and ``title``; every other field is
        optional. Entries from ``show_info`` are applied last, so they
        override episode-level keys of the same name.
        """
        # Mandatory fields: a missing key raises KeyError here.
        episode_id = str(episode['id'])
        title = episode['title'].strip()

        metadata = {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            'vcodec': 'none',
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
        }
        metadata.update(show_info)
        return metadata
52 | ||
53 | ||
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher episode URL."""

    # The numeric episode id is the trailing digits of the last path segment.
    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the episode by the id in ``url`` and return its info dict."""
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})

        # Only the first returned episode is used.
        first_episode = api_data['episodes'][0]
        media_url = self._extract_audio_url(first_episode)
        if not media_url:
            # No usable audio URL in the response: defer to the
            # login-required handler.
            self.raise_login_required()

        # Show data is optional; fall back to an empty mapping.
        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(first_episode, media_url, show_info)
108 | ||
109 | ||
class StitcherShowIE(StitcherBaseIE):
    """Extractor that turns a Stitcher show URL into a playlist of episodes."""

    # The show slug is the single path segment after the base prefix.
    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the show's episodes and return them as a playlist result.

        Episodes for which no audio URL can be extracted are left out.
        """
        show_slug = self._match_id(url)
        data = self._call_api(
            f'search/show/{show_slug}/allEpisodes', show_slug, {'count': 10000})

        # Show data is optional; fall back to an empty mapping.
        show = try_get(data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        # Lazy pairing keeps the per-episode order of calls: audio URL
        # lookup first, then entry construction, one episode at a time.
        episodes = data.get('episodes') or []
        with_audio = ((item, self._extract_audio_url(item)) for item in episodes)
        entries = [
            self._extract_episode(item, audio_url, show_info)
            for item, audio_url in with_audio
            if audio_url
        ]

        playlist_title = show.get('title')
        playlist_description = self._extract_description(show)
        return self.playlist_result(
            entries, show_slug, playlist_title, playlist_description)