]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..compat import compat_str | |
3 | from ..utils import ( | |
4 | clean_html, | |
5 | clean_podcast_url, | |
6 | ExtractorError, | |
7 | int_or_none, | |
8 | str_or_none, | |
9 | try_get, | |
10 | url_or_none, | |
11 | ) | |
12 | ||
13 | ||
class StitcherBaseIE(InfoExtractor):
    # Common base for the Stitcher extractors: holds the shared URL prefix
    # and the helpers that turn API payloads into info dicts.
    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Download JSON from the Stitcher API and return its 'data' member.

        `path` is appended to the API host, `video_id` and `query` are
        forwarded to _download_json().  If the response carries a message
        under errors[0], it is raised as an expected ExtractorError.
        """
        api_url = 'https://api.prod.stitcher.com/' + path
        response = self._download_json(api_url, video_id, query=query)
        api_error = try_get(response, lambda r: r['errors'][0]['message'])
        if not api_error:
            return response['data']
        raise ExtractorError(api_error, expected=True)

    def _extract_description(self, data):
        """Return clean_html() of 'html_description', else of 'description'."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return url_or_none() of the episode's 'audio_url', else of its 'guid'."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Build the show-level fields that get merged into every episode."""
        show_info = {}
        show_info['thumbnail'] = show.get('image_base_url')
        show_info['series'] = show.get('title')
        return show_info

    def _extract_episode(self, episode, audio_url, show_info):
        """Assemble the info dict for one episode.

        'id' and 'title' are required keys of `episode`; the remaining
        fields are optional.  `show_info` is applied last, so its keys
        take precedence over the episode-level ones.
        """
        result = {
            'id': compat_str(episode['id']),
            'display_id': episode.get('slug'),
            'title': episode['title'].strip(),
            'description': self._extract_description(episode),
            # Audio-only content.
            'vcodec': 'none',
        }
        # (info dict field, episode key) pairs that are coerced with int_or_none.
        numeric_fields = (
            ('duration', 'duration'),
            ('timestamp', 'date_published'),
            ('season_number', 'season'),
        )
        for field, source_key in numeric_fields:
            result[field] = int_or_none(episode.get(source_key))
        result['url'] = clean_podcast_url(audio_url)
        result['season_id'] = str_or_none(episode.get('season_id'))
        result.update(show_info)
        return result
53 | ||
54 | ||
class StitcherIE(StitcherBaseIE):
    # Extractor for a single episode URL; the numeric id at the end of the
    # URL is what gets sent to the API.
    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the episode up through the API and return its info dict."""
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})

        # The episode itself is mandatory, so it is indexed directly.
        first_episode = api_data['episodes'][0]
        stream_url = self._extract_audio_url(first_episode)
        if not stream_url:
            # No usable audio URL in the response: defer to the login-required helper.
            self.raise_login_required()

        # Show metadata is optional; fall back to an empty dict when absent.
        show = try_get(api_data, lambda d: d['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(first_episode, stream_url, show_info)
109 | ||
110 | ||
class StitcherShowIE(StitcherBaseIE):
    # Extractor for a show URL; yields a playlist built from the show's
    # episodes that expose an audio URL.
    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Request the show's episodes in one API call and return a playlist."""
        slug = self._match_id(url)
        api_data = self._call_api(
            'search/show/%s/allEpisodes' % slug, slug, {'count': 10000})

        # Show metadata is optional; fall back to an empty dict when absent.
        show = try_get(api_data, lambda d: d['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        # Pair each episode with its audio URL lazily, then keep only the
        # episodes for which a URL could be determined.
        episodes = api_data.get('episodes') or []
        with_audio = (
            (episode, self._extract_audio_url(episode))
            for episode in episodes)
        entries = [
            self._extract_episode(episode, audio_url, show_info)
            for episode, audio_url in with_audio
            if audio_url]

        playlist_title = show.get('title')
        playlist_description = self._extract_description(show)
        return self.playlist_result(
            entries, slug, playlist_title, playlist_description)