]>
Commit | Line | Data |
---|---|---|
4211c83a | 1 | from __future__ import unicode_literals |
7308b8cb | 2 | |
4211c83a | 3 | from .common import InfoExtractor |
605d299f | 4 | from ..compat import compat_str |
7308b8cb | 5 | from ..utils import ( |
29f7c58a | 6 | clean_html, |
605d299f | 7 | clean_podcast_url, |
29f7c58a | 8 | ExtractorError, |
7308b8cb | 9 | int_or_none, |
29f7c58a | 10 | str_or_none, |
11 | try_get, | |
605d299f | 12 | url_or_none, |
7308b8cb | 13 | ) |
4211c83a | 14 | |
15 | ||
class StitcherBaseIE(InfoExtractor):
    """Helpers shared by the Stitcher episode and show extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Download JSON from the Stitcher API and return its 'data' member.

        If the response carries a message under errors[0], it is raised as
        an expected ExtractorError instead.
        """
        api_url = 'https://api.prod.stitcher.com/' + path
        response = self._download_json(api_url, video_id, query=query)
        message = try_get(response, lambda x: x['errors'][0]['message'])
        if message:
            raise ExtractorError(message, expected=True)
        return response['data']

    def _extract_description(self, data):
        """Return the cleaned description, preferring the HTML variant."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return the episode's 'audio_url' (or 'guid' as fallback) passed through url_or_none."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Build the show-level fields that get merged into every episode."""
        thumbnail = show.get('image_base_url')
        series = show.get('title')
        return {
            'thumbnail': thumbnail,
            'series': series,
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Assemble the info dict for one episode and merge show_info into it.

        'id' and 'title' are mandatory keys of episode; every other field is
        looked up with .get() and may end up as None.
        """
        # Values are computed in the same order as the dict keys below so
        # that a missing mandatory key fails at the same point as before.
        episode_id = compat_str(episode['id'])
        slug = episode.get('slug')
        title = episode['title'].strip()
        description = self._extract_description(episode)
        duration = int_or_none(episode.get('duration'))
        media_url = clean_podcast_url(audio_url)
        timestamp = int_or_none(episode.get('date_published'))
        season_number = int_or_none(episode.get('season'))
        season_id = str_or_none(episode.get('season_id'))

        result = {
            'id': episode_id,
            'display_id': slug,
            'title': title,
            'description': description,
            'duration': duration,
            'url': media_url,
            'vcodec': 'none',
            'timestamp': timestamp,
            'season_number': season_number,
            'season_id': season_id,
        }
        # Show-level values take precedence over episode-level ones on key clashes.
        result.update(show_info)
        return result
55 | ||
56 | ||
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher episode URL."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the episode named by the URL's numeric id and return its info dict."""
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})

        # Only the first returned episode is used.
        episode = api_data['episodes'][0]
        audio_url = self._extract_audio_url(episode)
        if not audio_url:
            # No usable audio URL in the response; presumably this helper
            # raises, since extraction cannot proceed without one.
            self.raise_login_required()

        # Show metadata is optional; fall back to an empty mapping.
        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(episode, audio_url, show_info)
4211c83a | 111 | |
4211c83a | 112 | |
class StitcherShowIE(StitcherBaseIE):
    """Extractor for a Stitcher show page, returned as a playlist of episodes."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the show's episodes (requesting up to 10000) and build a playlist result."""
        slug = self._match_id(url)
        api_data = self._call_api(
            'search/show/%s/allEpisodes' % slug, slug, {'count': 10000})

        # Show metadata is optional; fall back to an empty mapping.
        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        playlist_entries = []
        for episode in (api_data.get('episodes') or []):
            audio_url = self._extract_audio_url(episode)
            # Episodes without a usable audio URL are silently left out.
            if audio_url:
                playlist_entries.append(
                    self._extract_episode(episode, audio_url, show_info))

        playlist_title = show.get('title')
        playlist_description = self._extract_description(show)
        return self.playlist_result(
            playlist_entries, slug, playlist_title, playlist_description)