1 from .common
import InfoExtractor
2 from ..compat
import compat_str
class StitcherBaseIE(InfoExtractor):
    """Helpers shared by the Stitcher episode and show extractors."""

    # URL prefix common to both extractors; each subclass appends its own
    # path pattern to build _VALID_URL.
    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Request an API endpoint and return its payload.

        ``path`` is appended to the API host, ``video_id`` and ``query``
        are handed to ``_download_json``.

        Raises ExtractorError (``expected=True``) carrying the first error
        message when the response contains one.
        """
        resp = self._download_json(
            'https://api.prod.stitcher.com/' + path,
            video_id, query=query)
        error_message = try_get(resp, lambda x: x['errors'][0]['message'])
        # Only fail when the API actually reported an error; raising
        # unconditionally would make every request fail.
        if error_message:
            raise ExtractorError(error_message, expected=True)
        # NOTE(review): callers read 'episodes' and 'shows' from the value
        # returned here; the payload is assumed to sit under the 'data' key
        # of the response envelope -- confirm against a live response.
        return resp['data']

    def _extract_description(self, data):
        """Return the cleaned description of ``data``.

        'html_description' is preferred, with 'description' as fallback;
        the chosen value is passed through ``clean_html``.
        """
        return clean_html(
            data.get('html_description') or data.get('description'))

    def _extract_audio_url(self, episode):
        """Return the episode's 'audio_url' (or 'guid' as fallback) after
        filtering it through ``url_or_none``."""
        return url_or_none(episode.get('audio_url') or episode.get('guid'))

    def _extract_show_info(self, show):
        """Return the show-level fields merged into every episode."""
        return {
            'thumbnail': show.get('image_base_url'),
            'series': show.get('title'),
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Build the info dict for one episode.

        ``episode`` must contain 'id' and 'title' (KeyError otherwise);
        all other fields are optional. ``show_info`` is merged in last.
        """
        info = {
            'id': compat_str(episode['id']),
            'display_id': episode.get('slug'),
            'title': episode['title'].strip(),
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
        }
        info.update(show_info)
        return info
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher episode page."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    # NOTE(review): only the keys that were legible in the reviewed text are
    # reproduced below, grouped into 'info_dict' / 'params' as the key names
    # suggest. Expected fields that were lost (e.g. id, ext, duration) are
    # not invented here -- restore them from a pristine copy.
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'display_id': 'the-rare-hourlong-comedy-plus',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by ``url``.

        The numeric id matched by _VALID_URL is sent to the
        'shows/episodes' endpoint; the first returned episode is used.
        """
        audio_id = self._match_id(url)
        data = self._call_api(
            'shows/episodes', audio_id, {'episode_ids': audio_id})
        episode = data['episodes'][0]
        audio_url = self._extract_audio_url(episode)
        # Without a usable audio URL there is nothing to download; report
        # it as a login requirement instead of failing further down.
        # NOTE(review): presumably raise_login_required() raises -- confirm.
        if not audio_url:
            self.raise_login_required()
        # Show metadata is optional: fall back to an empty dict.
        show = try_get(data, lambda x: x['shows'][0], dict) or {}
        return self._extract_episode(
            episode, audio_url, self._extract_show_info(show))
class StitcherShowIE(StitcherBaseIE):
    """Extractor for a Stitcher show page, returned as a playlist."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    # NOTE(review): the nesting of the first test case was rebuilt from the
    # key names that were legible in the reviewed text -- confirm.
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist with every downloadable episode of the show.

        The slug matched by _VALID_URL is used both as the API path
        component and as the playlist id.
        """
        show_slug = self._match_id(url)
        data = self._call_api(
            'search/show/%s/allEpisodes' % show_slug,
            show_slug, {'count': 10000})
        # Show metadata is optional: fall back to an empty dict.
        show = try_get(data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        entries = []
        for episode in (data.get('episodes') or []):
            audio_url = self._extract_audio_url(episode)
            # Skip episodes without a usable audio URL rather than
            # aborting the whole playlist.
            if not audio_url:
                continue
            entries.append(
                self._extract_episode(episode, audio_url, show_info))

        return self.playlist_result(
            entries, show_slug, show.get('title'),
            self._extract_description(show))