1 from __future__
import unicode_literals
3 from .common
import InfoExtractor
4 from ..compat
import compat_str
class StitcherBaseIE(InfoExtractor):
    """Shared helpers for the Stitcher episode and show extractors."""

    # Common URL prefix; subclasses append their own path pattern to it.
    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Request ``path`` from the Stitcher API and return its payload.

        ``video_id`` is forwarded to ``_download_json`` and ``query`` is
        sent as the request's query parameters.

        Raises ExtractorError (flagged as expected) when the response
        carries an error message under ``errors[0].message``.
        """
        resp = self._download_json(
            'https://api.prod.stitcher.com/' + path,
            video_id, query=query)
        error_message = try_get(resp, lambda x: x['errors'][0]['message'])
        # Fail only when the API actually reported an error; raising
        # unconditionally would make every request fail.
        if error_message:
            raise ExtractorError(error_message, expected=True)
        # NOTE(review): the payload is assumed to live under 'data' (callers
        # read 'episodes'/'shows' from the result) - confirm against the API.
        return resp['data']

    def _extract_description(self, data):
        """Return ``data``'s description passed through ``clean_html``.

        ``html_description`` is preferred; ``description`` is the fallback.
        """
        return clean_html(data.get('html_description') or data.get('description'))

    def _extract_audio_url(self, episode):
        """Return the episode's ``audio_url`` (falling back to ``guid``)
        after passing it through ``url_or_none``."""
        return url_or_none(episode.get('audio_url') or episode.get('guid'))

    def _extract_show_info(self, show):
        """Return the show-level fields shared by all episodes of ``show``."""
        return {
            'thumbnail': show.get('image_base_url'),
            'series': show.get('title'),
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Build the info dict for one episode.

        ``audio_url`` becomes the media URL after ``clean_podcast_url``;
        the entries of ``show_info`` are merged in last, so they override
        episode fields with the same key.
        """
        info = {
            'id': compat_str(episode['id']),
            'display_id': episode.get('slug'),
            'title': episode['title'].strip(),
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
        }
        info.update(show_info)
        return info
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher episode page."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    # NOTE(review): the extracted source had gaps inside the first two test
    # entries (presumably further expected fields such as the id); only the
    # entries that were visible are reproduced here - confirm against upstream.
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'display_id': 'the-rare-hourlong-comedy-plus',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by ``url``.

        The numeric id matched by ``_VALID_URL`` is sent to the
        ``shows/episodes`` endpoint as ``episode_ids``; the first returned
        episode is used.
        """
        audio_id = self._match_id(url)
        data = self._call_api(
            'shows/episodes', audio_id, {'episode_ids': audio_id})
        episode = data['episodes'][0]
        audio_url = self._extract_audio_url(episode)
        # Ask for a login only when no playable URL came back; calling this
        # unconditionally would make the return below unreachable.
        if not audio_url:
            self.raise_login_required()
        # Show metadata is optional: fall back to an empty dict when absent.
        show = try_get(data, lambda x: x['shows'][0], dict) or {}
        return self._extract_episode(
            episode, audio_url, self._extract_show_info(show))
class StitcherShowIE(StitcherBaseIE):
    """Extractor for a Stitcher show page, returned as a playlist."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the episodes of the show addressed by ``url``.

        The slug matched by ``_VALID_URL`` is used both as the API path
        component and as the playlist id.
        """
        show_slug = self._match_id(url)
        data = self._call_api(
            'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000})
        # Show metadata is optional: fall back to an empty dict when absent.
        show = try_get(data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        entries = []
        for episode in (data.get('episodes') or []):
            audio_url = self._extract_audio_url(episode)
            # Skip episodes that expose no usable audio URL instead of
            # handing a missing URL on to _extract_episode.
            if not audio_url:
                continue
            entries.append(self._extract_episode(episode, audio_url, show_info))

        return self.playlist_result(
            entries, show_slug, show.get('title'),
            self._extract_description(show))