jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/stitcher.py
[adobepass] Add MSO Sling TV (#596)
[yt-dlp.git] / yt_dlp / extractor / stitcher.py
CommitLineData
4211c83a 1from __future__ import unicode_literals
7308b8cb 2
4211c83a 3from .common import InfoExtractor
605d299f 4from ..compat import compat_str
7308b8cb 5from ..utils import (
29f7c58a 6 clean_html,
605d299f 7 clean_podcast_url,
29f7c58a 8 ExtractorError,
7308b8cb 9 int_or_none,
29f7c58a 10 str_or_none,
11 try_get,
605d299f 12 url_or_none,
7308b8cb 13)
4211c83a 14
15
class StitcherBaseIE(InfoExtractor):
    """Common plumbing shared by the Stitcher extractors.

    Provides the URL prefix the concrete extractors build their patterns
    on, the API request helper, and the helpers that turn API show and
    episode objects into info-dict fields.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Request ``path`` from the Stitcher API and return its ``data`` member.

        Raises ExtractorError (flagged as expected) with the message of the
        first entry in ``errors`` when the response carries one.
        """
        response = self._download_json(
            'https://api.prod.stitcher.com/' + path,
            video_id, query=query)
        message = try_get(response, lambda r: r['errors'][0]['message'])
        if not message:
            return response['data']
        raise ExtractorError(message, expected=True)

    def _extract_description(self, data):
        """Return the cleaned description, preferring ``html_description``."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return the episode's ``audio_url`` (or ``guid`` as a fallback)
        passed through ``url_or_none``."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Build the show-level fields that get merged into each episode."""
        show_info = {}
        show_info['thumbnail'] = show.get('image_base_url')
        show_info['series'] = show.get('title')
        return show_info

    def _extract_episode(self, episode, audio_url, show_info):
        """Assemble the info dict for one episode.

        ``episode`` must provide ``id`` and ``title``; every other field is
        optional.  ``show_info`` is applied last, so its keys take
        precedence over the episode-level ones.
        """
        episode_id = compat_str(episode['id'])
        title = episode['title'].strip()
        result = {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            # audio-only content
            'vcodec': 'none',
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
        }
        result.update(show_info)
        return result
55
56
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher episode page."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the episode up through the API and return its info dict."""
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})

        episode = api_data['episodes'][0]
        media_url = self._extract_audio_url(episode)
        if not media_url:
            # An episode without a usable audio URL is reported as
            # requiring login.
            self.raise_login_required()

        show = try_get(api_data, lambda d: d['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(episode, media_url, show_info)
4211c83a 111
4211c83a 112
class StitcherShowIE(StitcherBaseIE):
    """Extractor for a Stitcher show page, returned as a playlist."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the show's episode listing and return it as a playlist.

        Episodes for which no audio URL can be extracted are left out.
        """
        slug = self._match_id(url)
        api_data = self._call_api(
            'search/show/%s/allEpisodes' % slug, slug, {'count': 10000})
        show = try_get(api_data, lambda d: d['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        episodes = api_data.get('episodes') or []
        # Pair each episode with its audio URL lazily so the URL is
        # computed exactly once per episode.
        episodes_with_audio = (
            (item, self._extract_audio_url(item)) for item in episodes)
        entries = [
            self._extract_episode(item, audio_url, show_info)
            for item, audio_url in episodes_with_audio
            if audio_url
        ]

        playlist_title = show.get('title')
        playlist_description = self._extract_description(show)
        return self.playlist_result(
            entries, slug, playlist_title, playlist_description)