jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/stitcher.py
[cleanup] Revert unnecessary changes in 51d9739f8031fb37d8e25b0e9f1abea561e3d2e3
[yt-dlp.git] / yt_dlp / extractor / stitcher.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6 clean_html,
7 clean_podcast_url,
8 ExtractorError,
9 int_or_none,
10 str_or_none,
11 try_get,
12 url_or_none,
13 )
14
15
class StitcherBaseIE(InfoExtractor):
    """Common base for the Stitcher extractors.

    Holds the URL prefix shared by the concrete extractors, the API call
    wrapper, and the helpers that map API objects to info-dict fields.
    """

    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Download JSON for ``path`` from the API host and return its ``data`` member.

        If the response has a message at ``errors[0]['message']``, an
        ExtractorError carrying that message is raised with ``expected=True``.
        """
        api_url = 'https://api.prod.stitcher.com/' + path
        response = self._download_json(api_url, video_id, query=query)
        error_message = try_get(response, lambda x: x['errors'][0]['message'])
        if not error_message:
            return response['data']
        raise ExtractorError(error_message, expected=True)

    def _extract_description(self, data):
        """Return the cleaned description, taking ``html_description`` before ``description``."""
        raw_description = data.get('html_description')
        if not raw_description:
            raw_description = data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return ``audio_url`` (or, failing that, ``guid``) passed through url_or_none."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Build the show-level fields (thumbnail and series) merged into each episode."""
        thumbnail = show.get('image_base_url')
        series = show.get('title')
        return {
            'thumbnail': thumbnail,
            'series': series,
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Assemble the info dict for one episode.

        ``id`` and ``title`` are read with subscript access, so a missing key
        raises KeyError; every other episode field is looked up with ``get``.
        Entries from ``show_info`` are applied last and therefore override
        episode-level keys of the same name.
        """
        # Fields are read in the same order as the resulting dict keys.
        episode_id = compat_str(episode['id'])
        slug = episode.get('slug')
        title = episode['title'].strip()
        description = self._extract_description(episode)
        duration = int_or_none(episode.get('duration'))
        media_url = clean_podcast_url(audio_url)
        published = int_or_none(episode.get('date_published'))
        season_number = int_or_none(episode.get('season'))
        season_id = str_or_none(episode.get('season_id'))

        metadata = {
            'id': episode_id,
            'display_id': slug,
            'title': title,
            'description': description,
            'duration': duration,
            'url': media_url,
            'vcodec': 'none',
            'timestamp': published,
            'season_number': season_number,
            'season_id': season_id,
        }
        metadata.update(show_info)
        return metadata
55
56
class StitcherIE(StitcherBaseIE):
    """Extractor for a single Stitcher episode URL (numeric id at the end of the path)."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
        'md5': 'e9635098e0da10b21a0e2b85585530f6',
        'info_dict': {
            'id': '40789481',
            'ext': 'mp3',
            'title': 'Machine Learning Mastery and Cancer Clusters',
            'description': 'md5:547adb4081864be114ae3831b4c2b42f',
            'duration': 1604,
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20151008',
            'timestamp': 1444285800,
            'series': 'Talking Machines',
        },
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
        'info_dict': {
            'id': '40846275',
            'display_id': 'the-rare-hourlong-comedy-plus',
            'ext': 'mp3',
            'title': "The CW's 'Crazy Ex-Girlfriend'",
            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
            'duration': 2235,
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Page Not Found',
    }, {
        # escaped title
        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
        'only_matching': True,
    }, {
        'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the episode by the id matched from ``url`` and return its info dict.

        The first item of ``episodes`` in the API data is used. When no audio
        URL can be derived from it, ``raise_login_required`` is called.
        """
        episode_id = self._match_id(url)
        api_data = self._call_api(
            'shows/episodes', episode_id, {'episode_ids': episode_id})
        episode = api_data['episodes'][0]

        media_url = self._extract_audio_url(episode)
        if not media_url:
            self.raise_login_required()

        # Show metadata is optional: fall back to an empty dict when absent.
        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)
        return self._extract_episode(episode, media_url, show_info)
111
112
class StitcherShowIE(StitcherBaseIE):
    """Extractor for a Stitcher show URL; yields a playlist of the show's episodes."""

    _VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.stitcher.com/podcast/the-talking-machines',
        'info_dict': {
            'id': 'the-talking-machines',
            'title': 'Talking Machines',
            'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
        },
        'playlist_mincount': 106,
    }, {
        'url': 'https://www.stitcher.com/show/the-talking-machines',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Request the show's episodes and return them as a playlist result.

        Episodes for which no audio URL can be derived are left out. The
        playlist id is the show slug matched from ``url``; title and
        description come from the first item of ``shows`` when present.
        """
        slug = self._match_id(url)
        api_data = self._call_api(
            'search/show/%s/allEpisodes' % slug, slug, {'count': 10000})
        show = try_get(api_data, lambda x: x['shows'][0], dict) or {}
        show_info = self._extract_show_info(show)

        episodes = api_data.get('episodes') or []
        # Lazily pair each episode with its audio URL so the per-episode call
        # order (URL lookup, then info-dict construction) is kept.
        with_urls = ((ep, self._extract_audio_url(ep)) for ep in episodes)
        entries = [
            self._extract_episode(ep, media_url, show_info)
            for ep, media_url in with_urls
            if media_url
        ]

        return self.playlist_result(
            entries, slug, show.get('title'),
            self._extract_description(show))