jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/spreaker.py
[utils] Better traceback for `ExtractorError`
[yt-dlp.git] / yt_dlp / extractor / spreaker.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 float_or_none,
10 int_or_none,
11 str_or_none,
12 try_get,
13 unified_timestamp,
14 url_or_none,
15 )
16
17
def _extract_episode(data, episode_id=None):
    """Turn one Spreaker episode mapping into an info dict.

    ``data`` is the episode object taken from an API response.  Its
    ``title`` and ``download_url`` entries are read with plain indexing,
    so a missing key raises ``KeyError``.  ``episode_id``, when truthy,
    is used as the id; otherwise ``data['episode_id']`` is required.
    Everything else is optional and ends up as ``None`` (or an empty
    thumbnail list) when absent.
    """
    title = data['title']
    download_url = data['download_url']

    def _nested_text(getter):
        # Only accept the value when it really is a text string.
        return try_get(data, getter, compat_str)

    series = _nested_text(lambda x: x['show']['title'])
    uploader = _nested_text(lambda x: x['author']['fullname'])

    # Keep the three image variants in the order they are listed here and
    # drop the ones url_or_none() rejects.
    candidate_urls = (
        url_or_none(data.get('%s_url' % image))
        for image in ('image_original', 'image_medium', 'image'))
    thumbnails = [
        {'url': thumb_url} for thumb_url in candidate_urls if thumb_url]

    def _count(kind):
        # Try the flat '<kind>s_count' key first, then the nested
        # 'stats' mapping.
        getters = (
            lambda x: x['%ss_count' % kind],
            lambda x: x['stats']['%ss' % kind],
        )
        return int_or_none(try_get(data, getters))

    def _scaled(field):
        # scale=1000: presumably the API reports milliseconds -- TODO confirm.
        return float_or_none(data.get(field), scale=1000)

    video_id = compat_str(episode_id or data['episode_id'])
    timestamp = unified_timestamp(data.get('published_at'))
    uploader_id = str_or_none(data.get('author_id'))
    # Fall back to 'length' when 'duration' is missing or zero.
    duration = _scaled('duration') or _scaled('length')

    return {
        'id': video_id,
        'url': download_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': timestamp,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'creator': uploader,
        'duration': duration,
        'view_count': _count('play'),
        'like_count': _count('like'),
        'comment_count': _count('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': series,
        'extractor_key': SpreakerIE.ie_key(),
    }
62
63
class SpreakerIE(InfoExtractor):
    """Extractor for single episodes addressed via api.spreaker.com URLs."""

    # Accepts /episode/<id>, /download/episode/<id> and /v2/episodes/<id>.
    _VALID_URL = r'''(?x)
                    https?://
                        api\.spreaker\.com/
                        (?:
                            (?:download/)?episode|
                            v2/episodes
                        )/
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music (SWM)',
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the v2 episode JSON for the id in ``url`` and convert it."""
        episode_id = self._match_id(url)
        api_url = 'https://api.spreaker.com/v2/episodes/%s' % episode_id
        payload = self._download_json(api_url, episode_id)
        # The episode object sits under response -> episode.
        episode = payload['response']['episode']
        return _extract_episode(episode, episode_id)
106
107
class SpreakerPageIE(InfoExtractor):
    """Resolve a www.spreaker.com episode page to its API episode URL."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the numeric episode id from the page and hand off to SpreakerIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The id is looked for as an HTML data attribute first, then as a
        # "episode_id: <n>" assignment.
        id_patterns = (
            r'data-episode_id=["\'](?P<id>\d+)',
            r'episode_id\s*:\s*(?P<id>\d+)',
        )
        episode_id = self._search_regex(id_patterns, webpage, 'episode id')
        api_url = 'https://api.spreaker.com/episode/%s' % episode_id
        return self.url_result(
            api_url, ie=SpreakerIE.ie_key(), video_id=episode_id)
124
125
class SpreakerShowIE(InfoExtractor):
    """Playlist extractor for api.spreaker.com/show/<id> URLs."""

    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield one info dict per episode, walking the paged episode list.

        Paging stops when a response has no ``pager`` dict, when its
        ``results`` is empty or not a list, or once the current page
        number equals the pager's ``last_page``.
        """
        episodes_url = 'https://api.spreaker.com/show/%s/episodes' % show_id
        page_num = 0
        while True:
            page_num += 1
            page = self._download_json(
                episodes_url, show_id,
                note='Downloading JSON page %d' % page_num,
                query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            if not (results and isinstance(results, list)):
                return
            for item in results:
                # Entries that are not dicts are skipped.
                if isinstance(item, dict):
                    yield _extract_episode(item)
            if pager.get('last_page') == page_num:
                return

    def _real_extract(self, url):
        """Return a playlist whose entries are produced lazily by ``_entries``."""
        show_id = self._match_id(url)
        entries = self._entries(show_id)
        return self.playlist_result(entries, playlist_id=show_id)
160
161
class SpreakerShowPageIE(InfoExtractor):
    """Resolve a www.spreaker.com show page to its API show URL."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the numeric show id from the page and hand off to SpreakerShowIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The page is expected to contain a "show_id: <n>" assignment.
        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
        api_url = 'https://api.spreaker.com/show/%s' % show_id
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)