]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/spreaker.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / spreaker.py
CommitLineData
38d70284 1import itertools
2
3from .common import InfoExtractor
38d70284 4from ..utils import (
5 float_or_none,
6 int_or_none,
7 str_or_none,
8 try_get,
9 unified_timestamp,
10 url_or_none,
11)
12
13
def _extract_episode(data, episode_id=None):
    """Map a Spreaker episode JSON object onto an info dict.

    ``data`` is subscripted for ``title`` and ``download_url`` (and for
    ``episode_id`` when no ``episode_id`` argument is supplied), so a
    missing key there raises ``KeyError``.  Every other field is read
    leniently and is simply passed through the ``*_or_none`` helpers.

    The returned dict always advertises an MP3 format and is tagged with
    the ``SpreakerIE`` extractor key.
    """
    title = data['title']
    download_url = data['download_url']

    def _count(kind):
        # Prefer the flat "<kind>s_count" key, then fall back to the
        # nested "stats" object.
        getters = (
            lambda x: x[f'{kind}s_count'],
            lambda x: x['stats'][f'{kind}s'],
        )
        return int_or_none(try_get(data, getters))

    def _scaled(field):
        # The raw value is divided by 1000 (presumably milliseconds to
        # seconds -- confirm against the API).
        return float_or_none(data.get(field), scale=1000)

    show_title = try_get(data, lambda x: x['show']['title'], str)
    author_name = try_get(data, lambda x: x['author']['fullname'], str)

    # Collect every image variant that carries a usable URL.
    image_urls = (
        url_or_none(data.get(f'{size}_url'))
        for size in ('image_original', 'image_medium', 'image'))
    thumbnails = [{'url': image_url} for image_url in image_urls if image_url]

    info = {
        'id': str(episode_id or data['episode_id']),
        'url': download_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': unified_timestamp(data.get('published_at')),
        'uploader': author_name,
        'uploader_id': str_or_none(data.get('author_id')),
        'creator': author_name,
        'duration': _scaled('duration') or _scaled('length'),
        'view_count': _count('play'),
        'like_count': _count('like'),
        'comment_count': _count('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': show_title,
        'extractor_key': SpreakerIE.ie_key(),
    }
    return info
58
59
# Extractor for single episodes addressed through api.spreaker.com
# (".../episode/<id>", ".../download/episode/<id>" or ".../v2/episodes/<id>").
class SpreakerIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        api\.spreaker\.com/
                        (?:
                            (?:download/)?episode|
                            v2/episodes
                        )/
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music (SWM)',
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode JSON from the v2 API and convert it to an info dict."""
        episode_id = self._match_id(url)
        api_url = f'https://api.spreaker.com/v2/episodes/{episode_id}'
        payload = self._download_json(api_url, episode_id)
        # The episode object sits under response -> episode.
        episode = payload['response']['episode']
        return _extract_episode(episode, episode_id)
102
103
# Extractor for episode pages on spreaker.com; it only resolves the numeric
# episode id from the page and hands over to SpreakerIE.
class SpreakerPageIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the episode id in the web page and delegate to the API URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Either an HTML data attribute or an inline "episode_id: N" pair.
        id_patterns = (
            r'data-episode_id=["\'](?P<id>\d+)',
            r'episode_id\s*:\s*(?P<id>\d+)',
        )
        episode_id = self._search_regex(id_patterns, webpage, 'episode id')
        api_url = f'https://api.spreaker.com/episode/{episode_id}'
        return self.url_result(
            api_url, ie=SpreakerIE.ie_key(), video_id=episode_id)
120
121
# Extractor for whole shows addressed through api.spreaker.com/show/<id>;
# the result is a playlist of the show's episodes.
class SpreakerShowIE(InfoExtractor):
    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield an info dict per episode, walking the paged episodes listing.

        Iteration stops when a page has no pager object, when its results
        are empty or not a list, or once the pager's ``last_page`` is reached.
        """
        episodes_url = f'https://api.spreaker.com/show/{show_id}/episodes'
        for page_num in itertools.count(1):
            page = self._download_json(
                episodes_url, show_id,
                note=f'Downloading JSON page {page_num}',
                query={
                    'page': page_num,
                    'max_per_page': 100,
                })
            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return
            results = pager.get('results')
            if not (results and isinstance(results, list)):
                return
            # Entries that are not JSON objects are skipped.
            yield from (
                _extract_episode(item)
                for item in results if isinstance(item, dict))
            if page_num == pager.get('last_page'):
                return

    def _real_extract(self, url):
        """Return a playlist result built lazily from the show's episodes."""
        show_id = self._match_id(url)
        entries = self._entries(show_id)
        return self.playlist_result(entries, playlist_id=show_id)
156
157
# Extractor for show pages on spreaker.com; it only resolves the numeric
# show id from the page and hands over to SpreakerShowIE.
class SpreakerShowPageIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the show id in the web page and delegate to the API URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        id_pattern = r'show_id\s*:\s*(?P<id>\d+)'
        show_id = self._search_regex(id_pattern, webpage, 'show id')
        api_url = f'https://api.spreaker.com/show/{show_id}'
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)