]>
Commit | Line | Data |
---|---|---|
1 | import itertools | |
2 | import urllib.parse | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import clean_html, traverse_obj, unescapeHTML | |
6 | ||
7 | ||
class RadioKapitalBaseIE(InfoExtractor):
    """Shared helpers for the Radio Kapitał extractors: API access and episode parsing."""

    def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
        """Download and return JSON from the site's ``kapital/v1`` API.

        ``resource`` is the path below the API root, ``qs`` an optional mapping
        of query-string parameters. ``video_id`` and ``note`` are handed to
        ``_download_json`` unchanged.
        """
        # `qs` defaults to None instead of a dict literal so that no mutable
        # object is shared between calls; the resulting URL is unchanged.
        query = urllib.parse.urlencode(qs or {})
        return self._download_json(
            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{query}',
            video_id, note=note)

    @staticmethod
    def _release_date(published):
        """Convert the API's ``published`` value into ``YYYYMMDD``, or None.

        NOTE(review): presumably ``published`` looks like ``DD.MM.YYYY`` -
        confirm against a live API response.
        """
        if not isinstance(published, str):
            return None
        # Same year/month/day slices as before. They are wider than the
        # digits they hold, so separator characters are filtered out here
        # rather than leaking into the result.
        digits = ''.join(filter(
            str.isdigit, published[6:11] + published[3:6] + published[:3]))
        # Anything that is not exactly eight digits is not a usable date.
        return digits if len(digits) == 8 else None

    def _parse_episode(self, data):
        """Build a ``url_transparent`` result pointing at the episode's Mixcloud URL.

        ``data`` is one episode object from the API. ``mixcloud_url`` and
        ``title`` are required (``KeyError`` if missing); every other field
        is optional.
        """
        return {
            '_type': 'url_transparent',
            'url': data['mixcloud_url'],
            'ie_key': 'Mixcloud',
            'title': unescapeHTML(data['title']),
            'description': clean_html(data.get('content')),
            'tags': traverse_obj(data, ('tags', ..., 'name')),
            'release_date': self._release_date(data.get('published')),
            'series': traverse_obj(data, ('show', 'title')),
        }
26 | ||
27 | ||
class RadioKapitalIE(RadioKapitalBaseIE):
    """Extractor for a single episode page (``/shows/<show>/<episode>``)."""

    IE_NAME = 'radiokapital'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'

    _TESTS = [{
        'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
        'info_dict': {
            'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
            'ext': 'm4a',
            'title': '#5: It’s okay to\xa0be\xa0immaterial',
            'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
            'uploader': 'Radio Kapitał',
            'uploader_id': 'radiokapital',
            'timestamp': 1621640164,
            'upload_date': '20210521',
        },
    }]

    def _real_extract(self, url):
        """Fetch the episode named by the slug in ``url`` and return its parsed info."""
        # The slug captured by _VALID_URL is both the API key and the id
        # shown in download messages.
        episode_slug = self._match_id(url)
        return self._parse_episode(
            self._call_api(f'episodes/{episode_slug}', episode_slug))
51 | ||
52 | ||
class RadioKapitalShowIE(RadioKapitalBaseIE):
    """Extractor for a show page (``/shows/<show>``), returned as a playlist of its episodes."""

    IE_NAME = 'radiokapital:show'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'

    _TESTS = [{
        'url': 'https://radiokapital.pl/shows/wesz',
        'info_dict': {
            'id': '100',
            'title': 'WĘSZ',
            'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
        },
        'playlist_mincount': 17,
    }]

    def _get_episode_list(self, series_id, page_no):
        """Download page ``page_no`` of the episode listing for ``series_id``."""
        query = {
            'show': series_id,
            'page': page_no,
        }
        progress_note = f'Downloading episode list page #{page_no}'
        return self._call_api('episodes', series_id, progress_note, qs=query)

    def _entries(self, series_id):
        """Lazily yield a parsed entry for every episode of the show, page by page."""
        for page_number in itertools.count(1):
            listing = self._get_episode_list(series_id, page_number)
            yield from map(self._parse_episode, listing['items'])
            # The API marks the last page with a null `next` value.
            if listing['next'] is None:
                return

    def _real_extract(self, url):
        """Return a playlist result for the show whose slug is matched from ``url``."""
        series_id = self._match_id(url)
        show_meta = self._call_api(
            f'shows/{series_id}', series_id, 'Downloading show metadata')

        playlist = {
            '_type': 'playlist',
            # Generator: episode pages are only requested once it is iterated.
            'entries': self._entries(series_id),
            'id': str(show_meta['id']),
            'title': show_meta.get('title'),
            'description': clean_html(show_meta.get('content')),
        }
        return playlist
91 | 'title': show.get('title'), | |
92 | 'description': clean_html(show.get('content')), | |
93 | } |