]>
Commit | Line | Data |
---|---|---|
3f771f75 LL |
1 | # coding: utf-8 |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | traverse_obj, | |
7 | unescapeHTML, | |
8 | ) | |
9 | ||
10 | import itertools | |
11 | from urllib.parse import urlencode | |
12 | ||
13 | ||
class RadioKapitalBaseIE(InfoExtractor):
    """Shared helpers for the Radio Kapitał extractors.

    Wraps the site's ``wp-json/kapital/v1`` JSON API and turns one episode
    object from that API into a ``url_transparent`` result handled by the
    Mixcloud extractor.
    """

    def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
        """Download and return the decoded JSON of an API resource.

        resource -- path below ``/wp-json/kapital/v1/``
        video_id -- identifier forwarded to ``_download_json``
        note     -- progress note forwarded to ``_download_json``
        qs       -- optional mapping of query-string parameters
        """
        # ``None`` replaces the former mutable ``{}`` default; an omitted or
        # empty ``qs`` still yields an empty query string after the ``?``.
        query = urlencode(qs or {})
        return self._download_json(
            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{query}',
            video_id, note=note)

    def _parse_episode(self, data):
        """Build a ``url_transparent`` info dict from one API episode object.

        ``data`` must contain ``mixcloud_url`` and ``title`` (``KeyError``
        otherwise); ``content``, ``tags``, ``show`` and ``published`` are
        treated as optional metadata.
        """
        # NOTE(review): ``published`` is assumed to be laid out as
        # ``DD.MM.YYYY`` (two-digit day and month, four-digit year, one
        # separator character between the parts) -- confirm against the API.
        # The previous slices ([6:11], [3:6], [:3]) kept the separators, so
        # the value was not the 8-digit YYYYMMDD that ``release_date`` needs.
        release = None
        published = data.get('published')
        if isinstance(published, str):
            candidate = published[6:10] + published[3:5] + published[:2]
            # Only report a date when the slices really produced 8 digits;
            # anything else means the layout assumption does not hold.
            if len(candidate) == 8 and candidate.isdigit():
                release = candidate

        return {
            '_type': 'url_transparent',
            'url': data['mixcloud_url'],
            'ie_key': 'Mixcloud',
            'title': unescapeHTML(data['title']),
            'description': clean_html(data.get('content')),
            'tags': traverse_obj(data, ('tags', ..., 'name')),
            'release_date': release,
            'series': traverse_obj(data, ('show', 'title')),
        }
32 | ||
33 | ||
class RadioKapitalIE(RadioKapitalBaseIE):
    """Extractor for a single episode page (``/shows/<show>/<episode>``)."""

    IE_NAME = 'radiokapital'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'

    _TESTS = [
        {
            'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
            'info_dict': {
                'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
                'ext': 'm4a',
                'title': '#5: It’s okay to\xa0be\xa0immaterial',
                'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
                'uploader': 'Radio Kapitał',
                'uploader_id': 'radiokapital',
                'timestamp': 1621640164,
                'upload_date': '20210521',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the episode named by the URL slug and convert it to an info dict."""
        # The ``id`` group of _VALID_URL is the episode slug, which is also
        # the key used by the ``episodes/<slug>`` API resource.
        slug = self._match_id(url)
        return self._parse_episode(self._call_api(f'episodes/{slug}', slug))
57 | ||
58 | ||
class RadioKapitalShowIE(RadioKapitalBaseIE):
    """Extractor for a show page (``/shows/<show>``), returned as a playlist."""

    IE_NAME = 'radiokapital:show'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'

    _TESTS = [
        {
            'url': 'https://radiokapital.pl/shows/wesz',
            'info_dict': {
                'id': '100',
                'title': 'WĘSZ',
                'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
            },
            'playlist_mincount': 17,
        },
    ]

    def _get_episode_list(self, series_id, page_no):
        """Return the decoded JSON for one page of the show's episode list."""
        query = {
            'show': series_id,
            'page': page_no,
        }
        progress_note = f'Downloading episode list page #{page_no}'
        return self._call_api('episodes', series_id, progress_note, qs=query)

    def _entries(self, series_id):
        """Lazily yield one parsed episode per item, page after page.

        Pages are requested starting at 1; iteration ends after the first
        page whose ``next`` value is ``None``.
        """
        for page_no in itertools.count(1):
            page = self._get_episode_list(series_id, page_no)
            for episode in page['items']:
                yield self._parse_episode(episode)
            if page['next'] is None:
                return

    def _real_extract(self, url):
        """Download the show metadata and return it as a playlist result."""
        series_id = self._match_id(url)
        show = self._call_api(
            f'shows/{series_id}', series_id, 'Downloading show metadata')

        # The entries generator is created here but only hits the API once
        # the caller starts iterating over the playlist.
        playlist = {'_type': 'playlist'}
        playlist['entries'] = self._entries(series_id)
        playlist['id'] = str(show['id'])
        playlist['title'] = show.get('title')
        playlist['description'] = clean_html(show.get('content'))
        return playlist