]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/radiokapital.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / radiokapital.py
1 import itertools
2 import urllib.parse
3
4 from .common import InfoExtractor
5 from ..utils import clean_html, traverse_obj, unescapeHTML
6
7
class RadioKapitalBaseIE(InfoExtractor):
    """Shared helpers for the Radio Kapitał extractors: API access and episode parsing."""

    def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
        """Download and return JSON from the radiokapital.pl ``kapital/v1`` API.

        resource -- path appended after ``/wp-json/kapital/v1/``
        video_id -- identifier forwarded to ``_download_json``
        note     -- progress message forwarded to ``_download_json``
        qs       -- optional query parameters (anything ``urlencode`` accepts);
                    ``None`` or an empty value yields an empty query string
        """
        # A ``None`` sentinel replaces the former shared ``{}`` default; the
        # generated URL is unchanged (it still ends in ``?`` when there is no query).
        query = urllib.parse.urlencode(qs or {})
        return self._download_json(
            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{query}',
            video_id, note=note)

    @staticmethod
    def _parse_release_date(published):
        """Convert the API's ``published`` value into ``YYYYMMDD``, or return None.

        The positions read here (day at 0-1, month at 3-4, year at 6-9) are the
        same ones the previous slicing targeted, minus the separator characters
        it accidentally included.
        NOTE(review): presumably the API sends ``DD.MM.YYYY`` -- confirm against
        a live response.
        """
        if not isinstance(published, str):
            return None
        release = f'{published[6:10]}{published[3:5]}{published[:2]}'
        # Only hand back a well-formed date; anything else is treated as unknown
        # instead of leaking a malformed string into the result.
        if len(release) == 8 and release.isascii() and release.isdigit():
            return release
        return None

    def _parse_episode(self, data):
        """Turn one episode object from the API into a ``url_transparent`` result.

        The returned dict points at ``data['mixcloud_url']`` with ``ie_key``
        ``'Mixcloud'`` and carries title, description, tags, release date and
        series taken from ``data``.  ``mixcloud_url`` and ``title`` are required
        (``KeyError`` if absent); every other field is read non-fatally.
        """
        return {
            '_type': 'url_transparent',
            'url': data['mixcloud_url'],
            'ie_key': 'Mixcloud',
            'title': unescapeHTML(data['title']),
            'description': clean_html(data.get('content')),
            'tags': traverse_obj(data, ('tags', ..., 'name')),
            # ``published`` is optional metadata, so a missing or odd value must
            # not abort extraction.
            'release_date': self._parse_release_date(data.get('published')),
            'series': traverse_obj(data, ('show', 'title')),
        }
26
27
class RadioKapitalIE(RadioKapitalBaseIE):
    """Extractor for a single episode page under ``radiokapital.pl/shows/<show>/<episode>``."""

    IE_NAME = 'radiokapital'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'

    _TESTS = [
        {
            'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
            'info_dict': {
                'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
                'ext': 'm4a',
                'title': '#5: It’s okay to\xa0be\xa0immaterial',
                'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
                'uploader': 'Radio Kapitał',
                'uploader_id': 'radiokapital',
                'timestamp': 1621640164,
                'upload_date': '20210521',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the episode's API record and return its parsed result dict."""
        episode_slug = self._match_id(url)
        # The same identifier is used both in the API path and as the id
        # passed along to the download call.
        api_resource = f'episodes/{episode_slug}'
        return self._parse_episode(self._call_api(api_resource, episode_slug))
51
52
class RadioKapitalShowIE(RadioKapitalBaseIE):
    """Extractor for a show page under ``radiokapital.pl/shows/<show>``, returned as a playlist."""

    IE_NAME = 'radiokapital:show'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'

    _TESTS = [
        {
            'url': 'https://radiokapital.pl/shows/wesz',
            'info_dict': {
                'id': '100',
                'title': 'WĘSZ',
                'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
            },
            'playlist_mincount': 17,
        },
    ]

    def _get_episode_list(self, series_id, page_no):
        """Request page ``page_no`` of the ``episodes`` API resource for ``series_id``."""
        query = {
            'show': series_id,
            'page': page_no,
        }
        progress_note = f'Downloading episode list page #{page_no}'
        return self._call_api('episodes', series_id, progress_note, qs=query)

    def _entries(self, series_id):
        """Lazily yield a parsed entry for every episode, one API page at a time."""
        for page_no in itertools.count(1):
            page = self._get_episode_list(series_id, page_no)
            for episode in page['items']:
                yield self._parse_episode(episode)
            # The API marks the final page with a null ``next`` value.
            if page['next'] is None:
                return

    def _real_extract(self, url):
        """Fetch the show's metadata and return a playlist over its episodes."""
        series_id = self._match_id(url)
        show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata')

        playlist_entries = self._entries(series_id)
        playlist_id = str(show['id'])
        return {
            '_type': 'playlist',
            'entries': playlist_entries,
            'id': playlist_id,
            'title': show.get('title'),
            'description': clean_html(show.get('content')),
        }