jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/radiokapital.py
[radiokapital] Add extractors (#1401)
[yt-dlp.git] / yt_dlp / extractor / radiokapital.py
CommitLineData
3f771f75
LL
1# coding: utf-8
2
3from .common import InfoExtractor
4from ..utils import (
5 clean_html,
6 traverse_obj,
7 unescapeHTML,
8)
9
10import itertools
11from urllib.parse import urlencode
12
13
class RadioKapitalBaseIE(InfoExtractor):
    """Helpers shared by the Radio Kapitał episode and show extractors."""

    def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
        """Download a JSON resource from the ``wp-json/kapital/v1`` endpoint.

        Args:
            resource: Path below ``/wp-json/kapital/v1/`` (e.g. ``episodes/<slug>``).
            video_id: Identifier forwarded to ``_download_json``.
            note: Progress message forwarded to ``_download_json``.
            qs: Optional mapping of query-string parameters; ``None`` means
                no parameters.

        Returns:
            Whatever ``_download_json`` returns for the built URL.
        """
        # A ``{}`` literal as the default would be one dict object shared by
        # every call; use ``None`` as the sentinel and normalise here instead.
        # An empty mapping encodes to '' so the URL still ends with a bare '?'.
        query = {} if qs is None else qs
        return self._download_json(
            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(query)}',
            video_id, note=note)

    def _parse_episode(self, data):
        """Turn one episode object from the API into a ``url_transparent`` result.

        The media itself is delegated to the ``Mixcloud`` extractor through
        ``data['mixcloud_url']``; the remaining fields are metadata taken from
        the API object.

        Raises:
            KeyError: if ``published``, ``mixcloud_url`` or ``title`` is missing.
        """
        published = data['published']
        # Reorder three fixed-position fields of ``published``: characters
        # 6-10 first, then 3-5, then 0-2.
        # NOTE(review): presumably this turns a day-first date into year-first
        # order; the 3-character slices would also carry any separator that
        # follows the day and month -- confirm against a real API response.
        release = '%s%s%s' % (published[6:11], published[3:6], published[:3])
        return {
            '_type': 'url_transparent',
            'url': data['mixcloud_url'],
            'ie_key': 'Mixcloud',
            'title': unescapeHTML(data['title']),
            'description': clean_html(data.get('content')),
            'tags': traverse_obj(data, ('tags', ..., 'name')),
            'release_date': release,
            'series': traverse_obj(data, ('show', 'title')),
        }
32
33
class RadioKapitalIE(RadioKapitalBaseIE):
    """Extractor for a single episode URL of the form ``/shows/<show>/<episode>``."""

    IE_NAME = 'radiokapital'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'

    _TESTS = [{
        'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
        'info_dict': {
            'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
            'ext': 'm4a',
            'title': '#5: It’s okay to\xa0be\xa0immaterial',
            'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
            'uploader': 'Radio Kapitał',
            'uploader_id': 'radiokapital',
            'timestamp': 1621640164,
            'upload_date': '20210521',
        },
    }]

    def _real_extract(self, url):
        """Look up the episode named in *url* and return its parsed result."""
        # The ``id`` group of _VALID_URL is the episode slug (last path segment).
        episode_slug = self._match_id(url)
        resource = 'episodes/%s' % episode_slug
        return self._parse_episode(self._call_api(resource, episode_slug))
57
58
class RadioKapitalShowIE(RadioKapitalBaseIE):
    """Extractor for a show URL (``/shows/<show>``), returned as a playlist."""

    IE_NAME = 'radiokapital:show'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'

    _TESTS = [{
        'url': 'https://radiokapital.pl/shows/wesz',
        'info_dict': {
            'id': '100',
            'title': 'WĘSZ',
            'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
        },
        'playlist_mincount': 17,
    }]

    def _get_episode_list(self, series_id, page_no):
        """Fetch page *page_no* of the ``episodes`` listing for *series_id*."""
        note = f'Downloading episode list page #{page_no}'
        params = {
            'show': series_id,
            'page': page_no,
        }
        return self._call_api('episodes', series_id, note, qs=params)

    def _entries(self, series_id):
        """Lazily yield a parsed result for every episode of the show.

        Pages are requested one at a time starting from 1; iteration stops
        after the first page whose ``next`` field is ``None``.
        """
        page_no = 1
        while True:
            listing = self._get_episode_list(series_id, page_no)
            for episode in listing['items']:
                yield self._parse_episode(episode)
            if listing['next'] is None:
                return
            page_no += 1

    def _real_extract(self, url):
        """Return a playlist result describing the show named in *url*."""
        series_id = self._match_id(url)

        show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata')
        return {
            '_type': 'playlist',
            # Generator: episode pages are only downloaded as it is consumed.
            'entries': self._entries(series_id),
            'id': str(show['id']),
            'title': show.get('title'),
            'description': clean_html(show.get('content')),
        }