]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/radiokapital.py
[ie/nytimes] Overhaul extractors (#9075)
[yt-dlp.git] / yt_dlp / extractor / radiokapital.py
CommitLineData
3f771f75
LL
1from .common import InfoExtractor
2from ..utils import (
3 clean_html,
4 traverse_obj,
5 unescapeHTML,
6)
7
8import itertools
9from urllib.parse import urlencode
10
11
class RadioKapitalBaseIE(InfoExtractor):
    """Shared API and parsing helpers for the radiokapital.pl extractors."""

    def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
        """Download a JSON resource from the ``kapital/v1`` API of radiokapital.pl.

        resource -- path appended to the API base URL
        video_id -- identifier forwarded to ``self._download_json``
        note     -- progress note forwarded to ``self._download_json``
        qs       -- optional query-string parameters, in any form accepted by
                    ``urllib.parse.urlencode``; ``None`` means no parameters

        Returns whatever ``self._download_json`` returns for the built URL.
        """
        # The default is None rather than a dict literal so that no mutable
        # object is shared between calls. The '?' separator is always emitted,
        # even when there are no parameters to encode.
        query = urlencode(qs or {})
        return self._download_json(
            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{query}',
            video_id, note=note)

    def _parse_episode(self, data):
        """Turn one episode object from the API into a ``url_transparent`` result.

        The result points at ``data['mixcloud_url']`` with ``ie_key`` set to
        ``'Mixcloud'`` and carries the title, description, tags, release date
        and series name taken from ``data``.

        Raises KeyError when ``published``, ``mixcloud_url`` or ``title`` is
        missing from ``data``; ``content`` is read with ``.get`` and may be absent.
        """
        published = data['published']
        # The three slices are emitted in reverse order of their position in
        # ``published`` (last slice first).
        # NOTE(review): the [3:6] and [:3] slices are three characters wide, so
        # a two-digit field would keep the character that follows it. The format
        # of 'published' is not visible here -- confirm this yields YYYYMMDD.
        release = f'{published[6:11]}{published[3:6]}{published[:3]}'
        return {
            '_type': 'url_transparent',
            'url': data['mixcloud_url'],
            'ie_key': 'Mixcloud',
            'title': unescapeHTML(data['title']),
            'description': clean_html(data.get('content')),
            'tags': traverse_obj(data, ('tags', ..., 'name')),
            'release_date': release,
            'series': traverse_obj(data, ('show', 'title')),
        }
30
31
class RadioKapitalIE(RadioKapitalBaseIE):
    """Extractor for a single episode URL of the form ``/shows/<show>/<episode>``."""

    IE_NAME = 'radiokapital'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'

    _TESTS = [{
        'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
        'info_dict': {
            'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
            'ext': 'm4a',
            'title': '#5: It’s okay to\xa0be\xa0immaterial',
            'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
            'uploader': 'Radio Kapitał',
            'uploader_id': 'radiokapital',
            'timestamp': 1621640164,
            'upload_date': '20210521',
        },
    }]

    def _real_extract(self, url):
        """Look up the episode named in ``url`` and return its parsed result."""
        # The ``id`` group of _VALID_URL is the last path segment; it is used
        # both in the API path and as the id handed to the download helper.
        episode_slug = self._match_id(url)
        episode_data = self._call_api(f'episodes/{episode_slug}', episode_slug)
        return self._parse_episode(episode_data)
55
56
class RadioKapitalShowIE(RadioKapitalBaseIE):
    """Extractor for a show URL (``/shows/<show>``), returned as a playlist of episodes."""

    IE_NAME = 'radiokapital:show'
    _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'

    _TESTS = [{
        'url': 'https://radiokapital.pl/shows/wesz',
        'info_dict': {
            'id': '100',
            'title': 'WĘSZ',
            'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
        },
        'playlist_mincount': 17,
    }]

    def _get_episode_list(self, series_id, page_no):
        """Fetch page ``page_no`` of the ``episodes`` resource filtered by ``series_id``."""
        query = {
            'show': series_id,
            'page': page_no,
        }
        return self._call_api(
            'episodes', series_id,
            note=f'Downloading episode list page #{page_no}', qs=query)

    def _entries(self, series_id):
        """Yield a parsed result for every episode of the show, page by page.

        Pages are requested starting from 1; iteration ends after the first
        page whose ``next`` field is ``None``.
        """
        page_no = 1
        while True:
            page = self._get_episode_list(series_id, page_no)
            for episode in page['items']:
                yield self._parse_episode(episode)
            if page['next'] is None:
                return
            page_no += 1

    def _real_extract(self, url):
        """Return a playlist result for the show named in ``url``."""
        series_id = self._match_id(url)

        show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata')
        # ``_entries`` is a generator, so the episode pages are only requested
        # once the returned entries are iterated.
        episodes = self._entries(series_id)
        playlist_id = str(show['id'])
        return {
            '_type': 'playlist',
            'entries': episodes,
            'id': playlist_id,
            'title': show.get('title'),
            'description': clean_html(show.get('content')),
        }