]>
Commit | Line | Data |
---|---|---|
4cbfa570 | 1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | clean_html, | |
6 | get_element_by_class, | |
7 | get_elements_by_class, | |
8 | join_nonempty, | |
9 | traverse_obj, | |
10 | unified_timestamp, | |
11 | urljoin, | |
12 | ) | |
13 | ||
14 | ||
class CamFMShowIE(InfoExtractor):
    """Extractor for CamFM show pages; yields a playlist of episode player URLs."""

    _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
    _TESTS = [{
        'playlist_mincount': 5,
        'url': 'https://camfm.co.uk/shows/soul-mining/',
        'info_dict': {
            'id': 'soul-mining',
            'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
            'title': 'Soul Mining',
            'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
        },
    }]

    def _real_extract(self, url):
        """Download the show page at *url* and return a playlist info dict."""
        slug = self._match_id(url)
        webpage = self._download_webpage(url, slug)

        # Episodes are referenced through javascript popups pointing at /player/<id>;
        # each one is delegated to CamFMEpisodeIE.
        episodes = []
        for player_path in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", webpage):
            episode_url = urljoin('https://camfm.co.uk', player_path)
            episodes.append(self.url_result(episode_url, CamFMEpisodeIE))

        # The image source is joined onto the site root; a missing image is non-fatal.
        thumbnail_src = self._search_regex(
            r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', webpage, 'thumbnail', fatal=False)
        thumbnail = urljoin('https://camfm.co.uk', thumbnail_src)

        heading = self._html_search_regex('<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
        blurb = clean_html(get_element_by_class('small-12 medium-8 cell', webpage))

        return {
            '_type': 'playlist',
            'id': slug,
            'entries': episodes,
            'thumbnail': thumbnail,
            'title': heading,
            'description': blurb,
        }
42 | ||
43 | ||
class CamFMEpisodeIE(InfoExtractor):
    """Extractor for a single CamFM episode served from a /player/<id> page."""

    _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://camfm.co.uk/player/43336',
        'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
        'info_dict': {
            'id': '43336',
            'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
            'ext': 'mp3',
            'upload_date': '20230516',
            'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
            'timestamp': 1684263600,
            'series': 'AITAA: Am I the Agony Aunt?',
            'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
            'categories': ['Entertainment'],
        }
    }]

    def _real_extract(self, url):
        """Download the player page at *url* and return the episode info dict.

        Everything except the formats is treated as optional metadata: a page
        lacking the ``caption`` or ``card-section`` element yields ``None`` /
        empty values for the affected fields instead of aborting extraction.
        """
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)
        audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)

        # get_element_by_class() returns None when the element is absent; fall back
        # to an empty fragment so the re.sub()/regex calls below never receive None.
        caption = get_element_by_class('caption', page) or ''
        # Drop the <span>...</span> parts of the caption; what remains is used as the series name.
        series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption)) or None

        card_section = get_element_by_class('card-section', page) or ''
        date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)

        return {
            'id': episode_id,
            'title': join_nonempty(series, date, delim=' - '),
            'formats': traverse_obj(audios, (..., 'formats', ...)),
            'timestamp': unified_timestamp(date),  # XXX: Does not account for UK's daylight savings
            'series': series,
            # Strip the leading bold line (<b>...</b><br ... />) before cleaning the remaining text.
            'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)) or None,
            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
                r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
                page, 'thumbnail', fatal=False)),
            'categories': get_elements_by_class('label', caption),
            'was_live': True,
        }