]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/rinsefm.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / rinsefm.py
CommitLineData
c91af948 1from .common import InfoExtractor
1a36dbad
S
2from ..utils import (
3 MEDIA_EXTENSIONS,
4 determine_ext,
5 parse_iso8601,
6 traverse_obj,
7 url_or_none,
8)
c91af948
TC
9
10
1a36dbad
S
# Base class holding the entry-to-info-dict conversion shared by the
# rinse.fm extractors in this module.
class RinseFMBaseIE(InfoExtractor):
    @staticmethod
    def _parse_entry(entry):
        """Build an info dict from a single episode ``entry`` mapping.

        The fields ``id``, ``title``, ``fileUrl``, ``episodeDate``,
        ``featuredImage[0].filename`` and ``slug`` are read from ``entry``
        through ``traverse_obj``. The result is always attributed to
        ``RinseFMIE`` via ``extractor_key`` / ``extractor``, and ``vcodec``
        is fixed to ``'none'``.
        """
        def _thumbnail_from_filename(filename):
            # Falsy filenames are passed through unchanged instead of being
            # turned into a URL.
            return filename and f'https://rinse.imgix.net/media/{filename}'

        def _episode_page_from_slug(slug):
            # Same pass-through rule as for thumbnails.
            return slug and f'https://rinse.fm/episodes/{slug}'

        field_map = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'url': ('fileUrl', {url_or_none}),
            'release_timestamp': ('episodeDate', {parse_iso8601}),
            'thumbnail': ('featuredImage', 0, 'filename', {str}, {_thumbnail_from_filename}),
            'webpage_url': ('slug', {str}, {_episode_page_from_slug}),
        }
        extracted = traverse_obj(entry, field_map)

        return {
            **extracted,
            'vcodec': 'none',
            'extractor_key': RinseFMIE.ie_key(),
            'extractor': RinseFMIE.IE_NAME,
        }
29
30
# Extractor for single episode pages under https://rinse.fm/episodes/<slug>.
class RinseFMIE(RinseFMBaseIE):
    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
        'md5': '76ee0b719315617df42e15e710f46c7b',
        'info_dict': {
            'id': '1536535',
            'ext': 'mp3',
            'title': 'Club Glow - 15/12/2023 - 20:00',
            'thumbnail': r're:^https://.+\.(?:jpg|JPG)$',
            'release_timestamp': 1702598400,
            'release_date': '20231215',
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and convert its ``entry`` into an info dict.

        The entry is read from ``props.pageProps.entry`` of the data returned
        by ``_search_nextjs_data``; plain indexing is used, so a missing key
        raises ``KeyError``.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        page_data = self._search_nextjs_data(page, slug)
        episode_entry = page_data['props']['pageProps']['entry']

        return self._parse_entry(episode_entry)
52
53
# Extractor for show pages under https://rinse.fm/shows/<slug>, returned as a
# playlist of the show's episodes.
class RinseFMArtistPlaylistIE(RinseFMBaseIE):
    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/shows/resources/',
        'info_dict': {
            'id': 'resources',
            'title': '[re]sources',
            'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
        },
        'playlist_mincount': 40,
    }, {
        'url': 'https://rinse.fm/shows/ivy/',
        'info_dict': {
            'id': 'ivy',
            'title': '[IVY]',
            'description': 'A dedicated space for DNB/Turbo House and 4x4.',
        },
        'playlist_mincount': 7,
    }]

    def _entries(self, data):
        """Yield an info dict for each episode in ``props.pageProps.episodes``.

        Only episodes whose ``fileUrl`` extension (per ``determine_ext``) is
        listed in ``MEDIA_EXTENSIONS.audio`` are kept.
        """
        def _is_audio_episode(_, episode):
            return determine_ext(episode['fileUrl']) in MEDIA_EXTENSIONS.audio

        audio_episodes = traverse_obj(
            data, ('props', 'pageProps', 'episodes', _is_audio_episode))
        yield from (self._parse_entry(item) for item in audio_episodes)

    def _real_extract(self, url):
        """Download the show page and return its episodes as a playlist.

        Title and description come from the OpenGraph helpers first, falling
        back to the ``title`` / ``description`` meta lookups when those
        return a falsy value.
        """
        show_slug = self._match_id(url)
        page = self._download_webpage(url, show_slug)

        # Keep the lookup order: title, description, then the page data.
        show_title = self._og_search_title(page)
        if not show_title:
            show_title = self._html_search_meta('title', page)

        show_description = self._og_search_description(page)
        if not show_description:
            show_description = self._html_search_meta('description', page)

        page_data = self._search_nextjs_data(page, show_slug)

        return self.playlist_result(
            self._entries(page_data), show_slug, show_title, description=show_description)