]>
Commit | Line | Data |
---|---|---|
1 | import urllib.parse | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | OnDemandPagedList, | |
6 | determine_ext, | |
7 | parse_iso8601, | |
8 | traverse_obj, | |
9 | ) | |
10 | ||
11 | ||
class TuneInBaseIE(InfoExtractor):
    """Shared helpers for the TuneIn extractors defined in this module."""

    _VALID_URL_BASE = r'https?://(?:www\.)?tunein\.com'

    def _extract_metadata(self, webpage, content_id):
        """Return the JSON assigned to ``window.INITIAL_STATE`` in *webpage*.

        The search is non-fatal (``fatal=False``), so callers must be prepared
        for a missing result.
        """
        # The dot is escaped so only a literal "window.INITIAL_STATE=" matches.
        return self._search_json(
            r'window\.INITIAL_STATE=', webpage, 'hydration', content_id, fatal=False)

    def _extract_formats_and_subtitles(self, content_id):
        """Query the radiotime ``Tune.ashx`` endpoint and build the format list.

        Returns a ``(formats, subtitles)`` tuple. HLS streams are expanded via
        the m3u8 helper, ``.pls`` playlists are resolved to their first entry,
        and every other stream URL is used as-is.
        """
        streams = self._download_json(
            f'https://opml.radiotime.com/Tune.ashx?render=json&formats=mp3,aac,ogg,flash,hls&id={content_id}',
            content_id)['body']

        formats, subtitles = [], {}
        for stream in streams:
            stream_url = stream['url']
            if stream.get('media_type') == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    stream_url, content_id, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
                continue

            if determine_ext(stream_url) == 'pls':
                playlist_content = self._download_webpage(stream_url, content_id)
                # "." also matches a carriage return, so a playlist with CRLF
                # line endings would otherwise yield a URL ending in "\r".
                stream_url = (self._search_regex(
                    r'File1=(.*)', playlist_content, 'url', fatal=False) or '').strip()
                if not stream_url:
                    # The non-fatal search found nothing; do not emit a
                    # format without a URL.
                    continue

            formats.append({
                'url': stream_url,
                'abr': stream.get('bitrate'),
                'ext': stream.get('media_type'),
            })

        return formats, subtitles
44 | ||
45 | ||
class TuneInStationIE(TuneInBaseIE):
    """Extractor for TuneIn station pages and station embed-player URLs (``s<digits>`` ids)."""

    _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'(?:/radio/[^?#]+-|/embed/player/)(?P<id>s\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/s\d+)']

    _TESTS = [{
        'url': 'https://tunein.com/radio/Jazz24-885-s34682/',
        'info_dict': {
            'id': 's34682',
            'title': 're:^Jazz24',
            'description': 'md5:d6d0b89063fd68d529fa7058ee98619b',
            'thumbnail': 're:^https?://[^?&]+/s34682',
            'location': 'Seattle-Tacoma, US',
            'ext': 'mp3',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://tunein.com/embed/player/s6404/',
        'only_matching': True,
    }, {
        'url': 'https://tunein.com/radio/BBC-Radio-1-988-s24939/',
        'info_dict': {
            'id': 's24939',
            'title': 're:^BBC Radio 1',
            'description': 'md5:f3f75f7423398d87119043c26e7bfb84',
            'thumbnail': 're:^https?://[^?&]+/s24939',
            'location': 'London, UK',
            'ext': 'mp3',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a station from its page metadata and stream list."""
        station_id = self._match_id(url)

        page = self._download_webpage(url, station_id)
        metadata = self._extract_metadata(page, station_id)

        formats, subtitles = self._extract_formats_and_subtitles(station_id)

        # Every metadata field for this station lives under profiles -> <station id>.
        profile_path = ('profiles', station_id)

        def from_profile(*keys):
            return traverse_obj(metadata, (*profile_path, *keys))

        publish_time = from_profile('actions', 'play', 'publishTime')
        # The location is looked up in two places; the first path that yields a value wins.
        location = traverse_obj(
            metadata,
            (*profile_path, 'metadata', 'properties', 'location', 'displayName'),
            (*profile_path, 'properties', 'location', 'displayName'))

        return {
            'id': station_id,
            'title': from_profile('title'),
            'description': from_profile('description'),
            'thumbnail': from_profile('image'),
            'timestamp': parse_iso8601(publish_time),
            'location': location,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': from_profile('actions', 'play', 'isLive'),
        }
104 | ||
105 | ||
class TuneInPodcastIE(TuneInBaseIE):
    """Extractor that turns a TuneIn podcast (``p<digits>`` id) into a playlist of episodes."""

    _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'/(?:podcasts/[^?#]+-|embed/player/)(?P<id>p\d+)/?(?:#|$)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/p\d+)']

    _TESTS = [{
        'url': 'https://tunein.com/podcasts/Technology-Podcasts/Artificial-Intelligence-p1153019',
        'info_dict': {
            'id': 'p1153019',
            'title': 'Lex Fridman Podcast',
            'description': 'md5:bedc4e5f1c94f7dec6e4317b5654b00d',
        },
        'playlist_mincount': 200,
    }, {
        'url': 'https://tunein.com/embed/player/p191660/',
        'only_matching': True,
    }, {
        'url': 'https://tunein.com/podcasts/World-News/BBC-News-p14/',
        'info_dict': {
            'id': 'p14',
            'title': 'BBC News',
            'description': 'md5:1218e575eeaff75f48ed978261fa2068',
        },
        'playlist_mincount': 200,
    }]

    # Number of episodes requested per API call.
    _PAGE_SIZE = 30

    def _real_extract(self, url):
        """Return a lazily paged playlist of the podcast's free episodes."""
        podcast_id = self._match_id(url)

        # The page only supplies the playlist title/description, so its
        # download is non-fatal. When it fails there is no page text to search;
        # skip the metadata lookup instead of passing a non-string on.
        webpage = self._download_webpage(url, podcast_id, fatal=False)
        metadata = self._extract_metadata(webpage, podcast_id) if webpage else None

        def page_func(page_num):
            api_response = self._download_json(
                f'https://api.tunein.com/profiles/{podcast_id}/contents', podcast_id,
                note=f'Downloading page {page_num + 1}', query={
                    'filter': 't:free',
                    'offset': page_num * self._PAGE_SIZE,
                    'limit': self._PAGE_SIZE,
                })

            # GuideId's first character is dropped to form the topicId;
            # TuneInPodcastEpisodeIE prepends "t" again when building its id.
            return [
                self.url_result(
                    f'https://tunein.com/podcasts/{podcast_id}?topicId={episode["GuideId"][1:]}',
                    TuneInPodcastEpisodeIE, title=episode.get('Title'))
                for episode in api_response['Items']]

        entries = OnDemandPagedList(page_func, self._PAGE_SIZE)
        return self.playlist_result(
            entries, playlist_id=podcast_id,
            title=traverse_obj(metadata, ('profiles', podcast_id, 'title')),
            description=traverse_obj(metadata, ('profiles', podcast_id, 'description')))
158 | ||
159 | ||
class TuneInPodcastEpisodeIE(TuneInBaseIE):
    """Extractor for a single podcast episode addressed by ``?topicId=`` on a podcast URL."""

    _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'/podcasts/(?:[^?&]+-)?(?P<podcast_id>p\d+)/?\?topicId=(?P<id>\w\d+)'

    _TESTS = [{
        'url': 'https://tunein.com/podcasts/Technology-Podcasts/Artificial-Intelligence-p1153019/?topicId=236404354',
        'info_dict': {
            'id': 't236404354',
            'title': '#351 \u2013 MrBeast: Future of YouTube, Twitter, TikTok, and Instagram',
            'description': 'md5:e1734db6f525e472c0c290d124a2ad77',
            'thumbnail': 're:^https?://[^?&]+/p1153019',
            'timestamp': 1673458571,
            'upload_date': '20230111',
            'series_id': 'p1153019',
            'series': 'Lex Fridman Podcast',
            'ext': 'mp3',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one episode, including its parent series fields."""
        match = self._match_valid_url(url)
        podcast_id = match.group('podcast_id')
        # The id used for lookups is the URL's topicId prefixed with "t".
        episode_id = 't' + match.group('id')

        page = self._download_webpage(url, episode_id)
        metadata = self._extract_metadata(page, episode_id)

        formats, subtitles = self._extract_formats_and_subtitles(episode_id)

        def from_profile(profile_id, *keys):
            return traverse_obj(metadata, ('profiles', profile_id, *keys))

        publish_time = from_profile(episode_id, 'actions', 'play', 'publishTime')

        return {
            'id': episode_id,
            'title': from_profile(episode_id, 'title'),
            'description': from_profile(episode_id, 'description'),
            'thumbnail': from_profile(episode_id, 'image'),
            'timestamp': parse_iso8601(publish_time),
            'series_id': podcast_id,
            # The series title is read from the podcast's profile, not the episode's.
            'series': from_profile(podcast_id, 'title'),
            'formats': formats,
            'subtitles': subtitles,
        }
198 | ||
199 | ||
class TuneInShortenerIE(InfoExtractor):
    """Resolve ``tun.in`` short links and hand the target URL to another extractor."""

    IE_NAME = 'tunein:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)'

    _TEST = {
        # test redirection
        'url': 'http://tun.in/ser7s',
        'info_dict': {
            'id': 's34682',
            'title': 're:^Jazz24',
            'description': 'md5:d6d0b89063fd68d529fa7058ee98619b',
            'thumbnail': 're:^https?://[^?&]+/s34682',
            'location': 'Seattle-Tacoma, US',
            'ext': 'mp3',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,  # live stream
        },
    }

    def _real_extract(self, url):
        """Request the short URL and return a ``url_result`` for the final response URL."""
        redirect_id = self._match_id(url)
        # The server doesn't support HEAD requests
        urlh = self._request_webpage(
            url, redirect_id, note='Downloading redirect page')

        url = urlh.url
        url_parsed = urllib.parse.urlparse(url)
        if url_parsed.port == 443:
            # Drop the explicit ":443" from the netloc. ParseResult has no
            # ``url`` attribute; ``geturl()`` reassembles the URL string.
            url = url_parsed._replace(netloc=url_parsed.hostname).geturl()

        self.to_screen(f'Following redirect: {url}')
        return self.url_result(url)