]>
Commit | Line | Data |
---|---|---|
78545664 | 1 | import functools |
2 | import uuid | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | ExtractorError, | |
7 | OnDemandPagedList, | |
8 | int_or_none, | |
9 | month_by_name, | |
10 | parse_duration, | |
11 | try_call, | |
12 | ) | |
13 | ||
14 | ||
class WyborczaVideoIE(InfoExtractor):
    """Extractor for a single wyborcza.pl video described by the ``api-video`` JSON endpoint."""

    # this id is not an article id, it has to be extracted from the article
    _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
    IE_NAME = 'wyborcza:video'
    _TESTS = [{
        'url': 'wyborcza:video:26207634',
        'info_dict': {
            'id': '26207634',
            'ext': 'mp4',
            'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
            'description': ' ',
            'uploader': 'Dorota Roman',
            'duration': 2474,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/video/26207634',
        'only_matching': True,
    }, {
        'url': 'https://wyborcza.pl/api-video/26207634',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for ``url``: progressive MP4 formats plus DASH formats when listed."""
        video_id = self._match_id(url)
        meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)

        # File names in the metadata are relative to redirector + basePath;
        # the redirector is rewritten so that it is always requested over HTTPS.
        media_root = meta['redirector'].replace('http://', 'https://') + meta['basePath']
        files = meta['files']

        # One progressive format per quality that is present and non-empty.
        # The height is taken from a "p<digits><letters>.mp4" file name suffix when it matches.
        formats = [{
            'url': media_root + files[quality],
            'height': int_or_none(self._search_regex(
                r'p(\d+)[a-z]+\.mp4$', files[quality], 'mp4 video height', default=None)),
            'format_id': quality,
        } for quality in ('standard', 'high') if files.get(quality)]

        if files.get('dash'):
            formats.extend(self._extract_mpd_formats(media_root + files['dash'], video_id))

        return {
            'id': video_id,
            'formats': formats,
            'title': meta.get('title'),
            'description': meta.get('lead'),
            'uploader': meta.get('signature'),
            'thumbnail': meta.get('imageUrl'),
            'duration': meta.get('duration'),
        }
67 | ||
68 | ||
class WyborczaPodcastIE(InfoExtractor):
    """Extractor for wyborcza.pl / wysokieobcasy.pl podcast pages.

    A URL carrying a ``podcast=<id>`` query parameter is a single episode.
    A URL without it is handed over to ``TokFMAuditionIE`` as a playlist.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:
            wyborcza\.pl/podcast(?:/0,172673\.html)?|
            wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
        )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
    '''
    _TESTS = [{
        'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
        'info_dict': {
            'id': '100720',
            'ext': 'mp3',
            'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
            'uploader': 'Michał Nogaś ',
            'upload_date': '20210117',
            'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
            'duration': 3684.0,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
        'info_dict': {
            'id': '100673',
            'ext': 'mp3',
            'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
            'uploader': 'Agnieszka Urazińska ',
            'upload_date': '20210115',
            'description': 'md5:c161dc035f8dbb60077011fc41274899',
            'duration': 1803.0,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/podcast',
        'info_dict': {
            'id': '334',
            'title': 'Gościnnie: Wyborcza, 8:10',
            'series': 'Gościnnie: Wyborcza, 8:10',
        },
        'playlist_mincount': 370,
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
        'info_dict': {
            'id': '395',
            'title': 'Gościnnie: Wysokie Obcasy',
            'series': 'Gościnnie: Wysokie Obcasy',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Return an episode info dict, or a ``url_result`` pointing at the matching TokFM audition."""
        podcast_id = self._match_id(url)
        from_wysokie_obcasy = 'wysokieobcasy.pl/' in url

        if not podcast_id:  # playlist
            # No episode id in the URL: delegate to the fixed audition id used for this site.
            series_id = '395' if from_wysokie_obcasy else '334'
            return self.url_result(
                TokFMAuditionIE._create_url(series_id), TokFMAuditionIE, series_id)

        meta = self._download_json(
            'https://wyborcza.pl/api/podcast', podcast_id,
            query={'guid': podcast_id, 'type': 'wo' if from_wysokie_obcasy else None})

        # publishedDate is matched as "<day> <month name> <4-digit year>";
        # all three parts are None when it does not match.
        day, month, year = self._search_regex(
            r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
            'upload date', group=(1, 2, 3), default=(None, None, None))

        def _format_upload_date():
            # Zero-pad month and day to two characters; evaluated under try_call below.
            return f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'

        return {
            'id': podcast_id,
            'url': meta['url'],
            'title': meta.get('title'),
            'description': meta.get('description'),
            'thumbnail': meta.get('imageUrl'),
            'duration': parse_duration(meta.get('duration')),
            'uploader': meta.get('author'),
            'upload_date': try_call(_format_upload_date),
        }
140 | ||
141 | ||
class TokFMPodcastIE(InfoExtractor):
    """Extractor for a single TOK FM podcast episode."""

    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:podcast'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
        'info_dict': {
            'id': '91275',
            'ext': 'aac',
            'title': 'md5:a9b15488009065556900169fb8061cce',
            'episode': 'md5:a9b15488009065556900169fb8061cce',
            'series': 'Analizy',
        },
    }]

    def _real_extract(self, url):
        """Fetch episode metadata and one audio URL per codec, then return the info dict.

        Raises ExtractorError (expected) when the metadata response is empty.
        """
        media_id = self._match_id(url)

        # in case it breaks see this but it returns a lot of useless data
        # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
        response = self._download_json(
            f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
        if not response:
            raise ExtractorError('No such podcast', expected=True)
        episode = response[0]

        formats = []
        for codec in ('aac', 'mp3'):
            # A fresh random device id is generated for every request.
            song = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={codec}',
                media_id, f'Downloading podcast {codec} URL')
            # prevents inserting the mp3 (default) multiple times
            if 'link_ssl' not in song or f'.{codec}' not in song['link_ssl']:
                continue
            formats.append({
                'url': song['link_ssl'],
                'ext': codec,
                'vcodec': 'none',
                'acodec': codec,
            })

        episode_name = episode.get('podcast_name')
        return {
            'id': media_id,
            'formats': formats,
            'title': episode_name,
            'series': episode.get('series_name'),
            'episode': episode_name,
        }
188 | ||
189 | ||
class TokFMAuditionIE(InfoExtractor):
    """Extractor for a TOK FM audition (series), returned as a lazily paged playlist of episodes."""

    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:audition'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
        'info_dict': {
            'id': '218',
            'title': 'Analizy',
            'series': 'Analizy',
        },
        'playlist_count': 1635,
    }]

    # Number of episodes per page; used both for the paged list and for the API `limit` parameter.
    _PAGE_SIZE = 30
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
    }

    @staticmethod
    def _create_url(id):
        """Return the canonical audition page URL for the given audition id."""
        return f'https://audycje.tokfm.pl/audycja/{id}'

    def _real_extract(self, url):
        """Download the series metadata and return a playlist whose entries are fetched on demand.

        Raises ExtractorError (expected) when the series metadata response is empty.
        """
        audition_id = self._match_id(url)

        data = self._download_json(
            f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
            audition_id, 'Downloading audition metadata', headers=self._HEADERS)
        if not data:
            raise ExtractorError('No such audition', expected=True)
        data = data[0]

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, audition_id, data), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': audition_id,
            'title': data.get('series_name'),
            'series': data.get('series_name'),
            'entries': entries,
        }

    def _fetch_page(self, audition_id, data, page):
        """Yield one ``url_transparent`` entry per episode on the requested page.

        ``page`` is the index supplied by the paged list; the progress note shows ``page + 1``.
        """
        # NOTE(review): `offset` receives the page index as-is, not page * _PAGE_SIZE;
        # confirm against the API that it is page-based.
        for retry in self.RetryManager():
            # `limit` follows _PAGE_SIZE so the request size cannot drift from the paged list's page size.
            podcast_page = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit={self._PAGE_SIZE}&offset={page}&with_guests=true&with_leaders_for_mobile=true',
                audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
            if not podcast_page:
                # Report the empty response through the retry manager (presumably prompting another attempt).
                retry.error = ExtractorError('Agora returned empty page', expected=True)

        for podcast in podcast_page:
            yield {
                '_type': 'url_transparent',
                'url': podcast['podcast_sharing_url'],
                'ie_key': TokFMPodcastIE.ie_key(),
                'title': podcast.get('podcast_name'),
                'episode': podcast.get('podcast_name'),
                'description': podcast.get('podcast_description'),
                'timestamp': int_or_none(podcast.get('podcast_timestamp')),
                'series': data.get('series_name'),
            }