]>
Commit | Line | Data |
---|---|---|
78545664 | 1 | import functools |
2 | import uuid | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | ExtractorError, | |
7 | OnDemandPagedList, | |
8 | int_or_none, | |
9 | month_by_name, | |
10 | parse_duration, | |
11 | try_call, | |
12 | ) | |
13 | ||
14 | ||
class WyborczaVideoIE(InfoExtractor):
    """Extractor for a single wyborcza.pl video addressed by its numeric video id."""

    # this id is not an article id, it has to be extracted from the article
    _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
    IE_NAME = 'wyborcza:video'
    _TESTS = [{
        'url': 'wyborcza:video:26207634',
        'info_dict': {
            'id': '26207634',
            'ext': 'mp4',
            'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
            'description': ' ',
            'uploader': 'Dorota Roman',
            'duration': 2474,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/video/26207634',
        'only_matching': True,
    }, {
        'url': 'https://wyborcza.pl/api-video/26207634',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video's JSON description and build the info dict.

        Progressive files are listed under the 'standard' and 'high' keys of
        the 'files' mapping; an optional 'dash' key points at an MPD manifest.
        """
        video_id = self._match_id(url)
        meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)

        # The redirector is forced to https before the base path is appended.
        base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
        files = meta['files']

        formats = []
        for format_id in ('standard', 'high'):
            file_name = files.get(format_id)
            if file_name:
                media_url = base_url + file_name
                # The height is read from the file name suffix (e.g. "p720xyz.mp4").
                height = self._search_regex(
                    r'p(\d+)[a-z]+\.mp4$', file_name,
                    'mp4 video height', default=None)
                formats.append({
                    'url': media_url,
                    'height': int_or_none(height),
                    'format_id': format_id,
                })

        dash_manifest = files.get('dash')
        if dash_manifest:
            formats.extend(self._extract_mpd_formats(base_url + dash_manifest, video_id))

        self._sort_formats(formats)

        info = {
            'id': video_id,
            'formats': formats,
            'title': meta.get('title'),
            'description': meta.get('lead'),
            'uploader': meta.get('signature'),
            'thumbnail': meta.get('imageUrl'),
            'duration': meta.get('duration'),
        }
        return info
68 | ||
69 | ||
class WyborczaPodcastIE(InfoExtractor):
    """Extractor for podcasts published on wyborcza.pl and wysokieobcasy.pl.

    A URL carrying a ``podcast=<id>`` query parameter yields one episode;
    a URL without it is delegated to TokFMAuditionIE as a playlist.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:
            wyborcza\.pl/podcast(?:/0,172673\.html)?|
            wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
        )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
    '''
    _TESTS = [{
        'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
        'info_dict': {
            'id': '100720',
            'ext': 'mp3',
            'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
            'uploader': 'Michał Nogaś ',
            'upload_date': '20210117',
            'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
            'duration': 3684.0,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
        'info_dict': {
            'id': '100673',
            'ext': 'mp3',
            'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
            'uploader': 'Agnieszka Urazińska ',
            'upload_date': '20210115',
            'description': 'md5:c161dc035f8dbb60077011fc41274899',
            'duration': 1803.0,
            'thumbnail': r're:https://.+\.jpg',
        },
    }, {
        'url': 'https://wyborcza.pl/podcast',
        'info_dict': {
            'id': '334',
            'title': 'Gościnnie: Wyborcza, 8:10',
            'series': 'Gościnnie: Wyborcza, 8:10',
        },
        'playlist_mincount': 370,
    }, {
        'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
        'info_dict': {
            'id': '395',
            'title': 'Gościnnie: Wysokie Obcasy',
            'series': 'Gościnnie: Wysokie Obcasy',
        },
        'playlist_mincount': 12,
    }]

    def _real_extract(self, url):
        """Return a single episode, or a redirect to the matching TOK FM audition."""
        podcast_id = self._match_id(url)
        from_wysokie_obcasy = 'wysokieobcasy.pl/' in url

        if not podcast_id:  # playlist
            # Each site maps to a fixed audition id handled by TokFMAuditionIE.
            podcast_id = '395' if from_wysokie_obcasy else '334'
            playlist_url = TokFMAuditionIE._create_url(podcast_id)
            return self.url_result(playlist_url, TokFMAuditionIE, podcast_id)

        api_query = {
            'guid': podcast_id,
            'type': 'wo' if from_wysokie_obcasy else None,
        }
        meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id, query=api_query)

        # publishedDate is matched as "<day> <month name> <year>".
        day, month, year = self._search_regex(
            r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'),
            'upload date', group=(1, 2, 3), default=(None, None, None))

        def build_upload_date():
            # Formatting a None component raises; try_call is relied upon to
            # turn that into a missing upload_date.
            return f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'

        return {
            'id': podcast_id,
            'url': meta['url'],
            'title': meta.get('title'),
            'description': meta.get('description'),
            'thumbnail': meta.get('imageUrl'),
            'duration': parse_duration(meta.get('duration')),
            'uploader': meta.get('author'),
            'upload_date': try_call(build_upload_date),
        }
141 | ||
142 | ||
class TokFMPodcastIE(InfoExtractor):
    """Extractor for a single TOK FM podcast episode (audio only)."""

    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:podcast'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
        'info_dict': {
            'id': '91275',
            'ext': 'aac',
            'title': 'md5:a9b15488009065556900169fb8061cce',
            'episode': 'md5:a9b15488009065556900169fb8061cce',
            'series': 'Analizy',
        },
    }]

    def _real_extract(self, url):
        """Fetch episode metadata, then ask the API for an aac and an mp3 URL.

        Raises ExtractorError (expected) when the metadata response is empty.
        """
        media_id = self._match_id(url)

        # in case it breaks see this but it returns a lot of useless data
        # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
        podcast_list = self._download_json(
            f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
        if not podcast_list:
            raise ExtractorError('No such podcast', expected=True)
        podcast = podcast_list[0]

        formats = []
        for codec in ('aac', 'mp3'):
            # A fresh random device id is generated for every request.
            device_id = uuid.uuid4()
            song = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={device_id}&ppre=false&audio={codec}',
                media_id, f'Downloading podcast {codec} URL')
            # prevents inserting the mp3 (default) multiple times
            if 'link_ssl' not in song or f'.{codec}' not in song['link_ssl']:
                continue
            formats.append({
                'url': song['link_ssl'],
                'ext': codec,
                'vcodec': 'none',
                'acodec': codec,
            })

        self._sort_formats(formats)

        episode_title = podcast.get('podcast_name')
        return {
            'id': media_id,
            'formats': formats,
            'title': episode_title,
            'series': podcast.get('series_name'),
            'episode': episode_title,
        }
190 | ||
191 | ||
class TokFMAuditionIE(InfoExtractor):
    """Extractor for a TOK FM audition (series), returned as a lazy playlist."""

    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
    IE_NAME = 'tokfm:audition'
    _TESTS = [{
        'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
        'info_dict': {
            'id': '218',
            'title': 'Analizy',
            'series': 'Analizy',
        },
        'playlist_count': 1635,
    }]

    # Number of episodes per page; used both for OnDemandPagedList and for
    # the `limit` parameter of the getPodcasts request so the two cannot drift.
    _PAGE_SIZE = 30
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
    }

    @staticmethod
    def _create_url(id):
        """Return the canonical audition page URL for the given audition id."""
        return f'https://audycje.tokfm.pl/audycja/{id}'

    def _real_extract(self, url):
        """Fetch series metadata and return a playlist with lazily paged entries.

        Raises ExtractorError (expected) when the series response is empty.
        """
        audition_id = self._match_id(url)

        data = self._download_json(
            f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
            audition_id, 'Downloading audition metadata', headers=self._HEADERS)
        if not data:
            raise ExtractorError('No such audition', expected=True)
        data = data[0]

        # Pages are only downloaded when the playlist entries are consumed.
        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, audition_id, data), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': audition_id,
            'title': data.get('series_name'),
            'series': data.get('series_name'),
            'entries': entries,
        }

    def _fetch_page(self, audition_id, data, page):
        """Yield url_transparent entries for one page of the audition.

        `data` is the series metadata dict (its 'series_name' is copied to
        every entry); `page` is the zero-based page index.
        An empty response is reported to the RetryManager as an error.
        """
        for retry in self.RetryManager():
            # NOTE(review): `offset` receives the page index, not an item
            # offset - presumably the API pages by index; confirm.
            podcast_page = self._download_json(
                f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit={self._PAGE_SIZE}&offset={page}&with_guests=true&with_leaders_for_mobile=true',
                audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
            if not podcast_page:
                retry.error = ExtractorError('Agora returned empty page', expected=True)

        for podcast in podcast_page:
            yield {
                '_type': 'url_transparent',
                'url': podcast['podcast_sharing_url'],
                'ie_key': TokFMPodcastIE.ie_key(),
                'title': podcast.get('podcast_name'),
                'episode': podcast.get('podcast_name'),
                'description': podcast.get('podcast_description'),
                'timestamp': int_or_none(podcast.get('podcast_timestamp')),
                'series': data.get('series_name'),
            }