4 from .common
import InfoExtractor
15 class WyborczaVideoIE(InfoExtractor
):
16 # this id is not an article id, it has to be extracted from the article
17 _VALID_URL
= r
'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
18 IE_NAME
= 'wyborcza:video'
20 'url': 'wyborcza:video:26207634',
24 'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
26 'uploader': 'Dorota Roman',
28 'thumbnail': r
're:https://.+\.jpg',
31 'url': 'https://wyborcza.pl/video/26207634',
32 'only_matching': True,
34 'url': 'https://wyborcza.pl/api-video/26207634',
35 'only_matching': True,
38 def _real_extract(self
, url
):
39 video_id
= self
._match
_id
(url
)
40 meta
= self
._download
_json
(f
'https://wyborcza.pl/api-video/{video_id}', video_id
)
43 base_url
= meta
['redirector'].replace('http://', 'https://') + meta
['basePath']
44 for quality
in ('standard', 'high'):
45 if not meta
['files'].get(quality
):
48 'url': base_url
+ meta
['files'][quality
],
49 'height': int_or_none(
51 r
'p(\d+)[a-z]+\.mp4$', meta
['files'][quality
],
52 'mp4 video height', default
=None)),
55 if meta
['files'].get('dash'):
56 formats
.extend(self
._extract
_mpd
_formats
(base_url
+ meta
['files']['dash'], video_id
))
58 self
._sort
_formats
(formats
)
62 'title': meta
.get('title'),
63 'description': meta
.get('lead'),
64 'uploader': meta
.get('signature'),
65 'thumbnail': meta
.get('imageUrl'),
66 'duration': meta
.get('duration'),
70 class WyborczaPodcastIE(InfoExtractor
):
72 https?://(?:www\.)?(?:
73 wyborcza\.pl/podcast(?:/0,172673\.html)?|
74 wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
75 )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
78 'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
82 'title': 'Cyfrodziewczyny. Kim były pionierki polskiej informatyki ',
83 'uploader': 'Michał Nogaś ',
84 'upload_date': '20210117',
85 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
87 'thumbnail': r
're:https://.+\.jpg',
90 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
94 'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
95 'uploader': 'Agnieszka Urazińska ',
96 'upload_date': '20210115',
97 'description': 'md5:c161dc035f8dbb60077011fc41274899',
99 'thumbnail': r
're:https://.+\.jpg',
102 'url': 'https://wyborcza.pl/podcast',
105 'title': 'Gościnnie: Wyborcza, 8:10',
106 'series': 'Gościnnie: Wyborcza, 8:10',
108 'playlist_mincount': 370,
110 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
113 'title': 'Gościnnie: Wysokie Obcasy',
114 'series': 'Gościnnie: Wysokie Obcasy',
116 'playlist_mincount': 12,
119 def _real_extract(self
, url
):
120 podcast_id
= self
._match
_id
(url
)
122 if not podcast_id
: # playlist
123 podcast_id
= '395' if 'wysokieobcasy.pl/' in url
else '334'
124 return self
.url_result(TokFMAuditionIE
._create
_url
(podcast_id
), TokFMAuditionIE
, podcast_id
)
126 meta
= self
._download
_json
('https://wyborcza.pl/api/podcast', podcast_id
,
127 query
={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None}
)
129 day
, month
, year
= self
._search
_regex
(r
'^(\d\d?) (\w+) (\d{4})$', meta
.get('publishedDate'),
130 'upload date', group
=(1, 2, 3), default
=(None, None, None))
134 'title': meta
.get('title'),
135 'description': meta
.get('description'),
136 'thumbnail': meta
.get('imageUrl'),
137 'duration': parse_duration(meta
.get('duration')),
138 'uploader': meta
.get('author'),
139 'upload_date': try_call(lambda: f
'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'),
143 class TokFMPodcastIE(InfoExtractor
):
144 _VALID_URL
= r
'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P<id>\d+),?'
145 IE_NAME
= 'tokfm:podcast'
147 'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
151 'title': 'md5:a9b15488009065556900169fb8061cce',
152 'episode': 'md5:a9b15488009065556900169fb8061cce',
157 def _real_extract(self
, url
):
158 media_id
= self
._match
_id
(url
)
160 # in case it breaks see this but it returns a lot of useless data
161 # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
162 metadata
= self
._download
_json
(
163 f
'https://audycje.tokfm.pl/getp/3{media_id}', media_id
, 'Downloading podcast metadata')
165 raise ExtractorError('No such podcast', expected
=True)
166 metadata
= metadata
[0]
169 for ext
in ('aac', 'mp3'):
170 url_data
= self
._download
_json
(
171 f
'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
172 media_id
, 'Downloading podcast %s URL' % ext
)
173 # prevents inserting the mp3 (default) multiple times
174 if 'link_ssl' in url_data
and f
'.{ext}' in url_data
['link_ssl']:
176 'url': url_data
['link_ssl'],
182 self
._sort
_formats
(formats
)
186 'title': metadata
.get('podcast_name'),
187 'series': metadata
.get('series_name'),
188 'episode': metadata
.get('podcast_name'),
192 class TokFMAuditionIE(InfoExtractor
):
193 _VALID_URL
= r
'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
194 IE_NAME
= 'tokfm:audition'
196 'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
202 'playlist_count': 1635,
207 'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
212 return f
'https://audycje.tokfm.pl/audycja/{id}'
214 def _real_extract(self
, url
):
215 audition_id
= self
._match
_id
(url
)
217 data
= self
._download
_json
(
218 f
'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
219 audition_id
, 'Downloading audition metadata', headers
=self
._HEADERS
)
221 raise ExtractorError('No such audition', expected
=True)
224 entries
= OnDemandPagedList(functools
.partial(
225 self
._fetch
_page
, audition_id
, data
), self
._PAGE
_SIZE
)
230 'title': data
.get('series_name'),
231 'series': data
.get('series_name'),
235 def _fetch_page(self
, audition_id
, data
, page
):
236 for retry
in self
.RetryManager():
237 podcast_page
= self
._download
_json
(
238 f
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
239 audition_id
, f
'Downloading podcast list page {page + 1}', headers
=self
._HEADERS
)
241 retry
.error
= ExtractorError('Agora returned empty page', expected
=True)
243 for podcast
in podcast_page
:
245 '_type': 'url_transparent',
246 'url': podcast
['podcast_sharing_url'],
247 'ie_key': TokFMPodcastIE
.ie_key(),
248 'title': podcast
.get('podcast_name'),
249 'episode': podcast
.get('podcast_name'),
250 'description': podcast
.get('podcast_description'),
251 'timestamp': int_or_none(podcast
.get('podcast_timestamp')),
252 'series': data
.get('series_name'),