]>
Commit | Line | Data |
---|---|---|
d9308378 | 1 | import itertools |
c15de6ff | 2 | import random |
29f400b9 TF |
3 | import re |
4 | ||
5137ebac | 5 | from .common import InfoExtractor |
6e3c2047 | 6 | from ..utils import ( |
e897bd82 | 7 | ExtractorError, |
728f4b5c | 8 | clean_html, |
d9308378 | 9 | determine_ext, |
c15de6ff | 10 | dict_get, |
c15de6ff LL |
11 | int_or_none, |
12 | js_to_json, | |
c15de6ff | 13 | str_or_none, |
728f4b5c | 14 | strip_or_none, |
15 | traverse_obj, | |
c15de6ff | 16 | try_get, |
728f4b5c | 17 | url_or_none, |
6e3c2047 | 18 | ) |
c3a3028f | 19 | |
5137ebac | 20 | |
6f8cb242 S |
class TVPIE(InfoExtractor):
    """Extractor for article/video/playlist pages on TVP-run websites.

    Pages either embed the TVP player (delegated to ``TVPEmbedIE`` through a
    ``tvp:<id>`` URL), are Vue client-side rendered pages exposing
    ``window.__*Data`` objects, or redirect to vod.tvp.pl.
    """
    IE_NAME = 'tvp'
    IE_DESC = 'Telewizja Polska'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)(?:[/?#]|$)'

    _TESTS = [{
        # TVPlayer 2 in js wrapper
        'url': 'https://swipeto.pl/64095316/uliczny-foxtrot-wypozyczalnia-kaset-kto-pamieta-dvdvideo',
        'info_dict': {
            'id': '64095316',
            'ext': 'mp4',
            'title': 'Uliczny Foxtrot — Wypożyczalnia kaset. Kto pamięta DVD-Video?',
            'age_limit': 0,
            'duration': 374,
            'thumbnail': r're:https://.+',
        },
        'expected_warnings': [
            'Failed to download ISM manifest: HTTP Error 404: Not Found',
            'Failed to download m3u8 information: HTTP Error 404: Not Found',
        ],
    }, {
        # TVPlayer legacy
        'url': 'https://www.tvp.pl/polska-press-video-uploader/wideo/62042351',
        'info_dict': {
            'id': '62042351',
            'ext': 'mp4',
            'title': 'Wideo',
            'description': 'Wideo Kamera',
            'duration': 24,
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer 2 in iframe
        'url': 'https://wiadomosci.tvp.pl/50725617/dzieci-na-sprzedaz-dla-homoseksualistow',
        'info_dict': {
            'id': '50725617',
            'ext': 'mp4',
            'title': 'Dzieci na sprzedaż dla homoseksualistów',
            'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
            'age_limit': 12,
            'duration': 259,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer 2 in client-side rendered website (regional; window.__newsData)
        'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
        'info_dict': {
            'id': '25804446',
            'ext': 'mp4',
            'title': 'Studio Yayo',
            'upload_date': '20160616',
            'timestamp': 1466075700,
            'age_limit': 0,
            'duration': 20,
            'thumbnail': r're:https://.+',
        },
        'skip': 'Geo-blocked outside PL',
    }, {
        # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
        'url': 'https://www.tvp.info/52880236/09042021-0800',
        'info_dict': {
            'id': '52880236',
            'ext': 'mp4',
            'title': '09.04.2021, 08:00',
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
        'skip': 'Geo-blocked outside PL',
    }, {
        # client-side rendered (regional) program (playlist) page
        'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
        'info_dict': {
            'id': '9660819',
            'description': 'Od poniedziałku do piątku o 18:55',
            'title': 'Rozmowa dnia',
        },
        'playlist_mincount': 1800,
        'params': {
            'skip_download': True,
        },
    }, {
        # ABC-specific video embedding
        # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450
        'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
        'info_dict': {
            'id': '48320456',
            'ext': 'mp4',
            'title': 'Teleranek, Żubr',
        },
        'skip': 'unavailable',
    }, {
        # yet another vue page
        'url': 'https://jp2.tvp.pl/46925618/filmy',
        'info_dict': {
            'id': '46925618',
            'title': 'Filmy',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
        'only_matching': True,
    }, {
        'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
        'only_matching': True,
    }, {
        'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
        'only_matching': True,
    }, {
        'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
        'only_matching': True,
    }, {
        'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
        'only_matching': True,
    }, {
        'url': 'https://tvp.info/49193823/teczowe-flagi-na-pomnikach-prokuratura-wszczela-postepowanie-wieszwiecej',
        'only_matching': True,
    }, {
        'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
        'only_matching': True,
    }, {
        'url': 'https://tvpworld.com/48583640/tescos-polish-business-bought-by-danish-chain-netto',
        'only_matching': True,
    }]

    def _parse_vue_website_data(self, webpage, page_id):
        """Return the parsed ``window.__websiteData``/``__directoryData`` object.

        Returns None when the page does not contain such an assignment, so
        callers can decide how to react instead of the regex search aborting.
        """
        website_data = self._search_regex([
            # website - regiony, tvp.info
            # directory - jp2.tvp.pl
            r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
        ], webpage, 'website data', default=None)  # non-fatal: the None branch below must be reachable
        if not website_data:
            return None
        return self._parse_json(website_data, page_id, transform_source=js_to_json)

    def _extract_vue_video(self, video_data, page_id=None):
        """Build a ``url_transparent`` result from a Vue video/news data object.

        ``video_data`` may be the raw JS object source (str) or an already
        parsed dict. Entries of type ``website`` are routed back to this
        extractor via their own URL; anything else goes to ``TVPEmbedIE`` as
        ``tvp:<id>``.

        Raises ExtractorError if a non-website entry has neither ``_id`` nor a
        fallback ``page_id``.
        """
        if isinstance(video_data, str):
            video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)

        image = video_data.get('image')
        thumbnails = []
        if image:
            # `image` is either a single object or a list of them
            for thumb in (image if isinstance(image, list) else [image]):
                thmb_url = str_or_none(thumb.get('url'))
                if thmb_url:
                    thumbnails.append({
                        'url': thmb_url,
                    })

        video_id = str_or_none(video_data.get('_id') or page_id)
        is_website = video_data.get('type') == 'website'
        if is_website:
            url = video_data['url']
        else:
            if not video_id:
                # previously crashed with TypeError on `'tvp:' + None`
                raise ExtractorError('Unable to determine video id')
            url = 'tvp:' + video_id

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': url,
            'ie_key': (TVPIE if is_website else TVPEmbedIE).ie_key(),
            'title': str_or_none(video_data.get('title')),
            'description': str_or_none(video_data.get('lead')),
            'timestamp': int_or_none(video_data.get('release_date_long')),
            'duration': int_or_none(video_data.get('duration')),
            'thumbnails': thumbnails,
        }

    def _handle_vuejs_page(self, url, webpage, page_id):
        """Extract a single video or a paged playlist from a Vue-rendered page.

        Raises ExtractorError when neither video data nor website data can be
        found in the page.
        """
        # vue client-side rendered sites (all regional pages + tvp.info)
        video_data = self._search_regex([
            r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
        ], webpage, 'video data', default=None)
        if video_data:
            return self._extract_vue_video(video_data, page_id=page_id)
        # paged playlists
        website_data = self._parse_vue_website_data(webpage, page_id)
        if website_data:
            entries = self._vuejs_entries(url, website_data, page_id)

            return {
                '_type': 'playlist',
                'id': page_id,
                'title': str_or_none(website_data.get('title')),
                'description': str_or_none(website_data.get('lead')),
                'entries': entries,
            }
        raise ExtractorError('Could not extract video/website data')

    def _vuejs_entries(self, url, website_data, page_id):
        """Yield playlist entries from ``website_data`` and its following pages.

        Further pages are requested with ``?page=N`` (N starting at 2) until a
        page yields no website data or has neither ``videos`` nor ``items``.
        """

        def extract_videos(wd):
            if wd.get('latestVideo'):
                yield self._extract_vue_video(wd['latestVideo'])
            for video in wd.get('videos') or []:
                yield self._extract_vue_video(video)
            for video in wd.get('items') or []:
                yield self._extract_vue_video(video)

        yield from extract_videos(website_data)

        # Missing/non-numeric counters are treated as 0 instead of raising
        # TypeError on a None comparison.
        total_count = int_or_none(website_data.get('items_total_count')) or 0
        per_page = int_or_none(website_data.get('items_per_page')) or 0
        if total_count <= per_page:
            return

        for page in itertools.count(2):
            page_website_data = self._parse_vue_website_data(
                self._download_webpage(url, page_id, note=f'Downloading page #{page}',
                                       query={'page': page}),
                page_id)
            if not page_website_data:
                break
            if not page_website_data.get('videos') and not page_website_data.get('items'):
                break
            yield from extract_videos(page_website_data)

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, page_id)

        # The URL may redirect to a VOD
        # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
        for ie_cls in (TVPVODSeriesIE, TVPVODVideoIE):
            if ie_cls.suitable(urlh.url):
                return self.url_result(urlh.url, ie=ie_cls.ie_key(), video_id=page_id)

        if re.search(
                r'window\.__(?:video|news|website|directory)Data\s*=',
                webpage):
            return self._handle_vuejs_page(url, webpage, page_id)

        # classic server-side rendered sites
        video_id = self._search_regex([
            r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
            r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
            r"object_id\s*:\s*'(\d+)'",
            r'data-video-id="(\d+)"',

            # abc.tvp.pl - somehow there are more than one video IDs that seem to be the same video?
            # the first one is referenced to as "copyid", and seems to be unused by the website
            r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
        ], webpage, 'video id', default=page_id)
        return {
            '_type': 'url_transparent',
            'url': 'tvp:' + video_id,
            # <meta name="description"> is only consulted on pages that load
            # assets from //s.tvp.pl/files/portal/v
            'description': self._og_search_description(
                webpage, default=None) or (self._html_search_meta(
                    'description', webpage, default=None)
                    if '//s.tvp.pl/files/portal/v' in webpage else None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'ie_key': 'TVPEmbed',
        }
267 | ||
268 | ||
ebfab36f LL |
class TVPStreamIE(InfoExtractor):
    """Extractor for live channels listed on stream.tvp.pl.

    Resolves the currently live item of a channel and hands its ``video_id``
    to ``TVPEmbedIE`` through a ``tvp:<id>`` URL.
    """
    IE_NAME = 'tvp:stream'
    _VALID_URL = r'(?:tvpstream:|https?://(?:tvpstream\.vod|stream)\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
    _TESTS = [{
        'url': 'https://stream.tvp.pl/?channel_id=56969941',
        'only_matching': True,
    }, {
        # untestable as "video" id changes many times across a day
        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
        'only_matching': True,
    }, {
        'url': 'tvpstream:39821455',
        'only_matching': True,
    }, {
        # the default stream when you provide no channel_id, most probably TVP Info
        'url': 'tvpstream:',
        'only_matching': True,
    }, {
        'url': 'https://tvpstream.vod.tvp.pl/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # May be an empty string: the id group of _VALID_URL is `\d*`
        channel_id = self._match_id(url)
        # The fallback has to be applied to the id itself; `f'...' or 'default'`
        # never triggers because the formatted string is always non-empty.
        channel_url = self._proto_relative_url(
            f'//stream.tvp.pl/?channel_id={channel_id or "default"}')
        webpage = self._download_webpage(channel_url, channel_id or 'default', 'Downloading channel webpage')
        channels = self._search_json(
            r'window\.__channels\s*=', webpage, 'channel list', channel_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*]')
        # Without an explicit id the first listed channel is used
        channel = traverse_obj(channels, (lambda _, v: channel_id == str(v['id'])), get_all=False) if channel_id else channels[0]
        if not channel:
            raise ExtractorError(f'Channel {channel_id} was not found in the channel list', expected=True)
        audition = traverse_obj(channel, ('items', lambda _, v: v['is_live'] is True), get_all=False)
        if not audition:
            raise ExtractorError('No live item found for this channel', expected=True)
        return {
            '_type': 'url_transparent',
            'id': channel_id or channel['id'],
            'url': 'tvp:{}'.format(audition['video_id']),
            'title': audition.get('title'),
            'alt_title': channel.get('title'),
            'is_live': True,
            'ie_key': 'TVPEmbed',
        }
309 | ||
310 | ||
fdd0b8f8 RA |
311 | class TVPEmbedIE(InfoExtractor): |
312 | IE_NAME = 'tvp:embed' | |
313 | IE_DESC = 'Telewizja Polska' | |
728f4b5c | 314 | _GEO_BYPASS = False |
56bb56f3 LL |
315 | _VALID_URL = r'''(?x) |
316 | (?: | |
317 | tvp: | |
318 | |https?:// | |
319 | (?:[^/]+\.)? | |
728f4b5c | 320 | (?:tvp(?:parlament)?\.pl|tvp\.info|tvpworld\.com|swipeto\.pl)/ |
56bb56f3 LL |
321 | (?:sess/ |
322 | (?:tvplayer\.php\?.*?object_id | |
323 | |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd]) | |
324 | |shared/details\.php\?.*?object_id) | |
325 | =) | |
326 | (?P<id>\d+) | |
327 | ''' | |
bfd973ec | 328 | _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL[4:]})'] |
fdd0b8f8 RA |
329 | |
330 | _TESTS = [{ | |
3c964737 | 331 | 'url': 'tvp:194536', |
3c964737 S |
332 | 'info_dict': { |
333 | 'id': '194536', | |
334 | 'ext': 'mp4', | |
335 | 'title': 'Czas honoru, odc. 13 – Władek', | |
56bb56f3 LL |
336 | 'description': 'md5:76649d2014f65c99477be17f23a4dead', |
337 | 'age_limit': 12, | |
728f4b5c | 338 | 'duration': 2652, |
339 | 'series': 'Czas honoru', | |
340 | 'episode': 'Episode 13', | |
341 | 'episode_number': 13, | |
342 | 'season': 'sezon 1', | |
343 | 'thumbnail': r're:https://.+', | |
3c964737 S |
344 | }, |
345 | }, { | |
56bb56f3 | 346 | 'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&autoplay=false', |
fdd0b8f8 | 347 | 'info_dict': { |
56bb56f3 | 348 | 'id': '51247504', |
fdd0b8f8 | 349 | 'ext': 'mp4', |
56bb56f3 | 350 | 'title': 'Razmova 091220', |
728f4b5c | 351 | 'duration': 876, |
352 | 'age_limit': 0, | |
353 | 'thumbnail': r're:https://.+', | |
fdd0b8f8 RA |
354 | }, |
355 | }, { | |
56bb56f3 LL |
356 | # TVPlayer2 embed URL |
357 | 'url': 'https://tvp.info/sess/TVPlayer2/embed.php?ID=50595757', | |
358 | 'only_matching': True, | |
359 | }, { | |
360 | 'url': 'https://wiadomosci.tvp.pl/sess/TVPlayer2/api.php?id=51233452', | |
361 | 'only_matching': True, | |
362 | }, { | |
363 | # pulsembed on dziennik.pl | |
364 | 'url': 'https://www.tvp.pl/shared/details.php?copy_id=52205981&object_id=52204505&autoplay=false&is_muted=false&allowfullscreen=true&template=external-embed/video/iframe-video.html', | |
fdd0b8f8 RA |
365 | 'only_matching': True, |
366 | }] | |
367 | ||
5137ebac | 368 | def _real_extract(self, url): |
fb4b030a | 369 | video_id = self._match_id(url) |
030aa5d9 | 370 | |
56bb56f3 LL |
371 | # it could be anything that is a valid JS function name |
372 | callback = random.choice(( | |
373 | 'jebac_pis', | |
374 | 'jebacpis', | |
375 | 'ziobro', | |
376 | 'sasin70', | |
377 | 'sasin_przejebal_70_milionow_PLN', | |
378 | 'tvp_is_a_state_propaganda_service', | |
379 | )) | |
380 | ||
29f400b9 | 381 | webpage = self._download_webpage( |
add96eb9 | 382 | f'https://www.tvp.pl/sess/TVPlayer2/api.php?id={video_id}&@method=getTvpConfig&@callback={callback}', video_id) |
56bb56f3 LL |
383 | |
384 | # stripping JSONP padding | |
385 | datastr = webpage[15 + len(callback):-3] | |
386 | if datastr.startswith('null,'): | |
728f4b5c | 387 | error = self._parse_json(datastr[5:], video_id, fatal=False) |
388 | error_desc = traverse_obj(error, (0, 'desc')) | |
389 | ||
390 | if error_desc == 'Obiekt wymaga płatności': | |
391 | raise ExtractorError('Video requires payment and log-in, but log-in is not implemented') | |
392 | ||
393 | raise ExtractorError(error_desc or 'unexpected JSON error') | |
56bb56f3 LL |
394 | |
395 | content = self._parse_json(datastr, video_id)['content'] | |
396 | info = content['info'] | |
397 | is_live = try_get(info, lambda x: x['isLive'], bool) | |
29f400b9 | 398 | |
728f4b5c | 399 | if info.get('isGeoBlocked'): |
400 | # actual country list is not provided, we just assume it's always available in PL | |
401 | self.raise_geo_restricted(countries=['PL']) | |
402 | ||
6e3c2047 | 403 | formats = [] |
56bb56f3 | 404 | for file in content['files']: |
728f4b5c | 405 | video_url = url_or_none(file.get('url')) |
56bb56f3 LL |
406 | if not video_url: |
407 | continue | |
728f4b5c | 408 | ext = determine_ext(video_url, None) |
409 | if ext == 'm3u8': | |
56bb56f3 | 410 | formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live)) |
728f4b5c | 411 | elif ext == 'mpd': |
56bb56f3 LL |
412 | if is_live: |
413 | # doesn't work with either ffmpeg or native downloader | |
414 | continue | |
415 | formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False)) | |
728f4b5c | 416 | elif ext == 'f4m': |
56bb56f3 LL |
417 | formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False)) |
418 | elif video_url.endswith('.ism/manifest'): | |
419 | formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False)) | |
420 | else: | |
56bb56f3 LL |
421 | formats.append({ |
422 | 'format_id': 'direct', | |
423 | 'url': video_url, | |
728f4b5c | 424 | 'ext': ext or file.get('type'), |
425 | 'fps': int_or_none(traverse_obj(file, ('quality', 'fps'))), | |
426 | 'tbr': int_or_none(traverse_obj(file, ('quality', 'bitrate')), scale=1000), | |
427 | 'width': int_or_none(traverse_obj(file, ('quality', 'width'))), | |
428 | 'height': int_or_none(traverse_obj(file, ('quality', 'height'))), | |
56bb56f3 | 429 | }) |
fb4b030a | 430 | |
56bb56f3 LL |
431 | title = dict_get(info, ('subtitle', 'title', 'seoTitle')) |
432 | description = dict_get(info, ('description', 'seoDescription')) | |
433 | thumbnails = [] | |
434 | for thumb in content.get('posters') or (): | |
435 | thumb_url = thumb.get('src') | |
436 | if not thumb_url or '{width}' in thumb_url or '{height}' in thumb_url: | |
437 | continue | |
438 | thumbnails.append({ | |
439 | 'url': thumb.get('src'), | |
440 | 'width': thumb.get('width'), | |
441 | 'height': thumb.get('height'), | |
442 | }) | |
443 | age_limit = try_get(info, lambda x: x['ageGroup']['minAge'], int) | |
444 | if age_limit == 1: | |
445 | age_limit = 0 | |
446 | duration = try_get(info, lambda x: x['duration'], int) if not is_live else None | |
447 | ||
448 | subtitles = {} | |
449 | for sub in content.get('subtitles') or []: | |
450 | if not sub.get('url'): | |
451 | continue | |
452 | subtitles.setdefault(sub['lang'], []).append({ | |
453 | 'url': sub['url'], | |
454 | 'ext': sub.get('type'), | |
455 | }) | |
456 | ||
457 | info_dict = { | |
fb4b030a PH |
458 | 'id': video_id, |
459 | 'title': title, | |
56bb56f3 LL |
460 | 'description': description, |
461 | 'thumbnails': thumbnails, | |
462 | 'age_limit': age_limit, | |
463 | 'is_live': is_live, | |
464 | 'duration': duration, | |
fb4b030a | 465 | 'formats': formats, |
56bb56f3 | 466 | 'subtitles': subtitles, |
fb4b030a | 467 | } |
6ce2c678 | 468 | |
56bb56f3 LL |
469 | # vod.tvp.pl |
470 | if info.get('vortalName') == 'vod': | |
471 | info_dict.update({ | |
add96eb9 | 472 | 'title': '{}, {}'.format(info.get('title'), info.get('subtitle')), |
56bb56f3 LL |
473 | 'series': info.get('title'), |
474 | 'season': info.get('season'), | |
475 | 'episode_number': info.get('episode'), | |
476 | }) | |
477 | ||
478 | return info_dict | |
479 | ||
6ce2c678 | 480 | |
class TVPVODBaseIE(InfoExtractor):
    """Shared helpers for the vod.tvp.pl products API."""
    _API_BASE_URL = 'https://vod.tvp.pl/api/products'

    def _call_api(self, resource, video_id, query=None, **kwargs):
        """Fetch ``resource`` from the products API and return the JSON document.

        ``query`` holds extra query parameters merged over the default
        ``lang``/``platform`` ones; remaining keyword arguments are passed on
        to ``_download_json_handle``.

        Raises ExtractorError, including the document's ``code`` field, when
        the response status is in the 4xx range.
        """
        is_valid = lambda x: 200 <= x < 300
        document, urlh = self._download_json_handle(
            f'{self._API_BASE_URL}/{resource}', video_id,
            # `query` defaults to None rather than a shared mutable dict
            query={'lang': 'pl', 'platform': 'BROWSER', **(query or {})},
            # 4xx responses are accepted so their body can be reported below
            expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
        if is_valid(urlh.status):
            return document
        raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')

    def _parse_video(self, video, with_url=True):
        """Map an API video object to info-dict fields.

        With ``with_url`` set, the result is a ``url`` reference to the
        object's ``webUrl`` handled by ``TVPVODVideoIE``.
        """
        info_dict = traverse_obj(video, {
            'id': ('id', {str_or_none}),
            'title': 'title',
            'age_limit': ('rating', {int_or_none}),
            'duration': ('duration', {int_or_none}),
            'episode_number': ('number', {int_or_none}),
            'series': ('season', 'serial', 'title', {str_or_none}),
            'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
        })
        info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
        if with_url:
            info_dict.update({
                '_type': 'url',
                'url': video['webUrl'],
                'ie_key': TVPVODVideoIE.ie_key(),
            })
        return info_dict
728f4b5c | 512 | |
513 | ||
class TVPVODVideoIE(TVPVODBaseIE):
    """Extractor for single videos and live channels on vod.tvp.pl."""
    IE_NAME = 'tvp:vod'
    _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
        'info_dict': {
            'id': '311357',
            'ext': 'mp4',
            'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
            'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
            'duration': 300,
            'episode_number': 24,
            'episode': 'Episode 24',
            'age_limit': 0,
            'series': 'Laboratorium alchemika',
            'thumbnail': 're:https?://.+',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
        'info_dict': {
            'id': '339667',
            'ext': 'mp4',
            'title': 'Ukraiński sługa narodu',
            'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
            'age_limit': 12,
            'duration': 3051,
            'thumbnail': 're:https?://.+',
            'subtitles': 'count:2',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'embed fails with "payment required"',
        'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
        'info_dict': {
            'id': '398869',
            'ext': 'mp4',
            'title': 'odc. 7',
            'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
            'duration': 2750,
            'age_limit': 16,
            'series': 'Polowanie na ćmy',
            'episode_number': 7,
            'episode': 'Episode 7',
            'thumbnail': 're:https?://.+',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://vod.tvp.pl/live,1/tvp-world,399731',
        'info_dict': {
            'id': '399731',
            'ext': 'mp4',
            'title': r're:TVP WORLD \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'live_status': 'is_live',
            'thumbnail': 're:https?://.+',
        },
    }]

    def _real_extract(self, url):
        category, video_id = self._match_valid_url(url).group('category', 'id')

        # URLs under the `live,1` category are looked up as `lives`, others as `vods`
        is_live = category == 'live,1'
        entity = 'lives' if is_live else 'vods'
        info_dict = self._parse_video(self._call_api(f'{entity}/{video_id}', video_id), with_url=False)

        playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})

        info_dict['formats'] = []
        for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
            info_dict['formats'].extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
        for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
            info_dict['formats'].extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))

        info_dict['subtitles'] = {}
        for sub in playlist.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                # an entry without a URL is unusable; skip it rather than
                # aborting the whole extraction with KeyError
                continue
            info_dict['subtitles'].setdefault(sub.get('language') or 'und', []).append({
                'url': sub_url,
                'ext': 'ttml',
            })

        info_dict['is_live'] = is_live

        return info_dict
fb4b030a | 598 | |
d9308378 | 599 | |
class TVPVODSeriesIE(TVPVODBaseIE):
    """Extractor for series pages on vod.tvp.pl, returned as a playlist."""
    IE_NAME = 'tvp:vod:series'
    _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+-odcinki,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/seriale,18/ranczo-odcinki,316445',
        'info_dict': {
            'id': '316445',
            'title': 'Ranczo',
            'age_limit': 12,
            'categories': ['seriale'],
        },
        'playlist_count': 130,
    }, {
        'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
        'only_matching': True,
    }, {
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338',
        'only_matching': True,
    }]

    def _entries(self, seasons, playlist_id):
        """Yield one parsed entry per episode, season by season."""
        for season in seasons:
            resource = f'vods/serials/{playlist_id}/seasons/{season["id"]}/episodes'
            episode_list = self._call_api(
                resource, playlist_id,
                note=f'Downloading episode list for {season["title"]}')
            for episode in episode_list:
                yield self._parse_video(episode)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        serial_resource = f'vods/serials/{playlist_id}'

        # Serial metadata is requested before the season list
        metadata = self._call_api(
            serial_resource, playlist_id, note='Downloading serial metadata')
        seasons = self._call_api(
            f'{serial_resource}/seasons', playlist_id, note='Downloading season list')

        title = strip_or_none(metadata.get('title'))
        description = clean_html(
            traverse_obj(metadata, ('description', 'lead'), expected_type=strip_or_none))
        main_category = traverse_obj(metadata, ('mainCategory', 'name'))
        rating = int_or_none(metadata.get('rating'))

        return self.playlist_result(
            self._entries(seasons, playlist_id), playlist_id, title, description,
            categories=[main_category],
            age_limit=rating,
        )