]>
Commit | Line | Data |
---|---|---|
1 | import itertools | |
2 | import random | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | clean_html, | |
8 | determine_ext, | |
9 | dict_get, | |
10 | ExtractorError, | |
11 | int_or_none, | |
12 | js_to_json, | |
13 | str_or_none, | |
14 | strip_or_none, | |
15 | traverse_obj, | |
16 | try_get, | |
17 | url_or_none, | |
18 | ) | |
19 | ||
20 | ||
class TVPIE(InfoExtractor):
    """Extractor for regular TVP web pages (articles, regional sites, playlists).

    Pages are either client-side rendered (they expose ``window.__*Data``
    JavaScript objects) or classic server-side rendered pages that embed the
    player. Actual media extraction is delegated to ``TVPEmbedIE`` through
    ``url_transparent`` results.
    """
    IE_NAME = 'tvp'
    IE_DESC = 'Telewizja Polska'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)(?:[/?#]|$)'

    _TESTS = [{
        # TVPlayer 2 in js wrapper
        'url': 'https://swipeto.pl/64095316/uliczny-foxtrot-wypozyczalnia-kaset-kto-pamieta-dvdvideo',
        'info_dict': {
            'id': '64095316',
            'ext': 'mp4',
            'title': 'Uliczny Foxtrot — Wypożyczalnia kaset. Kto pamięta DVD-Video?',
            'age_limit': 0,
            'duration': 374,
            'thumbnail': r're:https://.+',
        },
        'expected_warnings': [
            'Failed to download ISM manifest: HTTP Error 404: Not Found',
            'Failed to download m3u8 information: HTTP Error 404: Not Found',
        ],
    }, {
        # TVPlayer legacy
        'url': 'https://www.tvp.pl/polska-press-video-uploader/wideo/62042351',
        'info_dict': {
            'id': '62042351',
            'ext': 'mp4',
            'title': 'Wideo',
            'description': 'Wideo Kamera',
            'duration': 24,
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer 2 in iframe
        'url': 'https://wiadomosci.tvp.pl/50725617/dzieci-na-sprzedaz-dla-homoseksualistow',
        'info_dict': {
            'id': '50725617',
            'ext': 'mp4',
            'title': 'Dzieci na sprzedaż dla homoseksualistów',
            'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
            'age_limit': 12,
            'duration': 259,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer 2 in client-side rendered website (regional; window.__newsData)
        'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
        'info_dict': {
            'id': '25804446',
            'ext': 'mp4',
            'title': 'Studio Yayo',
            'upload_date': '20160616',
            'timestamp': 1466075700,
            'age_limit': 0,
            'duration': 20,
            'thumbnail': r're:https://.+',
        },
        'skip': 'Geo-blocked outside PL',
    }, {
        # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
        'url': 'https://www.tvp.info/52880236/09042021-0800',
        'info_dict': {
            'id': '52880236',
            'ext': 'mp4',
            'title': '09.04.2021, 08:00',
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
        'skip': 'Geo-blocked outside PL',
    }, {
        # client-side rendered (regional) program (playlist) page
        'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
        'info_dict': {
            'id': '9660819',
            'description': 'Od poniedziałku do piątku o 18:55',
            'title': 'Rozmowa dnia',
        },
        'playlist_mincount': 1800,
        'params': {
            'skip_download': True,
        },
    }, {
        # ABC-specific video embedding
        # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450
        'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
        'info_dict': {
            'id': '48320456',
            'ext': 'mp4',
            'title': 'Teleranek, Żubr',
        },
        'skip': 'unavailable',
    }, {
        # yet another vue page
        'url': 'https://jp2.tvp.pl/46925618/filmy',
        'info_dict': {
            'id': '46925618',
            'title': 'Filmy',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
        'only_matching': True,
    }, {
        'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
        'only_matching': True,
    }, {
        'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
        'only_matching': True,
    }, {
        'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
        'only_matching': True,
    }, {
        'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
        'only_matching': True,
    }, {
        'url': 'https://tvp.info/49193823/teczowe-flagi-na-pomnikach-prokuratura-wszczela-postepowanie-wieszwiecej',
        'only_matching': True,
    }, {
        'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
        'only_matching': True,
    }, {
        'url': 'https://tvpworld.com/48583640/tescos-polish-business-bought-by-danish-chain-netto',
        'only_matching': True,
    }]

    def _parse_vue_website_data(self, webpage, page_id):
        """Parse the ``window.__websiteData`` / ``window.__directoryData`` object.

        Returns the parsed object, or None when the page defines neither.
        """
        # default=None makes a missing object non-fatal so that callers can
        # decide what to do (raise, or stop paginating).
        website_data = self._search_regex([
            # website - regiony, tvp.info
            # directory - jp2.tvp.pl
            r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
        ], webpage, 'website data', default=None)
        if not website_data:
            return None
        return self._parse_json(website_data, page_id, transform_source=js_to_json)

    def _extract_vue_video(self, video_data, page_id=None):
        """Build a ``url_transparent`` result from a vue video/news data object.

        ``video_data`` may be the raw JavaScript object source (str) or an
        already parsed dict. ``page_id`` is used as the id when the data has
        no ``_id``. Entries of type ``website`` are routed back to ``TVPIE``
        by their URL; everything else goes to ``TVPEmbedIE`` as ``tvp:<id>``.

        Raises ExtractorError if a non-website entry has no usable id.
        """
        if isinstance(video_data, str):
            video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)

        # 'image' is either a single object or a list of them
        images = video_data.get('image') or []
        if not isinstance(images, list):
            images = [images]
        thumbnails = [
            {'url': thumb_url}
            for thumb_url in (str_or_none(traverse_obj(image, 'url')) for image in images)
            if thumb_url
        ]

        video_id = str_or_none(video_data.get('_id') or page_id)
        is_website = video_data.get('type') == 'website'
        if is_website:
            url = video_data['url']
        elif video_id is None:
            # Without an id there is nothing to hand over to TVPEmbedIE
            raise ExtractorError('Unable to determine video id')
        else:
            url = 'tvp:' + video_id

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': url,
            'ie_key': (TVPIE if is_website else TVPEmbedIE).ie_key(),
            'title': str_or_none(video_data.get('title')),
            'description': str_or_none(video_data.get('lead')),
            'timestamp': int_or_none(video_data.get('release_date_long')),
            'duration': int_or_none(video_data.get('duration')),
            'thumbnails': thumbnails,
        }

    def _handle_vuejs_page(self, url, webpage, page_id):
        """Handle a client-side rendered page: single video first, then playlist.

        Raises ExtractorError when the page carries neither kind of data.
        """
        # vue client-side rendered sites (all regional pages + tvp.info)
        video_data = self._search_regex([
            r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
        ], webpage, 'video data', default=None)
        if video_data:
            return self._extract_vue_video(video_data, page_id=page_id)

        # paged playlists
        website_data = self._parse_vue_website_data(webpage, page_id)
        if not website_data:
            raise ExtractorError('Could not extract video/website data')

        return {
            '_type': 'playlist',
            'id': page_id,
            'title': str_or_none(website_data.get('title')),
            'description': str_or_none(website_data.get('lead')),
            'entries': self._vuejs_entries(url, website_data, page_id),
        }

    def _vuejs_entries(self, url, website_data, page_id):
        """Lazily yield playlist entries, fetching further pages on demand.

        The first page comes from ``website_data``. Following pages are
        requested with ``?page=N`` until a page has no videos/items or no
        website data at all.
        """

        def extract_videos(wd):
            if wd.get('latestVideo'):
                yield self._extract_vue_video(wd['latestVideo'])
            for video in wd.get('videos') or []:
                yield self._extract_vue_video(video)
            for video in wd.get('items') or []:
                yield self._extract_vue_video(video)

        yield from extract_videos(website_data)

        # Missing/non-numeric counters are treated as 0 rather than compared
        # as None, which would raise TypeError.
        total_count = int_or_none(website_data.get('items_total_count')) or 0
        per_page = int_or_none(website_data.get('items_per_page')) or 0
        if total_count <= per_page:
            return

        for page in itertools.count(2):
            page_website_data = self._parse_vue_website_data(
                self._download_webpage(
                    url, page_id, note='Downloading page #%d' % page,
                    query={'page': page}),
                page_id)
            if not page_website_data:
                break
            if not (page_website_data.get('videos') or page_website_data.get('items')):
                break
            yield from extract_videos(page_website_data)

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, page_id)

        # The URL may redirect to a VOD
        # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
        for ie_cls in (TVPVODSeriesIE, TVPVODVideoIE):
            if ie_cls.suitable(urlh.url):
                return self.url_result(urlh.url, ie=ie_cls.ie_key(), video_id=page_id)

        if re.search(r'window\.__(?:video|news|website|directory)Data\s*=', webpage):
            return self._handle_vuejs_page(url, webpage, page_id)

        # classic server-side rendered sites
        video_id = self._search_regex([
            r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
            r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
            r"object_id\s*:\s*'(\d+)'",
            r'data-video-id="(\d+)"',

            # abc.tvp.pl - somehow there are more than one video IDs that seem to be the same video?
            # the first one is referenced to as "copyid", and seems to be unused by the website
            r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
        ], webpage, 'video id', default=page_id)

        # Prefer the OpenGraph description; the plain <meta> description is
        # only consulted on pages referencing the portal asset path below.
        description = self._og_search_description(webpage, default=None)
        if not description and '//s.tvp.pl/files/portal/v' in webpage:
            description = self._html_search_meta('description', webpage, default=None)

        return {
            '_type': 'url_transparent',
            'url': 'tvp:' + video_id,
            'description': description or None,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'ie_key': 'TVPEmbed',
        }
267 | ||
268 | ||
class TVPStreamIE(InfoExtractor):
    """Extractor for TVP live channels (``stream.tvp.pl`` / ``tvpstream:`` URLs).

    Looks up the channel in the page's ``window.__channels`` list, picks the
    item currently flagged as live and hands its video id to ``TVPEmbedIE``.
    """
    IE_NAME = 'tvp:stream'
    _VALID_URL = r'(?:tvpstream:|https?://(?:tvpstream\.vod|stream)\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
    _TESTS = [{
        'url': 'https://stream.tvp.pl/?channel_id=56969941',
        'only_matching': True,
    }, {
        # untestable as "video" id changes many times across a day
        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
        'only_matching': True,
    }, {
        'url': 'tvpstream:39821455',
        'only_matching': True,
    }, {
        # the default stream when you provide no channel_id, most probably TVP Info
        'url': 'tvpstream:',
        'only_matching': True,
    }, {
        'url': 'https://tvpstream.vod.tvp.pl/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the channel's current live item into a ``tvp:<video_id>`` result.

        Raises ExtractorError when the channel or its live item cannot be found.
        """
        channel_id = self._match_id(url)
        display_id = channel_id or 'default'

        # An empty channel id is sent as-is ("channel_id="); in that case the
        # first channel of the returned list is used below.
        channel_url = self._proto_relative_url('//stream.tvp.pl/?channel_id=%s' % channel_id)
        webpage = self._download_webpage(channel_url, display_id, 'Downloading channel webpage')
        channels = self._search_json(
            r'window\.__channels\s*=', webpage, 'channel list', channel_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*]')

        if channel_id:
            channel = traverse_obj(
                channels, (lambda _, v: channel_id == str(v['id'])), get_all=False)
        else:
            channel = traverse_obj(channels, 0)
        if not channel:
            raise ExtractorError('Unable to find channel %s in the channel list' % display_id)

        audition = traverse_obj(
            channel, ('items', lambda _, v: v['is_live'] is True), get_all=False)
        live_video_id = traverse_obj(audition, 'video_id')
        if live_video_id is None:
            raise ExtractorError('Unable to find a live item for channel %s' % display_id)

        return {
            '_type': 'url_transparent',
            'id': channel_id or channel['id'],
            'url': 'tvp:%s' % live_video_id,
            'title': audition.get('title'),
            'alt_title': channel.get('title'),
            'is_live': True,
            'ie_key': 'TVPEmbed',
        }
309 | ||
310 | ||
class TVPEmbedIE(InfoExtractor):
    """Extractor for the TVP player itself (``tvp:<id>`` and player/embed URLs).

    Queries the TVPlayer2 JSONP API (``getTvpConfig``) and turns the returned
    file list into formats, plus thumbnails, subtitles and basic metadata.
    """
    IE_NAME = 'tvp:embed'
    IE_DESC = 'Telewizja Polska'
    _GEO_BYPASS = False
    _VALID_URL = r'''(?x)
        (?:
            tvp:
            |https?://
                (?:[^/]+\.)?
                (?:tvp(?:parlament)?\.pl|tvp\.info|tvpworld\.com|swipeto\.pl)/
                (?:sess/
                        (?:tvplayer\.php\?.*?object_id
                        |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd])
                    |shared/details\.php\?.*?object_id)
                =)
        (?P<id>\d+)
    '''
    # _VALID_URL[4:] drops the leading "(?x)" flag, which is re-added up front
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL[4:]})']

    _TESTS = [{
        'url': 'tvp:194536',
        'info_dict': {
            'id': '194536',
            'ext': 'mp4',
            'title': 'Czas honoru, odc. 13 – Władek',
            'description': 'md5:76649d2014f65c99477be17f23a4dead',
            'age_limit': 12,
            'duration': 2652,
            'series': 'Czas honoru',
            'episode': 'Episode 13',
            'episode_number': 13,
            'season': 'sezon 1',
            'thumbnail': r're:https://.+',
        },
    }, {
        'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&autoplay=false',
        'info_dict': {
            'id': '51247504',
            'ext': 'mp4',
            'title': 'Razmova 091220',
            'duration': 876,
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer2 embed URL
        'url': 'https://tvp.info/sess/TVPlayer2/embed.php?ID=50595757',
        'only_matching': True,
    }, {
        'url': 'https://wiadomosci.tvp.pl/sess/TVPlayer2/api.php?id=51233452',
        'only_matching': True,
    }, {
        # pulsembed on dziennik.pl
        'url': 'https://www.tvp.pl/shared/details.php?copy_id=52205981&object_id=52204505&autoplay=false&is_muted=false&allowfullscreen=true&template=external-embed/video/iframe-video.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the player config for the video and build its info dict.

        Raises ExtractorError when the API reports an error (including
        pay-walled content) and a geo-restriction error when the API flags
        the video as geo-blocked.
        """
        video_id = self._match_id(url)

        # it could be anything that is a valid JS function name
        callback = random.choice((
            'jebac_pis',
            'jebacpis',
            'ziobro',
            'sasin70',
            'sasin_przejebal_70_milionow_PLN',
            'tvp_is_a_state_propaganda_service',
        ))

        webpage = self._download_webpage(
            ('https://www.tvp.pl/sess/TVPlayer2/api.php?id=%s'
             + '&@method=getTvpConfig&@callback=%s') % (video_id, callback), video_id)

        # stripping JSONP padding
        # NOTE(review): assumes a fixed 15-character wrapper plus the callback
        # name before the payload and 3 trailing characters after it - confirm
        # against a live response.
        datastr = webpage[15 + len(callback):-3]
        if datastr.startswith('null,'):
            error = self._parse_json(datastr[5:], video_id, fatal=False)
            error_desc = traverse_obj(error, (0, 'desc'))

            if error_desc == 'Obiekt wymaga płatności':
                raise ExtractorError('Video requires payment and log-in, but log-in is not implemented')

            raise ExtractorError(error_desc or 'unexpected JSON error')

        content = self._parse_json(datastr, video_id)['content']
        info = content['info']
        is_live = try_get(info, lambda x: x['isLive'], bool)

        if info.get('isGeoBlocked'):
            # actual country list is not provided, we just assume it's always available in PL
            self.raise_geo_restricted(countries=['PL'])

        formats = []
        # A missing file list simply yields no formats instead of a KeyError
        for file in content.get('files') or []:
            video_url = url_or_none(file.get('url'))
            if not video_url:
                continue
            ext = determine_ext(video_url, None)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, m3u8_id='hls', fatal=False, live=is_live))
            elif ext == 'mpd':
                if is_live:
                    # doesn't work with either ffmpeg or native downloader
                    continue
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, mpd_id='dash', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id='hds', fatal=False))
            elif video_url.endswith('.ism/manifest'):
                formats.extend(self._extract_ism_formats(
                    video_url, video_id, ism_id='mss', fatal=False))
            else:
                formats.append({
                    'format_id': 'direct',
                    'url': video_url,
                    'ext': ext or file.get('type'),
                    'fps': int_or_none(traverse_obj(file, ('quality', 'fps'))),
                    'tbr': int_or_none(traverse_obj(file, ('quality', 'bitrate')), scale=1000),
                    'width': int_or_none(traverse_obj(file, ('quality', 'width'))),
                    'height': int_or_none(traverse_obj(file, ('quality', 'height'))),
                })

        title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
        description = dict_get(info, ('description', 'seoDescription'))

        thumbnails = []
        for thumb in content.get('posters') or ():
            thumb_url = thumb.get('src')
            # skip templated URLs that still carry size placeholders
            if not thumb_url or '{width}' in thumb_url or '{height}' in thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            })

        age_limit = try_get(info, lambda x: x['ageGroup']['minAge'], int)
        if age_limit == 1:
            age_limit = 0
        duration = None if is_live else try_get(info, lambda x: x['duration'], int)

        subtitles = {}
        for sub in content.get('subtitles') or []:
            if not sub.get('url'):
                continue
            # entries without a language tag are filed under 'und'
            subtitles.setdefault(sub.get('lang') or 'und', []).append({
                'url': sub['url'],
                'ext': sub.get('type'),
            })

        info_dict = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'age_limit': age_limit,
            'is_live': is_live,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

        # vod.tvp.pl
        if info.get('vortalName') == 'vod':
            # "<series title>, <episode subtitle>"; a missing part is left out
            # rather than rendered as the literal string "None".
            title_parts = filter(None, map(str_or_none, (info.get('title'), info.get('subtitle'))))
            info_dict.update({
                'title': ', '.join(title_parts) or title,
                'series': info.get('title'),
                'season': info.get('season'),
                'episode_number': info.get('episode'),
            })

        return info_dict
480 | ||
481 | ||
class TVPVODBaseIE(InfoExtractor):
    """Shared helpers for the vod.tvp.pl extractors (API access, metadata parsing)."""
    _API_BASE_URL = 'https://vod.tvp.pl/api/products'

    def _call_api(self, resource, video_id, query=None, **kwargs):
        """Request ``resource`` from the VOD products API and return the JSON body.

        ``query`` holds extra query parameters merged over the fixed
        ``lang``/``platform`` ones; ``kwargs`` are passed on to
        ``_download_json_handle``. 4xx responses are downloaded as well so
        that the API's error code can be reported.

        Raises ExtractorError for any non-2xx response.
        """
        def is_valid(status):
            return 200 <= status < 300

        document, urlh = self._download_json_handle(
            f'{self._API_BASE_URL}/{resource}', video_id,
            query={'lang': 'pl', 'platform': 'BROWSER', **(query or {})},
            expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
        if is_valid(urlh.status):
            return document
        # traverse_obj tolerates error bodies that are not JSON objects
        error_code = traverse_obj(document, 'code')
        raise ExtractorError(f'Woronicza said: {error_code} (HTTP {urlh.status})')

    def _parse_video(self, video, with_url=True):
        """Convert an API video object into an info dict.

        With ``with_url`` set, the result is a ``url`` type entry pointing at
        the video's ``webUrl`` for ``TVPVODVideoIE``; otherwise only the
        metadata fields are returned.
        """
        info_dict = traverse_obj(video, {
            'id': ('id', {str_or_none}),
            'title': 'title',
            'age_limit': ('rating', {int_or_none}),
            'duration': ('duration', {int_or_none}),
            'episode_number': ('number', {int_or_none}),
            'series': ('season', 'serial', 'title', {str_or_none}),
            'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
        })
        # first non-empty of 'lead' / 'description', with HTML stripped
        info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
        if with_url:
            info_dict.update({
                '_type': 'url',
                'url': video['webUrl'],
                'ie_key': TVPVODVideoIE.ie_key(),
            })
        return info_dict
513 | ||
514 | ||
class TVPVODVideoIE(TVPVODBaseIE):
    """Extractor for single vod.tvp.pl videos, episodes and live channels."""
    IE_NAME = 'tvp:vod'
    _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
        'info_dict': {
            'id': '311357',
            'ext': 'mp4',
            'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
            'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
            'duration': 300,
            'episode_number': 24,
            'episode': 'Episode 24',
            'age_limit': 0,
            'series': 'Laboratorium alchemika',
            'thumbnail': 're:https?://.+',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
        'info_dict': {
            'id': '339667',
            'ext': 'mp4',
            'title': 'Ukraiński sługa narodu',
            'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
            'age_limit': 12,
            'duration': 3051,
            'thumbnail': 're:https?://.+',
            'subtitles': 'count:2',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'embed fails with "payment required"',
        'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
        'info_dict': {
            'id': '398869',
            'ext': 'mp4',
            'title': 'odc. 7',
            'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
            'duration': 2750,
            'age_limit': 16,
            'series': 'Polowanie na ćmy',
            'episode_number': 7,
            'episode': 'Episode 7',
            'thumbnail': 're:https?://.+',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://vod.tvp.pl/live,1/tvp-world,399731',
        'info_dict': {
            'id': '399731',
            'ext': 'mp4',
            'title': r're:TVP WORLD \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'live_status': 'is_live',
            'thumbnail': 're:https?://.+',
        },
    }]

    def _real_extract(self, url):
        """Fetch metadata and the playlist for one video and build its info dict."""
        category, video_id = self._match_valid_url(url).group('category', 'id')

        # URLs under the "live,1" category are served by the 'lives' endpoint
        is_live = category == 'live,1'
        entity = 'lives' if is_live else 'vods'
        info_dict = self._parse_video(
            self._call_api(f'{entity}/{video_id}', video_id), with_url=False)

        playlist = self._call_api(
            f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})

        formats = []
        for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
            formats.extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
        for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
            formats.extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))

        subtitles = {}
        for sub in playlist.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                # an entry without a URL cannot be downloaded; skip it
                continue
            subtitles.setdefault(sub.get('language') or 'und', []).append({
                'url': sub_url,
                'ext': 'ttml',
            })

        info_dict.update({
            'formats': formats,
            'subtitles': subtitles,
            'is_live': is_live,
        })
        return info_dict
599 | ||
600 | ||
class TVPVODSeriesIE(TVPVODBaseIE):
    """Extractor for vod.tvp.pl series pages; yields every episode of every season."""
    IE_NAME = 'tvp:vod:series'
    _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+-odcinki,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/seriale,18/ranczo-odcinki,316445',
        'info_dict': {
            'id': '316445',
            'title': 'Ranczo',
            'age_limit': 12,
            'categories': ['seriale'],
        },
        'playlist_count': 130,
    }, {
        'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
        'only_matching': True,
    }, {
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338',
        'only_matching': True,
    }]

    def _entries(self, seasons, playlist_id):
        """Lazily yield episode entries, one API request per season."""
        for season in seasons:
            season_id = season['id']
            # The title is only used in the progress note, so fall back to
            # the id instead of failing when it is missing.
            season_label = season.get('title') or season_id
            episodes = self._call_api(
                f'vods/serials/{playlist_id}/seasons/{season_id}/episodes', playlist_id,
                note=f'Downloading episode list for {season_label}')
            yield from map(self._parse_video, episodes)

    def _real_extract(self, url):
        """Build the series playlist from its metadata and season list."""
        playlist_id = self._match_id(url)
        metadata = self._call_api(
            f'vods/serials/{playlist_id}', playlist_id,
            note='Downloading serial metadata')
        seasons = self._call_api(
            f'vods/serials/{playlist_id}/seasons', playlist_id,
            note='Downloading season list')

        # Avoid reporting a one-element list holding None when the series
        # has no main category.
        main_category = traverse_obj(metadata, ('mainCategory', 'name'))

        return self.playlist_result(
            self._entries(seasons, playlist_id), playlist_id,
            strip_or_none(metadata.get('title')),
            clean_html(traverse_obj(metadata, ('description', 'lead'), expected_type=strip_or_none)),
            categories=[main_category] if main_category else None,
            age_limit=int_or_none(metadata.get('rating')),
        )