]>
Commit | Line | Data |
---|---|---|
1 | import itertools | |
2 | import random | |
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | clean_html, | |
8 | determine_ext, | |
9 | dict_get, | |
10 | ExtractorError, | |
11 | int_or_none, | |
12 | js_to_json, | |
13 | str_or_none, | |
14 | strip_or_none, | |
15 | traverse_obj, | |
16 | try_get, | |
17 | url_or_none, | |
18 | ) | |
19 | ||
20 | ||
class TVPIE(InfoExtractor):
    """Extractor for pages on tvp.pl, tvp.info, tvpworld.com and swipeto.pl.

    Pages are either client-side rendered (data is embedded in
    ``window.__*Data`` JavaScript assignments) or classic server-side
    rendered pages embedding a player. Actual media extraction is delegated
    to ``TVPEmbedIE`` through ``tvp:<id>`` URLs.
    """
    IE_NAME = 'tvp'
    IE_DESC = 'Telewizja Polska'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'

    _TESTS = [{
        # TVPlayer 2 in js wrapper
        'url': 'https://swipeto.pl/64095316/uliczny-foxtrot-wypozyczalnia-kaset-kto-pamieta-dvdvideo',
        'info_dict': {
            'id': '64095316',
            'ext': 'mp4',
            'title': 'Uliczny Foxtrot — Wypożyczalnia kaset. Kto pamięta DVD-Video?',
            'age_limit': 0,
            'duration': 374,
            'thumbnail': r're:https://.+',
        },
        'expected_warnings': [
            'Failed to download ISM manifest: HTTP Error 404: Not Found',
            'Failed to download m3u8 information: HTTP Error 404: Not Found',
        ],
    }, {
        # TVPlayer legacy
        'url': 'https://www.tvp.pl/polska-press-video-uploader/wideo/62042351',
        'info_dict': {
            'id': '62042351',
            'ext': 'mp4',
            'title': 'Wideo',
            'description': 'Wideo Kamera',
            'duration': 24,
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer 2 in iframe
        'url': 'https://wiadomosci.tvp.pl/50725617/dzieci-na-sprzedaz-dla-homoseksualistow',
        'info_dict': {
            'id': '50725617',
            'ext': 'mp4',
            'title': 'Dzieci na sprzedaż dla homoseksualistów',
            'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
            'age_limit': 12,
            'duration': 259,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer 2 in client-side rendered website (regional; window.__newsData)
        'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
        'info_dict': {
            'id': '25804446',
            'ext': 'mp4',
            'title': 'Studio Yayo',
            'upload_date': '20160616',
            'timestamp': 1466075700,
            'age_limit': 0,
            'duration': 20,
            'thumbnail': r're:https://.+',
        },
        'skip': 'Geo-blocked outside PL',
    }, {
        # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
        'url': 'https://www.tvp.info/52880236/09042021-0800',
        'info_dict': {
            'id': '52880236',
            'ext': 'mp4',
            'title': '09.04.2021, 08:00',
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
        'skip': 'Geo-blocked outside PL',
    }, {
        # client-side rendered (regional) program (playlist) page
        'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
        'info_dict': {
            'id': '9660819',
            'description': 'Od poniedziałku do piątku o 18:55',
            'title': 'Rozmowa dnia',
        },
        'playlist_mincount': 1800,
        'params': {
            'skip_download': True,
        }
    }, {
        # ABC-specific video embedding
        # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450
        'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
        'info_dict': {
            'id': '48320456',
            'ext': 'mp4',
            'title': 'Teleranek, Żubr',
        },
        'skip': 'unavailable',
    }, {
        # yet another vue page
        'url': 'https://jp2.tvp.pl/46925618/filmy',
        'info_dict': {
            'id': '46925618',
            'title': 'Filmy',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
        'only_matching': True,
    }, {
        'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
        'only_matching': True,
    }, {
        'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
        'only_matching': True,
    }, {
        'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
        'only_matching': True,
    }, {
        'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
        'only_matching': True,
    }, {
        'url': 'https://tvp.info/49193823/teczowe-flagi-na-pomnikach-prokuratura-wszczela-postepowanie-wieszwiecej',
        'only_matching': True,
    }, {
        'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
        'only_matching': True,
    }, {
        'url': 'https://tvpworld.com/48583640/tescos-polish-business-bought-by-danish-chain-netto',
        'only_matching': True,
    }]

    def _parse_vue_website_data(self, webpage, page_id):
        """Return the parsed ``window.__websiteData``/``__directoryData`` object.

        Returns None when the page carries no such assignment, so callers can
        tell "no data" (e.g. a page past the end of a paged playlist) apart
        from a parsing failure.
        """
        website_data = self._search_regex([
            # website - regiony, tvp.info
            # directory - jp2.tvp.pl
            r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
        ], webpage, 'website data', default=None)
        # default=None is required: without it the search raises on a miss and
        # the guard below could never be reached
        if not website_data:
            return None
        return self._parse_json(website_data, page_id, transform_source=js_to_json)

    def _extract_vue_video(self, video_data, page_id=None):
        """Build a ``url_transparent`` result from a vue video/news object.

        ``video_data`` is either the raw JavaScript object source (str) or an
        already parsed dict. ``page_id`` is used as the video id when the
        object has no ``_id`` of its own.

        Raises ExtractorError when a non-website entry has neither ``_id`` nor
        a ``page_id`` to build the ``tvp:`` URL from.
        """
        if isinstance(video_data, str):
            video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)

        thumbnails = []
        image = video_data.get('image')
        if image:
            # 'image' may be a single object or a list of them
            for thumb in (image if isinstance(image, list) else [image]):
                thmb_url = str_or_none(thumb.get('url'))
                if thmb_url:
                    thumbnails.append({
                        'url': thmb_url,
                    })

        video_id = str_or_none(video_data.get('_id') or page_id)
        is_website = video_data.get('type') == 'website'
        if is_website:
            # entries of type 'website' point at another page handled by TVPIE
            url = video_data['url']
        else:
            if not video_id:
                # previously this surfaced as a TypeError from 'tvp:' + None
                raise ExtractorError('Unable to determine video id')
            url = 'tvp:' + video_id
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': url,
            'ie_key': (TVPIE if is_website else TVPEmbedIE).ie_key(),
            'title': str_or_none(video_data.get('title')),
            'description': str_or_none(video_data.get('lead')),
            'timestamp': int_or_none(video_data.get('release_date_long')),
            'duration': int_or_none(video_data.get('duration')),
            'thumbnails': thumbnails,
        }

    def _handle_vuejs_page(self, url, webpage, page_id):
        """Handle a client-side rendered page: single video or paged playlist.

        Raises ExtractorError when neither video data nor website data can be
        found in the page.
        """
        # vue client-side rendered sites (all regional pages + tvp.info)
        video_data = self._search_regex([
            r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
        ], webpage, 'video data', default=None)
        if video_data:
            return self._extract_vue_video(video_data, page_id=page_id)
        # paged playlists
        website_data = self._parse_vue_website_data(webpage, page_id)
        if website_data:
            entries = self._vuejs_entries(url, website_data, page_id)

            return {
                '_type': 'playlist',
                'id': page_id,
                'title': str_or_none(website_data.get('title')),
                'description': str_or_none(website_data.get('lead')),
                'entries': entries,
            }
        raise ExtractorError('Could not extract video/website data')

    def _vuejs_entries(self, url, website_data, page_id):
        """Lazily yield playlist entries, downloading further pages on demand."""

        def extract_videos(wd):
            if wd.get('latestVideo'):
                yield self._extract_vue_video(wd['latestVideo'])
            for video in wd.get('videos') or []:
                yield self._extract_vue_video(video)
            for video in wd.get('items') or []:
                yield self._extract_vue_video(video)

        yield from extract_videos(website_data)

        # Only paginate when both counters are known; comparing None values
        # would raise TypeError.
        total_count = int_or_none(website_data.get('items_total_count'))
        per_page = int_or_none(website_data.get('items_per_page'))
        if total_count is None or per_page is None or total_count <= per_page:
            return

        for page in itertools.count(2):
            page_website_data = self._parse_vue_website_data(
                self._download_webpage(url, page_id, note='Downloading page #%d' % page,
                                       query={'page': page}),
                page_id)
            # stop on the first page without data or without any entries
            if not page_website_data or not (
                    page_website_data.get('videos') or page_website_data.get('items')):
                break
            yield from extract_videos(page_website_data)

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, page_id)

        # The URL may redirect to a VOD
        # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
        for ie_cls in (TVPVODSeriesIE, TVPVODVideoIE):
            if ie_cls.suitable(urlh.url):
                return self.url_result(urlh.url, ie=ie_cls.ie_key(), video_id=page_id)

        if re.search(
                r'window\.__(?:video|news|website|directory)Data\s*=',
                webpage):
            return self._handle_vuejs_page(url, webpage, page_id)

        # classic server-side rendered sites
        video_id = self._search_regex([
            r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
            r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
            r"object_id\s*:\s*'(\d+)'",
            r'data-video-id="(\d+)"',

            # abc.tvp.pl - somehow there are more than one video IDs that seem to be the same video?
            # the first one is referenced to as "copyid", and seems to be unused by the website
            r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
        ], webpage, 'video id', default=page_id)
        return {
            '_type': 'url_transparent',
            'url': 'tvp:' + video_id,
            # the <meta name="description"> fallback is only used on pages
            # that reference the '//s.tvp.pl/files/portal/v' assets
            'description': self._og_search_description(
                webpage, default=None) or (self._html_search_meta(
                    'description', webpage, default=None)
                if '//s.tvp.pl/files/portal/v' in webpage else None),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'ie_key': 'TVPEmbed',
        }
267 | ||
268 | ||
class TVPStreamIE(InfoExtractor):
    """Extractor for live channels on tvpstream.vod.tvp.pl.

    Resolves the currently playing video id of a channel and hands it over to
    ``TVPEmbedIE`` via a ``tvp:<id>`` URL.
    """
    IE_NAME = 'tvp:stream'
    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
    _TESTS = [{
        # untestable as "video" id changes many times across a day
        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
        'only_matching': True,
    }, {
        'url': 'tvpstream:39821455',
        'only_matching': True,
    }, {
        # the default stream when you provide no channel_id, most probably TVP Info
        'url': 'tvpstream:',
        'only_matching': True,
    }, {
        'url': 'https://tvpstream.vod.tvp.pl/',
        'only_matching': True,
    }]

    # %s is replaced with 'channel' or 'video' to pick the data-*-id attribute
    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
    # groups: 1/2 = data-title (double/single quoted),
    #         3/4 = data-stationname (double/single quoted)
    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        # Parenthesised on purpose: '%' binds tighter than 'or', so without
        # the parentheses the 'default' fallback would never be used.
        channel_url = self._proto_relative_url(
            '//tvpstream.vod.tvp.pl/?channel_id=%s' % (channel_id or 'default'))
        webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage')
        if not channel_id:
            # no channel requested: use whichever one the player box reports
            channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel',
                                            webpage, 'default channel id')
        video_id = self._search_regex(self._PLAYER_BOX_RE % 'video',
                                      webpage, 'video id')
        # Fetch all four groups: each attribute has a double-quoted and a
        # single-quoted alternative, and only one of each pair matches.
        title_dq, title_sq, station_dq, station_sq = self._search_regex(
            self._BUTTON_RE % (re.escape(channel_id)), webpage,
            'audition title and station name',
            group=(1, 2, 3, 4))
        audition_title = title_dq if title_dq is not None else title_sq
        station_name = station_dq if station_dq is not None else station_sq
        return {
            '_type': 'url_transparent',
            'id': channel_id,
            'url': 'tvp:%s' % video_id,
            'title': audition_title,
            'alt_title': station_name,
            'is_live': True,
            'ie_key': 'TVPEmbed',
        }
313 | ||
314 | ||
class TVPEmbedIE(InfoExtractor):
    """Extractor for TVP player embeds and internal ``tvp:<id>`` URLs.

    Queries the TVPlayer2 ``getTvpConfig`` API (a JSONP endpoint) and builds
    formats, thumbnails and subtitles from its response.
    """
    IE_NAME = 'tvp:embed'
    IE_DESC = 'Telewizja Polska'
    _GEO_BYPASS = False
    _VALID_URL = r'''(?x)
        (?:
            tvp:
            |https?://
                (?:[^/]+\.)?
                (?:tvp(?:parlament)?\.pl|tvp\.info|tvpworld\.com|swipeto\.pl)/
                    (?:sess/
                            (?:tvplayer\.php\?.*?object_id
                            |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd])
                        |shared/details\.php\?.*?object_id)
                =)
        (?P<id>\d+)
    '''
    # _VALID_URL[4:] drops the leading '(?x)' flag, which is re-applied at the
    # start of the combined pattern
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL[4:]})']

    _TESTS = [{
        'url': 'tvp:194536',
        'info_dict': {
            'id': '194536',
            'ext': 'mp4',
            'title': 'Czas honoru, odc. 13 – Władek',
            'description': 'md5:76649d2014f65c99477be17f23a4dead',
            'age_limit': 12,
            'duration': 2652,
            'series': 'Czas honoru',
            'episode': 'Episode 13',
            'episode_number': 13,
            'season': 'sezon 1',
            'thumbnail': r're:https://.+',
        },
    }, {
        'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&autoplay=false',
        'info_dict': {
            'id': '51247504',
            'ext': 'mp4',
            'title': 'Razmova 091220',
            'duration': 876,
            'age_limit': 0,
            'thumbnail': r're:https://.+',
        },
    }, {
        # TVPlayer2 embed URL
        'url': 'https://tvp.info/sess/TVPlayer2/embed.php?ID=50595757',
        'only_matching': True,
    }, {
        'url': 'https://wiadomosci.tvp.pl/sess/TVPlayer2/api.php?id=51233452',
        'only_matching': True,
    }, {
        # pulsembed on dziennik.pl
        'url': 'https://www.tvp.pl/shared/details.php?copy_id=52205981&object_id=52204505&autoplay=false&is_muted=false&allowfullscreen=true&template=external-embed/video/iframe-video.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # it could be anything that is a valid JS function name
        callback = random.choice((
            'jebac_pis',
            'jebacpis',
            'ziobro',
            'sasin70',
            'sasin_przejebal_70_milionow_PLN',
            'tvp_is_a_state_propaganda_service',
        ))

        webpage = self._download_webpage(
            ('https://www.tvp.pl/sess/TVPlayer2/api.php?id=%s'
             + '&@method=getTvpConfig&@callback=%s') % (video_id, callback), video_id)

        # stripping JSONP padding
        # NOTE(review): the fixed offsets (15 + callback length at the front,
        # 3 at the back) assume a constant wrapper around the payload - confirm
        # against a live response if the API changes.
        datastr = webpage[15 + len(callback):-3]
        if datastr.startswith('null,'):
            # error responses carry 'null,' followed by a JSON error list
            error = self._parse_json(datastr[5:], video_id, fatal=False)
            error_desc = traverse_obj(error, (0, 'desc'))

            if error_desc == 'Obiekt wymaga płatności':
                raise ExtractorError('Video requires payment and log-in, but log-in is not implemented')

            raise ExtractorError(error_desc or 'unexpected JSON error')

        content = self._parse_json(datastr, video_id)['content']
        info = content['info']
        is_live = try_get(info, lambda x: x['isLive'], bool)

        if info.get('isGeoBlocked'):
            # actual country list is not provided, we just assume it's always available in PL
            self.raise_geo_restricted(countries=['PL'])

        formats = []
        for file_info in content['files']:
            video_url = url_or_none(file_info.get('url'))
            if not video_url:
                continue
            ext = determine_ext(video_url, None)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live))
            elif ext == 'mpd':
                if is_live:
                    # doesn't work with either ffmpeg or native downloader
                    continue
                formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
            elif video_url.endswith('.ism/manifest'):
                formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False))
            else:
                formats.append({
                    'format_id': 'direct',
                    'url': video_url,
                    'ext': ext or file_info.get('type'),
                    'fps': int_or_none(traverse_obj(file_info, ('quality', 'fps'))),
                    'tbr': int_or_none(traverse_obj(file_info, ('quality', 'bitrate')), scale=1000),
                    'width': int_or_none(traverse_obj(file_info, ('quality', 'width'))),
                    'height': int_or_none(traverse_obj(file_info, ('quality', 'height'))),
                })

        title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
        description = dict_get(info, ('description', 'seoDescription'))
        thumbnails = []
        for thumb in content.get('posters') or ():
            thumb_url = thumb.get('src')
            # skip URL templates with unfilled {width}/{height} placeholders
            if not thumb_url or '{width}' in thumb_url or '{height}' in thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            })
        age_limit = try_get(info, lambda x: x['ageGroup']['minAge'], int)
        if age_limit == 1:
            age_limit = 0
        duration = try_get(info, lambda x: x['duration'], int) if not is_live else None

        subtitles = {}
        for sub in content.get('subtitles') or []:
            if not sub.get('url'):
                continue
            # fall back to 'und' (undetermined) instead of raising KeyError
            # when an entry carries no language code
            subtitles.setdefault(sub.get('lang') or 'und', []).append({
                'url': sub['url'],
                'ext': sub.get('type'),
            })

        info_dict = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'age_limit': age_limit,
            'is_live': is_live,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

        # vod.tvp.pl
        if info.get('vortalName') == 'vod':
            # join only the parts that exist, so a missing title or subtitle
            # does not put a literal "None" into the title
            title_parts = [str(part) for part in (info.get('title'), info.get('subtitle')) if part]
            info_dict.update({
                'title': ', '.join(title_parts) or title,
                'series': info.get('title'),
                'season': info.get('season'),
                'episode_number': info.get('episode'),
            })

        return info_dict
484 | ||
485 | ||
class TVPVODBaseIE(InfoExtractor):
    """Shared helpers for the vod.tvp.pl extractors."""
    _API_BASE_URL = 'https://vod.tvp.pl/api/products'

    def _call_api(self, resource, video_id, **kwargs):
        """Download JSON for ``resource`` below the API base URL.

        Extra keyword arguments are forwarded to ``_download_json``.
        """
        endpoint = f'{self._API_BASE_URL}/{resource}'
        api_query = {'lang': 'pl', 'platform': 'BROWSER'}
        return self._download_json(endpoint, video_id, query=api_query, **kwargs)

    def _parse_video(self, video):
        """Turn an API video object into a ``url`` result for ``TVPEmbedIE``."""
        result = {
            '_type': 'url',
            'url': 'tvp:' + video['externalUid'],
            'ie_key': TVPEmbedIE.ie_key(),
        }
        result['title'] = video.get('title')
        result['description'] = traverse_obj(video, ('lead', 'description'))
        # numeric fields are coerced; unparsable values become None
        for field, source_key in (('age_limit', 'rating'), ('duration', 'duration')):
            result[field] = int_or_none(video.get(source_key))
        return result
504 | ||
505 | ||
class TVPVODVideoIE(TVPVODBaseIE):
    """Extractor for single videos (films or episodes) on vod.tvp.pl."""
    IE_NAME = 'tvp:vod'
    _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
        'info_dict': {
            'id': '60468609',
            'ext': 'mp4',
            'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
            'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
            'duration': 300,
            'episode_number': 24,
            'episode': 'Episode 24',
            'age_limit': 0,
            'series': 'Laboratorium alchemika',
            'thumbnail': 're:https://.+',
        },
    }, {
        'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
        'info_dict': {
            'id': '51640077',
            'ext': 'mp4',
            'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu',
            'series': 'Ukraiński sługa narodu',
            'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
            'age_limit': 12,
            'episode': 'Episode 0',
            'episode_number': 0,
            'duration': 3051,
            'thumbnail': 're:https://.+',
        },
    }]

    def _real_extract(self, url):
        """Fetch the VOD metadata for the id in ``url`` and delegate to the embed extractor."""
        video_id = self._match_id(url)
        vod_metadata = self._call_api(f'vods/{video_id}', video_id)
        return self._parse_video(vod_metadata)
544 | ||
545 | ||
class TVPVODSeriesIE(TVPVODBaseIE):
    """Extractor for whole series (``...-odcinki,<id>`` pages) on vod.tvp.pl."""
    IE_NAME = 'tvp:vod:series'
    _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+-odcinki,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$'

    _TESTS = [{
        'url': 'https://vod.tvp.pl/seriale,18/ranczo-odcinki,316445',
        'info_dict': {
            'id': '316445',
            'title': 'Ranczo',
            'age_limit': 12,
            'categories': ['seriale'],
        },
        'playlist_count': 129,
    }, {
        'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
        'only_matching': True,
    }, {
        'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338',
        'only_matching': True,
    }]

    def _entries(self, seasons, playlist_id):
        """Lazily yield one result per episode, fetching one season at a time."""
        for season in seasons:
            episodes = self._call_api(
                f'vods/serials/{playlist_id}/seasons/{season["id"]}/episodes', playlist_id,
                note=f'Downloading episode list for {season["title"]}')
            yield from map(self._parse_video, episodes)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        metadata = self._call_api(
            f'vods/serials/{playlist_id}', playlist_id,
            note='Downloading serial metadata')
        seasons = self._call_api(
            f'vods/serials/{playlist_id}/seasons', playlist_id,
            note='Downloading season list')
        # report categories only when the API names one; wrapping a missing
        # value would produce [None]
        main_category = traverse_obj(metadata, ('mainCategory', 'name'))
        return self.playlist_result(
            self._entries(seasons, playlist_id), playlist_id, strip_or_none(metadata.get('title')),
            clean_html(traverse_obj(metadata, ('description', 'lead'), expected_type=strip_or_none)),
            categories=[main_category] if main_category else None,
            age_limit=int_or_none(metadata.get('rating')),
        )