]>
Commit | Line | Data |
---|---|---|
d9308378 | 1 | import itertools |
c15de6ff | 2 | import random |
29f400b9 TF |
3 | import re |
4 | ||
5137ebac | 5 | from .common import InfoExtractor |
6e3c2047 | 6 | from ..utils import ( |
d9308378 | 7 | determine_ext, |
c15de6ff | 8 | dict_get, |
6e3c2047 | 9 | ExtractorError, |
c15de6ff LL |
10 | int_or_none, |
11 | js_to_json, | |
d9308378 | 12 | orderedSet, |
c15de6ff LL |
13 | str_or_none, |
14 | try_get, | |
6e3c2047 | 15 | ) |
c3a3028f | 16 | |
5137ebac | 17 | |
class TVPIE(InfoExtractor):
    """Extractor for pages on tvp.pl, tvp.info, tvpparlament.pl and polandin.com.

    Two page flavours are handled:

    * Vue client-side rendered pages, recognised by a ``window.__*Data``
      assignment in the HTML (single videos and paged playlists);
    * classic server-side rendered pages, where a numeric video id is
      scraped from the markup.

    In both cases the result is a ``url_transparent`` entry (or a playlist
    of them) that hands over to ``TVPEmbed`` / ``TVPWebsite``.
    """
    IE_NAME = 'tvp'
    IE_DESC = 'Telewizja Polska'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|polandin\.com)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'

    _TESTS = [{
        # TVPlayer 2 in js wrapper
        'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
        'info_dict': {
            'id': '194536',
            'ext': 'mp4',
            'title': 'Czas honoru, odc. 13 – Władek',
            'description': 'md5:437f48b93558370b031740546b696e24',
            'age_limit': 12,
        },
    }, {
        # TVPlayer legacy
        'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
        'info_dict': {
            'id': '17916176',
            'ext': 'mp4',
            'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
            'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
        },
    }, {
        # TVPlayer 2 in iframe
        'url': 'https://wiadomosci.tvp.pl/50725617/dzieci-na-sprzedaz-dla-homoseksualistow',
        'info_dict': {
            'id': '50725617',
            'ext': 'mp4',
            'title': 'Dzieci na sprzedaż dla homoseksualistów',
            'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
            'age_limit': 12,
        },
    }, {
        # TVPlayer 2 in client-side rendered website (regional; window.__newsData)
        'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
        'info_dict': {
            'id': '25804446',
            'ext': 'mp4',
            'title': 'Studio Yayo',
            'upload_date': '20160616',
            'timestamp': 1466075700,
        }
    }, {
        # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
        'url': 'https://www.tvp.info/52880236/09042021-0800',
        'info_dict': {
            'id': '52880236',
            'ext': 'mp4',
            'title': '09.04.2021, 08:00',
        },
    }, {
        # client-side rendered (regional) program (playlist) page
        'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
        'info_dict': {
            'id': '9660819',
            'description': 'Od poniedziałku do piątku o 18:55',
            'title': 'Rozmowa dnia',
        },
        'playlist_mincount': 1800,
        'params': {
            'skip_download': True,
        }
    }, {
        # ABC-specific video embedding
        # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450
        'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
        'info_dict': {
            'id': '48320456',
            'ext': 'mp4',
            'title': 'Teleranek, Żubr',
        },
        'skip': 'unavailable',
    }, {
        # yet another vue page
        'url': 'https://jp2.tvp.pl/46925618/filmy',
        'info_dict': {
            'id': '46925618',
            'title': 'Filmy',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
        'only_matching': True,
    }, {
        'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
        'only_matching': True,
    }, {
        'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
        'only_matching': True,
    }, {
        'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
        'only_matching': True,
    }, {
        'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
        'only_matching': True,
    }, {
        'url': 'https://tvp.info/49193823/teczowe-flagi-na-pomnikach-prokuratura-wszczela-postepowanie-wieszwiecej',
        'only_matching': True,
    }, {
        'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
        'only_matching': True,
    }, {
        'url': 'https://polandin.com/47942651/pln-10-billion-in-subsidies-transferred-to-companies-pm',
        'only_matching': True,
    }]

    def _parse_vue_website_data(self, webpage, page_id):
        """Parse the ``window.__websiteData`` / ``window.__directoryData`` object.

        Returns the decoded object, or None when the page holds no such
        assignment (``default=None`` keeps the regex search from raising, so
        callers can decide how to react).
        """
        website_data = self._search_regex([
            # website - regional sites, tvp.info
            # directory - jp2.tvp.pl
            r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
        ], webpage, 'website data', default=None)
        if not website_data:
            return None
        return self._parse_json(website_data, page_id, transform_source=js_to_json)

    def _extract_vue_video(self, video_data, page_id=None):
        """Build a ``url_transparent`` result from one Vue video/website record.

        ``video_data`` is either an already decoded dict or the raw JS object
        text, which is decoded here. ``page_id`` is the fallback id when the
        record has no ``_id``.

        Raises ExtractorError when a non-website record yields no id at all,
        since the ``tvp:<id>`` URL cannot be built without one.
        """
        if isinstance(video_data, str):
            video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)

        thumbnails = []
        image = video_data.get('image')
        if image:
            # 'image' may be a single record or a list of records
            for thumb in (image if isinstance(image, list) else [image]):
                thumb_url = str_or_none(thumb.get('url'))
                if thumb_url:
                    thumbnails.append({
                        'url': thumb_url,
                    })

        video_id = str_or_none(video_data.get('_id') or page_id)
        is_website = video_data.get('type') == 'website'
        if is_website:
            url = video_data['url']
            # rewrite https://vod.tvp.pl/<id>/<slug> into the
            # https://vod.tvp.pl/website/<slug>,<id> form TVPWebsiteIE matches
            old_style_url = re.match(r'https?://vod\.tvp\.pl/(\d+)/([^/?#]+)', url)
            if old_style_url:
                url = f'https://vod.tvp.pl/website/{old_style_url.group(2)},{old_style_url.group(1)}'
        else:
            if not video_id:
                raise ExtractorError('Unable to determine video id')
            url = 'tvp:' + video_id

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': url,
            'ie_key': 'TVPEmbed' if not is_website else 'TVPWebsite',
            'title': str_or_none(video_data.get('title')),
            'description': str_or_none(video_data.get('lead')),
            'timestamp': int_or_none(video_data.get('release_date_long')),
            'duration': int_or_none(video_data.get('duration')),
            'thumbnails': thumbnails,
        }

    def _handle_vuejs_page(self, url, webpage, page_id):
        """Extract a single video or a playlist from a Vue rendered page.

        Raises ExtractorError when neither video data nor website data can
        be found in ``webpage``.
        """
        # vue client-side rendered sites (all regional pages + tvp.info)
        video_data = self._search_regex([
            r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
        ], webpage, 'video data', default=None)
        if video_data:
            return self._extract_vue_video(video_data, page_id=page_id)

        # paged playlists
        website_data = self._parse_vue_website_data(webpage, page_id)
        if website_data:
            entries = self._vuejs_entries(url, website_data, page_id)

            return {
                '_type': 'playlist',
                'id': page_id,
                'title': str_or_none(website_data.get('title')),
                'description': str_or_none(website_data.get('lead')),
                'entries': entries,
            }
        raise ExtractorError('Could not extract video/website data')

    def _vuejs_entries(self, url, website_data, page_id):
        """Yield playlist entries from ``website_data`` and its follow-up pages.

        Further pages (``?page=2``, ``?page=3``, ...) are requested only when
        the first page reports more items in total than fit on one page;
        paging stops at the first page with no website data or no videos/items.
        """

        def extract_videos(wd):
            if wd.get('latestVideo'):
                yield self._extract_vue_video(wd['latestVideo'])
            for video in wd.get('videos') or []:
                yield self._extract_vue_video(video)
            for video in wd.get('items') or []:
                yield self._extract_vue_video(video)

        yield from extract_videos(website_data)

        # Either counter may be absent; treat a missing value as 0 instead of
        # comparing None with None (a TypeError on Python 3).
        total_count = int_or_none(website_data.get('items_total_count')) or 0
        per_page = int_or_none(website_data.get('items_per_page')) or 0
        if total_count <= per_page:
            return

        for page in itertools.count(2):
            page_website_data = self._parse_vue_website_data(
                self._download_webpage(url, page_id, note='Downloading page #%d' % page,
                                       query={'page': page}),
                page_id)
            if (not page_website_data
                    or (not page_website_data.get('videos')
                        and not page_website_data.get('items'))):
                break
            yield from extract_videos(page_website_data)

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, page_id)

        # The URL may redirect to a VOD
        # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
        if TVPWebsiteIE.suitable(urlh.url):
            return self.url_result(urlh.url, ie=TVPWebsiteIE.ie_key(), video_id=page_id)

        if re.search(
                r'window\.__(?:video|news|website|directory)Data\s*=',
                webpage):
            return self._handle_vuejs_page(url, webpage, page_id)

        # classic server-side rendered sites; fall back to the page id when
        # none of the patterns matches
        video_id = self._search_regex([
            r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
            r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
            r"object_id\s*:\s*'(\d+)'",
            r'data-video-id="(\d+)"',

            # abc.tvp.pl - somehow there are more than one video IDs that seem to be the same video?
            # the first one is referenced to as "copyid", and seems to be unused by the website
            r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
        ], webpage, 'video id', default=page_id)

        # prefer the OpenGraph description; the plain <meta> description is
        # used only on pages that reference the //s.tvp.pl/files/portal/v assets
        description = self._og_search_description(webpage, default=None)
        if not description:
            description = (
                self._html_search_meta('description', webpage, default=None)
                if '//s.tvp.pl/files/portal/v' in webpage else None)

        return {
            '_type': 'url_transparent',
            'url': 'tvp:' + video_id,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'ie_key': 'TVPEmbed',
        }
249 | ||
250 | ||
class TVPStreamIE(InfoExtractor):
    """Extractor for live channels listed on tvpstream.vod.tvp.pl.

    Resolves a channel id (or the site's default channel when the id is
    empty) to the id of the video currently attached to the player box and
    hands it over to ``TVPEmbed`` as a ``url_transparent`` live entry.
    """
    IE_NAME = 'tvp:stream'
    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
    _TESTS = [{
        # untestable as "video" id changes many times across a day
        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
        'only_matching': True,
    }, {
        'url': 'tvpstream:39821455',
        'only_matching': True,
    }, {
        # the default stream when you provide no channel_id, most probably TVP Info
        'url': 'tvpstream:',
        'only_matching': True,
    }, {
        'url': 'https://tvpstream.vod.tvp.pl/',
        'only_matching': True,
    }]

    # %s is filled with 'channel' or 'video' to pick the data-*-id attribute
    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
    # %s is filled with the (escaped) channel id. Capture groups:
    #   1 / 2 - data-title in double / single quotes
    #   3 / 4 - data-stationname in double / single quotes
    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        # An empty channel id is sent as-is; the channel actually shown is
        # then read back from the player box below. (The previous
        # "% channel_id or 'default'" never used 'default' because % binds
        # tighter than `or`, so the request itself is unchanged.)
        channel_url = self._proto_relative_url(
            '//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id)
        webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage')
        if not channel_id:
            channel_id = self._search_regex(
                self._PLAYER_BOX_RE % 'channel', webpage, 'default channel id')
        video_id = self._search_regex(
            self._PLAYER_BOX_RE % 'video', webpage, 'video id')

        # Each attribute has a double-quoted and a single-quoted alternative;
        # exactly one group of each pair takes part in the match.
        title_dq, title_sq, station_dq, station_sq = self._search_regex(
            self._BUTTON_RE % (re.escape(channel_id)), webpage,
            'audition title and station name',
            group=(1, 2, 3, 4))
        audition_title = title_dq if title_dq is not None else title_sq
        station_name = station_dq if station_dq is not None else station_sq

        return {
            '_type': 'url_transparent',
            'id': channel_id,
            'url': 'tvp:%s' % video_id,
            'title': audition_title,
            'alt_title': station_name,
            'is_live': True,
            'ie_key': 'TVPEmbed',
        }
295 | ||
296 | ||
class TVPEmbedIE(InfoExtractor):
    """Extractor for TVP player objects (``tvp:<id>`` and player/embed URLs).

    Queries the TVPlayer2 ``api.php`` endpoint (``getTvpConfig`` method,
    JSONP response) for the object id and turns the returned ``content``
    into formats, thumbnails, subtitles and metadata.
    """
    IE_NAME = 'tvp:embed'
    IE_DESC = 'Telewizja Polska'
    # NOTE: must start with the 4-character '(?x)' flag; _extract_urls strips
    # it with _VALID_URL[4:] before embedding the pattern in another regex.
    _VALID_URL = r'''(?x)
        (?:
            tvp:
            |https?://
                (?:[^/]+\.)?
                (?:tvp(?:parlament)?\.pl|tvp\.info|polandin\.com)/
                (?:sess/
                        (?:tvplayer\.php\?.*?object_id
                        |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd])
                    |shared/details\.php\?.*?object_id)
                =)
        (?P<id>\d+)
    '''

    _TESTS = [{
        'url': 'tvp:194536',
        'info_dict': {
            'id': '194536',
            'ext': 'mp4',
            'title': 'Czas honoru, odc. 13 – Władek',
            'description': 'md5:76649d2014f65c99477be17f23a4dead',
            'age_limit': 12,
        },
    }, {
        'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&autoplay=false',
        'info_dict': {
            'id': '51247504',
            'ext': 'mp4',
            'title': 'Razmova 091220',
        },
    }, {
        # TVPlayer2 embed URL
        'url': 'https://tvp.info/sess/TVPlayer2/embed.php?ID=50595757',
        'only_matching': True,
    }, {
        'url': 'https://wiadomosci.tvp.pl/sess/TVPlayer2/api.php?id=51233452',
        'only_matching': True,
    }, {
        # pulsembed on dziennik.pl
        'url': 'https://www.tvp.pl/shared/details.php?copy_id=52205981&object_id=52204505&autoplay=false&is_muted=false&allowfullscreen=true&template=external-embed/video/iframe-video.html',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage, **kw):
        """Return the ``src`` URLs of all iframes in ``webpage`` matching ``_VALID_URL``."""
        return [m.group('embed') for m in re.finditer(
            r'(?x)<iframe[^>]+?src=(["\'])(?P<embed>%s)' % TVPEmbedIE._VALID_URL[4:],
            webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # it could be anything that is a valid JS function name
        callback = random.choice((
            'jebac_pis',
            'jebacpis',
            'ziobro',
            'sasin70',
            'sasin_przejebal_70_milionow_PLN',
            'tvp_is_a_state_propaganda_service',
        ))

        webpage = self._download_webpage(
            ('https://www.tvp.pl/sess/TVPlayer2/api.php?id=%s'
             + '&@method=getTvpConfig&@callback=%s') % (video_id, callback), video_id)

        # stripping JSONP padding (fixed-width prefix that includes the
        # callback name, and a 3-character suffix)
        datastr = webpage[15 + len(callback):-3]
        if datastr.startswith('null,'):
            # the payload after 'null,' is parsed as a list of error records
            error = self._parse_json(datastr[5:], video_id)
            raise ExtractorError(error[0]['desc'])

        content = self._parse_json(datastr, video_id)['content']
        info = content['info']
        is_live = try_get(info, lambda x: x['isLive'], bool)

        formats = []
        # a missing/null 'files' list yields no formats instead of a KeyError
        for media_file in content.get('files') or []:
            video_url = media_file.get('url')
            if not video_url:
                continue
            if video_url.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live))
            elif video_url.endswith('.mpd'):
                if is_live:
                    # doesn't work with either ffmpeg or native downloader
                    continue
                formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
            elif video_url.endswith('.f4m'):
                formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
            elif video_url.endswith('.ism/manifest'):
                formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False))
            else:
                # mp4, wmv or something
                # 'quality' may be absent or null; 'type' may be absent
                quality = media_file.get('quality') or {}
                formats.append({
                    'format_id': 'direct',
                    'url': video_url,
                    'ext': determine_ext(video_url, media_file.get('type')),
                    'fps': int_or_none(quality.get('fps')),
                    'tbr': int_or_none(quality.get('bitrate')),
                    'width': int_or_none(quality.get('width')),
                    'height': int_or_none(quality.get('height')),
                })

        self._sort_formats(formats)

        title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
        description = dict_get(info, ('description', 'seoDescription'))
        thumbnails = []
        for thumb in content.get('posters') or ():
            thumb_url = thumb.get('src')
            # skip missing URLs and unexpanded {width}/{height} templates
            if not thumb_url or '{width}' in thumb_url or '{height}' in thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            })
        age_limit = try_get(info, lambda x: x['ageGroup']['minAge'], int)
        if age_limit == 1:
            age_limit = 0
        duration = try_get(info, lambda x: x['duration'], int) if not is_live else None

        subtitles = {}
        for sub in content.get('subtitles') or []:
            if not sub.get('url'):
                continue
            subtitles.setdefault(sub['lang'], []).append({
                'url': sub['url'],
                'ext': sub.get('type'),
            })

        info_dict = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'age_limit': age_limit,
            'is_live': is_live,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

        # vod.tvp.pl
        if info.get('vortalName') == 'vod':
            series, episode_title = info.get('title'), info.get('subtitle')
            if series and episode_title:
                vod_title = '%s, %s' % (series, episode_title)
            else:
                # avoid titles like "Series, None" when one part is missing
                vod_title = series or episode_title or title
            info_dict.update({
                'title': vod_title,
                'series': series,
                'season': info.get('season'),
                'episode_number': info.get('episode'),
            })

        return info_dict
455 | ||
6ce2c678 | 456 | |
class TVPWebsiteIE(InfoExtractor):
    """Extractor for ``vod.tvp.pl/website/<display_id>,<id>`` listing pages.

    Produces a playlist whose entries are ``tvp:<video id>`` URLs handled by
    ``TVPEmbedIE``.
    """
    IE_NAME = 'tvp:series'
    _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'

    _TESTS = [{
        # series
        'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video',
        'info_dict': {
            'id': '17069012',
        },
        'playlist_count': 312,
    }, {
        # film
        'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466',
        'info_dict': {
            'id': '51374509',
            'ext': 'mp4',
            'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie',
            'description': 'md5:2e80823f00f5fc263555482f76f8fa42',
            'age_limit': 12,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['TVPEmbed'],
    }, {
        'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
        'only_matching': True,
    }]

    def _entries(self, display_id, playlist_id):
        """Yield one ``url_result`` per video linked from the paged ``/video`` listing.

        Pages are requested from 1 upwards until a page links no videos.
        """
        url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
        # display_id comes from the URL ([^,]+), so it may contain regex
        # metacharacters; escape it before embedding it in the pattern.
        video_link_re = r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % re.escape(display_id)
        for page_num in itertools.count(1):
            page = self._download_webpage(
                url, display_id, 'Downloading page %d' % page_num,
                query={'page': page_num})

            # orderedSet drops repeated ids while keeping first-seen order
            video_ids = orderedSet(re.findall(video_link_re, page))

            if not video_ids:
                break

            for video_id in video_ids:
                yield self.url_result(
                    'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
                    video_id=video_id)

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id, playlist_id = mobj.group('display_id', 'id')
        return self.playlist_result(
            self._entries(display_id, playlist_id), playlist_id)