]>
Commit | Line | Data |
---|---|---|
6f8cb242 | 1 | # coding: utf-8 |
24144e3b | 2 | from __future__ import unicode_literals |
5137ebac | 3 | |
d9308378 | 4 | import itertools |
c15de6ff | 5 | import random |
29f400b9 TF |
6 | import re |
7 | ||
5137ebac | 8 | from .common import InfoExtractor |
6e3c2047 | 9 | from ..utils import ( |
d9308378 | 10 | determine_ext, |
c15de6ff | 11 | dict_get, |
6e3c2047 | 12 | ExtractorError, |
c15de6ff LL |
13 | int_or_none, |
14 | js_to_json, | |
d9308378 | 15 | orderedSet, |
c15de6ff LL |
16 | str_or_none, |
17 | try_get, | |
6e3c2047 | 18 | ) |
c3a3028f | 19 | |
5137ebac | 20 | |
class TVPIE(InfoExtractor):
    """Extractor for pages on tvp.pl, tvp.info, tvpparlament.pl and polandin.com.

    Pages fall into two families:

    * vue.js client-side rendered pages, recognised by a
      ``window.__(video|news|website|directory)Data = {...}`` assignment;
    * classic server-side rendered pages, where the player object id is
      scraped from the markup.

    In both cases the actual media extraction is delegated to ``TVPEmbed``
    (or ``TVPWebsite`` for vod.tvp.pl series pages).
    """

    IE_NAME = 'tvp'
    IE_DESC = 'Telewizja Polska'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|polandin\.com)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'

    _TESTS = [{
        # TVPlayer 2 in js wrapper
        'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
        'info_dict': {
            'id': '194536',
            'ext': 'mp4',
            'title': 'Czas honoru, odc. 13 – Władek',
            'description': 'md5:437f48b93558370b031740546b696e24',
            'age_limit': 12,
        },
    }, {
        # TVPlayer legacy
        'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
        'info_dict': {
            'id': '17916176',
            'ext': 'mp4',
            'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
            'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
        },
    }, {
        # TVPlayer 2 in iframe
        'url': 'https://wiadomosci.tvp.pl/50725617/dzieci-na-sprzedaz-dla-homoseksualistow',
        'info_dict': {
            'id': '50725617',
            'ext': 'mp4',
            'title': 'Dzieci na sprzedaż dla homoseksualistów',
            'description': 'md5:7d318eef04e55ddd9f87a8488ac7d590',
            'age_limit': 12,
        },
    }, {
        # TVPlayer 2 in client-side rendered website (regional; window.__newsData)
        'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
        'info_dict': {
            'id': '25804446',
            'ext': 'mp4',
            'title': 'Studio Yayo',
            'upload_date': '20160616',
            'timestamp': 1466075700,
        },
    }, {
        # TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
        'url': 'https://www.tvp.info/52880236/09042021-0800',
        'info_dict': {
            'id': '52880236',
            'ext': 'mp4',
            'title': '09.04.2021, 08:00',
        },
    }, {
        # client-side rendered (regional) program (playlist) page
        'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
        'info_dict': {
            'id': '9660819',
            'description': 'Od poniedziałku do piątku o 18:55',
            'title': 'Rozmowa dnia',
        },
        'playlist_mincount': 1800,
        'params': {
            'skip_download': True,
        },
    }, {
        # ABC-specific video embedding
        # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450
        'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
        'info_dict': {
            'id': '48320456',
            'ext': 'mp4',
            'title': 'Teleranek, Żubr',
        },
        'skip': 'unavailable',
    }, {
        # yet another vue page
        'url': 'https://jp2.tvp.pl/46925618/filmy',
        'info_dict': {
            'id': '46925618',
            'title': 'Filmy',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
        'only_matching': True,
    }, {
        'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
        'only_matching': True,
    }, {
        'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
        'only_matching': True,
    }, {
        'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
        'only_matching': True,
    }, {
        'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
        'only_matching': True,
    }, {
        'url': 'https://tvp.info/49193823/teczowe-flagi-na-pomnikach-prokuratura-wszczela-postepowanie-wieszwiecej',
        'only_matching': True,
    }, {
        'url': 'https://www.tvpparlament.pl/retransmisje-vod/inne/wizyta-premiera-mateusza-morawieckiego-w-firmie-berotu-sp-z-oo/48857277',
        'only_matching': True,
    }, {
        'url': 'https://polandin.com/47942651/pln-10-billion-in-subsidies-transferred-to-companies-pm',
        'only_matching': True,
    }]

    def _parse_vue_website_data(self, webpage, page_id):
        """Return the parsed ``window.__websiteData``/``__directoryData`` object.

        Returns None when the page carries no such assignment, so callers can
        decide whether that is an error (first page) or simply the end of a
        paged listing.
        """
        website_data = self._search_regex([
            # website - regiony, tvp.info
            # directory - jp2.tvp.pl
            r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
        ], webpage, 'website data', default=None)
        # default=None is required: without it _search_regex raises instead of
        # returning a falsy value and the check below could never trigger.
        if not website_data:
            return None
        return self._parse_json(website_data, page_id, transform_source=js_to_json)

    def _extract_vue_video(self, video_data, page_id=None):
        """Build a ``url_transparent`` result from one vue video/news object.

        ``video_data`` may be the raw JS object literal (a string) or an
        already parsed dict. Objects of type ``website`` are redirected to
        ``TVPWebsite``; everything else is handed to ``TVPEmbed`` by id.

        Raises ExtractorError when neither the object nor ``page_id``
        provides an id for an embeddable video.
        """
        if isinstance(video_data, str):
            video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)

        thumbnails = []
        image = video_data.get('image')
        if image:
            # 'image' is either a single object or a list of them
            for thumb in (image if isinstance(image, list) else [image]):
                thumb_url = str_or_none(thumb.get('url'))
                if thumb_url:
                    thumbnails.append({'url': thumb_url})

        video_id = str_or_none(video_data.get('_id') or page_id)
        is_website = video_data.get('type') == 'website'
        if is_website:
            url = video_data['url']
            # vod.tvp.pl links come as /<id>/<slug>; TVPWebsite expects
            # /website/<slug>,<id>
            legacy_url_match = re.match(r'https?://vod\.tvp\.pl/(\d+)/([^/?#]+)', url)
            if legacy_url_match:
                url = f'https://vod.tvp.pl/website/{legacy_url_match.group(2)},{legacy_url_match.group(1)}'
        else:
            if video_id is None:
                # previously surfaced as an opaque TypeError from 'tvp:' + None
                raise ExtractorError('Could not determine video id')
            url = 'tvp:' + video_id

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': url,
            'ie_key': 'TVPEmbed' if not is_website else 'TVPWebsite',
            'title': str_or_none(video_data.get('title')),
            'description': str_or_none(video_data.get('lead')),
            'timestamp': int_or_none(video_data.get('release_date_long')),
            'duration': int_or_none(video_data.get('duration')),
            'thumbnails': thumbnails,
        }

    def _handle_vuejs_page(self, url, webpage, page_id):
        """Extract a single video or a paged playlist from a vue.js page.

        Raises ExtractorError when the page carries neither video/news data
        nor website/directory data.
        """
        # vue client-side rendered sites (all regional pages + tvp.info)
        video_data = self._search_regex([
            r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
        ], webpage, 'video data', default=None)
        if video_data:
            return self._extract_vue_video(video_data, page_id=page_id)

        # paged playlists
        website_data = self._parse_vue_website_data(webpage, page_id)
        if website_data:
            entries = self._vuejs_entries(url, website_data, page_id)

            return {
                '_type': 'playlist',
                'id': page_id,
                'title': str_or_none(website_data.get('title')),
                'description': str_or_none(website_data.get('lead')),
                'entries': entries,
            }
        raise ExtractorError('Could not extract video/website data')

    def _vuejs_entries(self, url, website_data, page_id):
        """Yield playlist entries from ``website_data`` and its follow-up pages.

        Further pages are requested with ``?page=N`` (N >= 2) only when the
        first page reports more items than fit on one page, and paging stops
        at the first page without website data or without any videos/items.
        """

        def extract_videos(wd):
            if wd.get('latestVideo'):
                yield self._extract_vue_video(wd['latestVideo'])
            for video in wd.get('videos') or []:
                yield self._extract_vue_video(video)
            for video in wd.get('items') or []:
                yield self._extract_vue_video(video)

        yield from extract_videos(website_data)

        # Either counter may be absent; comparing None with an int raises
        # TypeError, so normalise both to integers first.
        total_count = int_or_none(website_data.get('items_total_count')) or 0
        per_page = int_or_none(website_data.get('items_per_page')) or 0
        if total_count <= per_page:
            return

        for page in itertools.count(2):
            page_website_data = self._parse_vue_website_data(
                self._download_webpage(
                    url, page_id, note='Downloading page #%d' % page,
                    query={'page': page}),
                page_id)
            if not page_website_data:
                break
            if not page_website_data.get('videos') and not page_website_data.get('items'):
                break
            yield from extract_videos(page_website_data)

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, page_id)

        # The URL may redirect to a VOD
        # example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
        if TVPWebsiteIE.suitable(urlh.url):
            return self.url_result(urlh.url, ie=TVPWebsiteIE.ie_key(), video_id=page_id)

        if re.search(
                r'window\.__(?:video|news|website|directory)Data\s*=',
                webpage):
            return self._handle_vuejs_page(url, webpage, page_id)

        # classic server-side rendered sites; fall back to the page id when
        # no player object id can be found in the markup
        video_id = self._search_regex([
            r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
            r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
            r"object_id\s*:\s*'(\d+)'",
            r'data-video-id="(\d+)"',

            # abc.tvp.pl - somehow there are more than one video IDs that seem to be the same video?
            # the first one is referenced to as "copyid", and seems to be unused by the website
            r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
        ], webpage, 'video id', default=page_id)

        description = self._og_search_description(webpage, default=None)
        if not description and '//s.tvp.pl/files/portal/v' in webpage:
            # the meta description is only consulted on pages that reference
            # this portal asset path
            description = self._html_search_meta('description', webpage, default=None)

        return {
            '_type': 'url_transparent',
            'url': 'tvp:' + video_id,
            'description': description or None,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'ie_key': 'TVPEmbed',
        }
252 | ||
253 | ||
class TVPStreamIE(InfoExtractor):
    """Extractor for live channels on tvpstream.vod.tvp.pl.

    Resolves a channel id (or the site's default channel when none is given)
    to the id of the currently playing video and delegates playback to
    ``TVPEmbed``.
    """

    IE_NAME = 'tvp:stream'
    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
    _TESTS = [{
        # untestable as "video" id changes many times across a day
        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
        'only_matching': True,
    }, {
        'url': 'tvpstream:39821455',
        'only_matching': True,
    }, {
        # the default stream when you provide no channel_id, most probably TVP Info
        'url': 'tvpstream:',
        'only_matching': True,
    }, {
        'url': 'https://tvpstream.vod.tvp.pl/',
        'only_matching': True,
    }]

    # %s is filled with 'channel' or 'video' to read the matching data-*-id
    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
    # Groups: 1/2 = data-title (double-/single-quoted),
    #         3/4 = data-stationname (double-/single-quoted)
    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        # The parentheses matter: '%' binds tighter than 'or', so without them
        # the 'default' fallback could never be used for an empty channel id.
        channel_url = self._proto_relative_url(
            '//tvpstream.vod.tvp.pl/?channel_id=%s' % (channel_id or 'default'))
        webpage = self._download_webpage(
            channel_url, channel_id or 'default', 'Downloading channel webpage')
        if not channel_id:
            channel_id = self._search_regex(
                self._PLAYER_BOX_RE % 'channel', webpage, 'default channel id')
        video_id = self._search_regex(
            self._PLAYER_BOX_RE % 'video', webpage, 'video id')

        # Each attribute has one capture group per quoting style; exactly one
        # of the pair participates in a match, so pick whichever is set.
        title_dq, title_sq, station_dq, station_sq = self._search_regex(
            self._BUTTON_RE % (re.escape(channel_id)), webpage,
            'audition title and station name',
            group=(1, 2, 3, 4))
        audition_title = title_dq if title_dq is not None else title_sq
        station_name = station_dq if station_dq is not None else station_sq

        return {
            '_type': 'url_transparent',
            'id': channel_id,
            'url': 'tvp:%s' % video_id,
            'title': audition_title,
            'alt_title': station_name,
            'is_live': True,
            'ie_key': 'TVPEmbed',
        }
298 | ||
299 | ||
class TVPEmbedIE(InfoExtractor):
    """Extractor for TVP player embeds and the internal ``tvp:<id>`` scheme.

    Queries the TVPlayer2 ``getTvpConfig`` JSONP endpoint for the given object
    id and turns the returned file list into formats.
    """

    IE_NAME = 'tvp:embed'
    IE_DESC = 'Telewizja Polska'
    # NOTE: _extract_urls strips the leading '(?x)' via _VALID_URL[4:], so the
    # flag must stay the very first four characters of this pattern.
    _VALID_URL = r'''(?x)
        (?:
            tvp:
            |https?://
                (?:[^/]+\.)?
                (?:tvp(?:parlament)?\.pl|tvp\.info|polandin\.com)/
                (?:sess/
                        (?:tvplayer\.php\?.*?object_id
                        |TVPlayer2/(?:embed|api)\.php\?.*[Ii][Dd])
                    |shared/details\.php\?.*?object_id)
                =)
        (?P<id>\d+)
    '''

    _TESTS = [{
        'url': 'tvp:194536',
        'info_dict': {
            'id': '194536',
            'ext': 'mp4',
            'title': 'Czas honoru, odc. 13 – Władek',
            'description': 'md5:76649d2014f65c99477be17f23a4dead',
            'age_limit': 12,
        },
    }, {
        'url': 'https://www.tvp.pl/sess/tvplayer.php?object_id=51247504&autoplay=false',
        'info_dict': {
            'id': '51247504',
            'ext': 'mp4',
            'title': 'Razmova 091220',
        },
    }, {
        # TVPlayer2 embed URL
        'url': 'https://tvp.info/sess/TVPlayer2/embed.php?ID=50595757',
        'only_matching': True,
    }, {
        'url': 'https://wiadomosci.tvp.pl/sess/TVPlayer2/api.php?id=51233452',
        'only_matching': True,
    }, {
        # pulsembed on dziennik.pl
        'url': 'https://www.tvp.pl/shared/details.php?copy_id=52205981&object_id=52204505&autoplay=false&is_muted=false&allowfullscreen=true&template=external-embed/video/iframe-video.html',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage, **kw):
        """Return the ``src`` of every iframe in ``webpage`` matching _VALID_URL."""
        return [m.group('embed') for m in re.finditer(
            r'(?x)<iframe[^>]+?src=(["\'])(?P<embed>%s)' % TVPEmbedIE._VALID_URL[4:],
            webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The JSONP callback could be anything that is a valid JS function
        # name; a single fixed name keeps the request URL deterministic.
        callback = 'callback'

        webpage = self._download_webpage(
            ('https://www.tvp.pl/sess/TVPlayer2/api.php?id=%s'
             + '&@method=getTvpConfig&@callback=%s') % (video_id, callback), video_id)

        # stripping JSONP padding
        # NOTE(review): relies on a fixed 15-character prefix besides the
        # callback name and a 3-character suffix - confirm against a live
        # response before changing.
        datastr = webpage[15 + len(callback):-3]
        if datastr.startswith('null,'):
            # error responses carry 'null,' followed by a list of error objects
            error = self._parse_json(datastr[5:], video_id)
            raise ExtractorError(error[0]['desc'])

        content = self._parse_json(datastr, video_id)['content']
        info = content['info']
        is_live = try_get(info, lambda x: x['isLive'], bool)

        formats = []
        for media_file in content['files']:
            video_url = media_file.get('url')
            if not video_url:
                continue
            if video_url.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False, live=is_live))
            elif video_url.endswith('.mpd'):
                if is_live:
                    # doesn't work with either ffmpeg or native downloader
                    continue
                formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
            elif video_url.endswith('.f4m'):
                formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
            elif video_url.endswith('.ism/manifest'):
                formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss', fatal=False))
            else:
                # mp4, wmv or something
                # 'quality' may be missing or an explicit null
                quality = media_file.get('quality') or {}
                formats.append({
                    'format_id': 'direct',
                    'url': video_url,
                    'ext': determine_ext(video_url, media_file.get('type')),
                    'fps': int_or_none(quality.get('fps')),
                    'tbr': int_or_none(quality.get('bitrate')),
                    'width': int_or_none(quality.get('width')),
                    'height': int_or_none(quality.get('height')),
                })

        self._sort_formats(formats)

        title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
        description = dict_get(info, ('description', 'seoDescription'))

        thumbnails = []
        for thumb in content.get('posters') or ():
            thumb_url = thumb.get('src')
            # skip URL templates that still contain size placeholders
            if not thumb_url or '{width}' in thumb_url or '{height}' in thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            })

        age_limit = try_get(info, lambda x: x['ageGroup']['minAge'], int)
        if age_limit == 1:
            # a reported minimum age of 1 is mapped to "no restriction"
            age_limit = 0
        duration = try_get(info, lambda x: x['duration'], int) if not is_live else None

        subtitles = {}
        for sub in content.get('subtitles') or []:
            sub_url, sub_lang = sub.get('url'), sub.get('lang')
            # an entry without URL or language cannot be offered to the user
            if not sub_url or not sub_lang:
                continue
            subtitles.setdefault(sub_lang, []).append({
                'url': sub_url,
                'ext': sub.get('type'),
            })

        info_dict = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'age_limit': age_limit,
            'is_live': is_live,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

        # vod.tvp.pl
        if info.get('vortalName') == 'vod':
            # Join only the parts that are present so a missing subtitle does
            # not leak a literal "None" into the title.
            title_parts = [
                '%s' % part
                for part in (info.get('title'), info.get('subtitle')) if part]
            info_dict.update({
                'title': ', '.join(title_parts) or title,
                'series': info.get('title'),
                'season': info.get('season'),
                'episode_number': info.get('episode'),
            })

        return info_dict
458 | ||
6ce2c678 | 459 | |
class TVPWebsiteIE(InfoExtractor):
    """Extractor for vod.tvp.pl ``/website/<slug>,<id>`` series and film pages.

    Walks the paged ``/video`` listing of the website and yields every linked
    video as a ``TVPEmbed`` entry.
    """

    IE_NAME = 'tvp:series'
    _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'

    _TESTS = [{
        # series
        'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video',
        'info_dict': {
            'id': '17069012',
        },
        'playlist_count': 312,
    }, {
        # film
        'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466',
        'info_dict': {
            'id': '51374509',
            'ext': 'mp4',
            'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie',
            'description': 'md5:2e80823f00f5fc263555482f76f8fa42',
            'age_limit': 12,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['TVPEmbed'],
    }, {
        'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
        'only_matching': True,
    }]

    def _entries(self, display_id, playlist_id):
        """Yield one ``TVPEmbed`` url result per video linked from the listing.

        Pages are requested with ``?page=N`` starting at 1; iteration stops at
        the first page that links no videos of this website.
        """
        url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
        # display_id comes straight from the URL and may contain regex
        # metacharacters, so it has to be escaped before interpolation.
        video_link_re = (
            r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % re.escape(display_id))

        for page_num in itertools.count(1):
            page = self._download_webpage(
                url, display_id, 'Downloading page %d' % page_num,
                query={'page': page_num})

            # the same video may be linked several times on one page
            video_ids = orderedSet(re.findall(video_link_re, page))

            if not video_ids:
                break

            for video_id in video_ids:
                yield self.url_result(
                    'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
                    video_id=video_id)

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id, playlist_id = mobj.group('display_id', 'id')
        return self.playlist_result(
            self._entries(display_id, playlist_id), playlist_id)