]>
Commit | Line | Data |
---|---|---|
02d9b82a JW |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
e84888b4 S |
5 | from ..utils import ( |
6 | int_or_none, | |
7 | unescapeHTML, | |
8 | ) | |
02d9b82a JW |
9 | |
10 | ||
class TVN24IE(InfoExtractor):
    """Extractor for video pages on tvn24.pl and tvn24bis.pl (any subdomain).

    The page is expected to carry the stream URLs in a ``data-quality`` HTML
    attribute holding a JSON object that maps a format id to a media URL.
    """

    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
        'md5': 'fbdec753d7bc29d96036808275f2130c',
        'info_dict': {
            'id': '1584444',
            'ext': 'mp4',
            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
            'thumbnail': 're:https?://.*[.]jpeg',
        }
    }, {
        'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
        'only_matching': True,
    }, {
        'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
        'only_matching': True,
    }, {
        'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with ``id``, ``title``, ``description``, ``thumbnail``
        and ``formats``.  The title and the ``data-quality`` attribute are
        looked up with the helpers' default (fatal) settings; the share
        params and the thumbnail are looked up non-fatally.
        """
        # Provisional id taken from the URL; replaced further down by the id
        # found in the page's share params when that one is available.
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)

        def extract_json(attr, name, fatal=True):
            # Pull the quoted value of HTML attribute `attr` out of the page
            # and parse it as JSON.  The value is passed through unescapeHTML
            # before parsing.  When the attribute is missing and fatal is
            # False, '{}' is parsed instead.
            return self._parse_json(
                self._search_regex(
                    r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
                    name, group='json', fatal=fatal) or '{}',
                video_id, transform_source=unescapeHTML, fatal=fatal)

        quality_data = extract_json('data-quality', 'formats')

        # Dedicated loop names so the `url` parameter is not rebound.
        # Height is the format id with trailing 'p' characters stripped
        # (presumably ids look like '720p' -- confirm against live pages);
        # int_or_none handles ids that are not numeric.
        formats = [{
            'url': format_url,
            'format_id': format_id,
            'height': int_or_none(format_id.rstrip('p')),
        } for format_id, format_url in quality_data.items()]
        self._sort_formats(formats)

        description = self._og_search_description(webpage)
        # The thumbnail is optional metadata: prefer og:image, fall back to
        # the data-poster attribute, and settle for None instead of aborting
        # the extraction when neither is present.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_regex(
            r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
            'thumbnail', default=None, group='url')

        share_params = extract_json(
            'data-share-params', 'share params', fatal=False)
        if isinstance(share_params, dict):
            video_id = share_params.get('id') or video_id

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }