]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | int_or_none, | |
4 | NO_DEFAULT, | |
5 | unescapeHTML, | |
6 | ) | |
7 | ||
8 | ||
class TVN24IE(InfoExtractor):
    """Information extractor for pages matched by ``_VALID_URL``.

    The pattern accepts ``tvn24.pl`` and ``tvn24bis.pl`` hosts, with an
    optional subdomain, and takes the last path component as the display id.
    """

    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
        'md5': 'fbdec753d7bc29d96036808275f2130c',
        'info_dict': {
            'id': '1584444',
            'ext': 'mp4',
            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
            'thumbnail': 're:https?://.*[.]jpeg',
        }
    }, {
        # different layout
        'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
        'info_dict': {
            'id': '1771763',
            'ext': 'mp4',
            'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
            'thumbnail': 're:https?://.*',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
        'only_matching': True,
    }, {
        'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
        'only_matching': True,
    }, {
        'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build its info dict.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats``. The title and the ``data-quality``
        attribute are looked up with fatal searches; description and
        thumbnail are optional and may be ``None``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # Prefer the OpenGraph title; otherwise fall back to the
        # "magazineItemHeader" heading (see the "different layout" test).
        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
            webpage, 'title')

        def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
            """Parse the JSON held in the quoted HTML attribute *attr*.

            The attribute value is HTML-unescaped before parsing. When the
            regex search yields a falsy value, an empty JSON object is
            parsed instead.
            """
            return self._parse_json(
                self._search_regex(
                    r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
                    name, group='json', default=default, fatal=fatal) or '{}',
                display_id, transform_source=unescapeHTML, fatal=fatal)

        quality_data = extract_json('data-quality', 'formats')

        formats = []
        # The loop variable must not be called `url`: that would rebind the
        # page URL, which the video id fallback below still needs.
        for format_id, format_url in quality_data.items():
            formats.append({
                'url': format_url,
                'format_id': format_id,
                # Height is the format id with trailing 'p' characters
                # removed; int_or_none handles ids that are not numeric.
                'height': int_or_none(format_id.rstrip('p')),
            })
        self._sort_formats(formats)

        description = self._og_search_description(webpage, default=None)
        # The thumbnail is optional metadata, so a missing data-poster
        # attribute must not abort the whole extraction.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_regex(
            r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
            'thumbnail', group='url', fatal=False)

        video_id = None

        share_params = extract_json(
            'data-share-params', 'share params', default=None)
        if isinstance(share_params, dict):
            video_id = share_params.get('id')

        if not video_id:
            # Fallback order: data-vid-id attribute, then the numeric id in
            # front of ".html" in the page URL, then the display id.
            video_id = self._search_regex(
                r'data-vid-id=["\'](\d+)', webpage, 'video id',
                default=None) or self._search_regex(
                r',(\d+)\.html', url, 'video id', default=display_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }