]>
Commit | Line | Data |
---|---|---|
02d9b82a JW |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
e84888b4 S |
5 | from ..utils import ( |
6 | int_or_none, | |
72791634 | 7 | NO_DEFAULT, |
e84888b4 S |
8 | unescapeHTML, |
9 | ) | |
02d9b82a JW |
10 | |
11 | ||
class TVN24IE(InfoExtractor):
    """Extractor for video pages on tvn24.pl / tvn24bis.pl and their subdomains.

    The last path component of the URL is used as the display id; the final
    video id is taken from the page markup when available.
    """

    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
        'md5': 'fbdec753d7bc29d96036808275f2130c',
        'info_dict': {
            'id': '1584444',
            'ext': 'mp4',
            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
            'thumbnail': 're:https?://.*[.]jpeg',
        }
    }, {
        # different layout
        'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
        'info_dict': {
            'id': '1771763',
            'ext': 'mp4',
            'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
            'thumbnail': 're:https?://.*',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
        'only_matching': True,
    }, {
        'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
        'only_matching': True,
    }, {
        'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats``.  The mandatory lookups (title, the
        ``data-quality`` attribute, thumbnail) are performed without a
        default, so a failure there is reported by the underlying helper.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # Prefer the og title; fall back to the magazine-layout header.
        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
            webpage, 'title')

        def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
            # Locate the quoted value of the HTML attribute ``attr`` and parse
            # it as JSON after HTML-unescaping.  When the regex search yields
            # a falsy value (e.g. ``default=None`` and no match), '{}' is
            # parsed instead.
            return self._parse_json(
                self._search_regex(
                    r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
                    name, group='json', default=default, fatal=fatal) or '{}',
                display_id, transform_source=unescapeHTML, fatal=fatal)

        quality_data = extract_json('data-quality', 'formats')

        # The loop target is deliberately NOT named ``url``: rebinding the
        # ``url`` parameter here would make the ``,<digits>.html`` fallback
        # below inspect a media URL instead of the page URL.
        formats = [{
            'url': format_url,
            'format_id': format_id,
            # format ids are stripped of trailing 'p' characters before the
            # integer conversion (e.g. '720p' -> 720)
            'height': int_or_none(format_id.rstrip('p')),
        } for format_id, format_url in quality_data.items()]
        self._sort_formats(formats)

        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_regex(
            r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
            'thumbnail', group='url')

        video_id = None

        share_params = extract_json(
            'data-share-params', 'share params', default=None)
        if isinstance(share_params, dict):
            video_id = share_params.get('id')

        if not video_id:
            # Fallback chain: data-vid-id attribute in the page, then the
            # numeric id embedded in the page URL, then the display id.
            video_id = self._search_regex(
                r'data-vid-id=["\'](\d+)', webpage, 'video id',
                default=None) or self._search_regex(
                r',(\d+)\.html', url, 'video id', default=display_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }