]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | int_or_none, | |
7 | NO_DEFAULT, | |
8 | unescapeHTML, | |
9 | ) | |
10 | ||
11 | ||
class TVN24IE(InfoExtractor):
    """Information extractor for video pages on tvn24.pl and tvn24bis.pl.

    Matches any page under the ``tvn24.pl`` / ``tvn24bis.pl`` domains
    (including subdomains) and uses the last path component of the URL as
    the display id.
    """

    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
        'md5': 'fbdec753d7bc29d96036808275f2130c',
        'info_dict': {
            'id': '1584444',
            'ext': 'mp4',
            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
            'thumbnail': 're:https?://.*[.]jpeg',
        }
    }, {
        # different layout
        'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
        'info_dict': {
            'id': '1771763',
            'ext': 'mp4',
            'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
            'thumbnail': 're:https?://.*',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
        'only_matching': True,
    }, {
        'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
        'only_matching': True,
    }, {
        'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
        'only_matching': True,
    }, {
        'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The title comes from the OpenGraph title or, failing that, from a
        heading whose class starts with ``magazineItemHeader``. Formats are
        read from the JSON stored in the page's ``data-quality`` attribute.
        The video id is taken, in order of preference, from the ``id`` key of
        the ``data-share-params`` JSON, from a ``data-vid-id`` attribute, from
        a ``,<digits>.html`` suffix of the page URL, or finally the display
        id.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
            webpage, 'title')

        def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
            # Read the quoted value of HTML attribute `attr` and parse it as
            # JSON after unescaping HTML entities. A falsy match (e.g. the
            # attribute is absent and `default` is None) is parsed as '{}'.
            return self._parse_json(
                self._search_regex(
                    r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
                    name, group='json', default=default, fatal=fatal) or '{}',
                display_id, transform_source=unescapeHTML, fatal=fatal)

        quality_data = extract_json('data-quality', 'formats')

        formats = []
        # The loop variable must not be called `url`: that would rebind the
        # method argument, and the video id fallback below needs the page URL.
        for format_id, format_url in quality_data.items():
            formats.append({
                'url': format_url,
                'format_id': format_id,
                # Format ids are expected to look like '720p'; strip the
                # trailing 'p' and let int_or_none handle anything else.
                'height': int_or_none(format_id.rstrip('p')),
            })
        self._sort_formats(formats)

        description = self._og_search_description(webpage, default=None)
        # The thumbnail is optional metadata, so a missing data-poster
        # attribute must not abort the whole extraction.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_regex(
            r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
            'thumbnail', group='url', default=None)

        video_id = None

        share_params = extract_json(
            'data-share-params', 'share params', default=None)
        if isinstance(share_params, dict):
            video_id = share_params.get('id')

        if not video_id:
            video_id = self._search_regex(
                r'data-vid-id=["\'](\d+)', webpage, 'video id',
                default=None) or self._search_regex(
                r',(\d+)\.html', url, 'video id', default=display_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }