]>
Commit | Line | Data |
---|---|---|
f8752b86 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | determine_ext, | |
9 | ExtractorError, | |
10 | float_or_none, | |
11 | get_element_by_class, | |
12 | int_or_none, | |
13 | js_to_json, | |
bd65f181 | 14 | NO_DEFAULT, |
f8752b86 YCH |
15 | parse_iso8601, |
16 | remove_start, | |
17 | strip_or_none, | |
18 | url_basename, | |
19 | ) | |
20 | ||
21 | ||
class OnetBaseIE(InfoExtractor):
    """Shared helpers for the onet.tv extractors defined in this module."""

    # Scheme, host and single-letter section prefix common to onet.tv URLs.
    _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'

    def _search_mvp_id(self, webpage):
        """Return the id taken from an ``id="mvp:<id>"`` attribute in *webpage*."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage=None):
        """Query the asset-detail API for *video_id* and build an info dict.

        When *webpage* is given, its OpenGraph title/description take
        precedence over the values from the API metadata.

        Raises ExtractorError when the API reports an error or when the
        response carries no asset entry under ``result['0']``.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        # The asset may be absent even without an explicit error; fail with
        # a readable extractor error instead of a KeyError/TypeError below.
        result = response.get('result')
        video = result.get('0') if isinstance(result, dict) else None
        if not video:
            raise ExtractorError(
                '%s returned no video data for %s' % (self.IE_NAME, video_id))

        formats = []
        for format_type, formats_dict in video['formats'].items():
            # Entries that are not dicts/lists are skipped rather than trusted.
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    if format_id.startswith('ism'):
                        formats.extend(self._extract_ism_formats(
                            video_url, video_id, 'mss', fatal=False))
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    elif format_id.startswith('hls'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    else:
                        # Plain HTTP download described directly by the API.
                        http_f = {
                            'url': video_url,
                            'format_id': format_id,
                            'abr': float_or_none(f.get('audio_bitrate')),
                        }
                        if format_type == 'audio':
                            http_f['vcodec'] = 'none'
                        else:
                            http_f.update({
                                'height': int_or_none(f.get('vertical_resolution')),
                                'width': int_or_none(f.get('horizontal_resolution')),
                                'vbr': float_or_none(f.get('video_bitrate')),
                            })
                        formats.append(http_f)
        self._sort_formats(formats)

        meta = video.get('meta', {})

        title = (self._og_search_title(
            webpage, default=None) if webpage else None) or meta['title']
        description = (self._og_search_description(
            webpage, default=None) if webpage else None) or meta.get('description')
        # 'lenght' (sic) is read as a fallback key for the duration.
        duration = meta.get('length') or meta.get('lenght')
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
105 | ||
106 | ||
d31aa74f S |
class OnetMVPIE(OnetBaseIE):
    """Extractor for internal ``onetmvp:<id>`` pseudo-URLs."""

    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'onetmvp:381027.1509591944',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Resolve the MVP id embedded in *url* through the shared API helper."""
        mvp_id = self._match_id(url)
        return self._extract_from_id(mvp_id)
117 | ||
118 | ||
class OnetIE(OnetBaseIE):
    """Extractor for single video pages on onet.tv."""

    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.tv'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': '436102770fb095c75b8bb0392d3da9ff',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page, find its MVP id and extract the video."""
        match = self._match_valid_url(url)
        display_id = match.group('display_id')
        video_id = match.group('id')

        page = self._download_webpage(url, display_id)
        mvp_id = self._search_mvp_id(page)

        info = self._extract_from_id(mvp_id, page)
        # Report the id from the page URL rather than the internal MVP id.
        info['id'] = video_id
        info['display_id'] = display_id
        return info
155 | ||
156 | ||
class OnetChannelIE(OnetBaseIE):
    """Extractor for onet.tv channel pages (playlist or current clip)."""

    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
    IE_NAME = 'onet.tv:channel'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival',
        'info_dict': {
            'id': 'openerfestival',
            'title': "Open'er Festival",
            'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
        },
        'playlist_mincount': 35,
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the channel playlist, or only the current clip if requested."""
        channel_id = self._match_id(url)
        webpage = self._download_webpage(url, channel_id)

        def _clip_source_to_json(raw):
            # Collapse `'a' + 'b'` concatenations before converting to JSON.
            return js_to_json(re.sub(r'\'\s*\+\s*\'', '', raw))

        clip_source = self._search_regex(
            r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info')
        current_clip_info = self._parse_json(
            clip_source, channel_id, transform_source=_clip_source_to_json)
        video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
        video_name = url_basename(current_clip_info['url'])

        wants_playlist = self._yes_playlist(
            channel_id, video_name, playlist_label='channel')
        if not wants_playlist:
            return self._extract_from_id(video_id, webpage)

        link_re = r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE
        entries = []
        for video_link in re.findall(link_re, webpage):
            entries.append(self.url_result(video_link, OnetIE.ie_key()))

        channel_title = strip_or_none(
            get_element_by_class('o_channelName', webpage))
        channel_description = strip_or_none(
            get_element_by_class('o_channelDesc', webpage))
        return self.playlist_result(
            entries, channel_id, channel_title, channel_description)
43a3d9ed S |
198 | |
199 | ||
class OnetPlIE(InfoExtractor):
    """Extractor for article pages on onet.pl and the related sites matched below."""

    _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.pl'

    _TESTS = [{
        'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
        'md5': 'b94021eb56214c3969380388b6e73cb0',
        'info_dict': {
            'id': '1561707.1685479',
            'ext': 'mp4',
            'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
            'description': 'md5:61fb0740084d2d702ea96512a03585b4',
            'upload_date': '20170214',
            'timestamp': 1487078046,
        },
    }, {
        # embedded via pulsembed
        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
        'info_dict': {
            'id': '501235.965429946',
            'ext': 'mp4',
            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
            'upload_date': '20170622',
            'timestamp': 1498159955,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
        'only_matching': True,
    }, {
        'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
        'only_matching': True,
    }, {
        'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
        'only_matching': True,
    }, {
        'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
        'only_matching': True,
    }]

    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
        """Find the numeric MVP id in a ``data-mvp``/``data-params-mvp`` attribute."""
        return self._search_regex(
            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
            default=default)

    def _real_extract(self, url):
        """Locate the MVP id (directly or via pulsembed) and delegate to OnetMVPIE."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        mvp_id = self._search_mvp_id(page, default=None)
        if not mvp_id:
            # No id on the article page: follow the pulsembed frame and
            # search there instead.
            embed_url = self._search_regex(
                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
                page, 'pulsembed url', group='url')
            embed_page = self._download_webpage(
                embed_url, video_id, 'Downloading pulsembed webpage')
            mvp_id = self._search_mvp_id(embed_page)

        target = 'onetmvp:%s' % mvp_id
        return self.url_result(target, OnetMVPIE.ie_key(), video_id=mvp_id)