]>
Commit | Line | Data |
---|---|---|
f8752b86 YCH |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | determine_ext, | |
6 | ExtractorError, | |
7 | float_or_none, | |
8 | get_element_by_class, | |
9 | int_or_none, | |
10 | js_to_json, | |
bd65f181 | 11 | NO_DEFAULT, |
f8752b86 YCH |
12 | parse_iso8601, |
13 | remove_start, | |
14 | strip_or_none, | |
15 | url_basename, | |
16 | ) | |
17 | ||
18 | ||
class OnetBaseIE(InfoExtractor):
    """Common base for the Onet extractors.

    Provides the shared URL prefix pattern, the lookup of the ``mvp:`` id
    embedded in a page, and the extraction of formats/metadata for such an id.
    """

    _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'

    def _search_mvp_id(self, webpage):
        """Return the id found in an ``id="mvp:<id>"`` attribute of *webpage*."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage=None):
        """Build the info dict for the asset identified by *video_id*.

        Queries the ``get_asset_detail`` method of the asset API, collects the
        formats listed in the response and combines them with the metadata.

        Args:
            video_id: Asset id sent as ``ID_Publikacji``.
            webpage: Optional page HTML; when given, its OpenGraph title and
                description take precedence over the API metadata.

        Returns:
            A dict with ``id``, ``title``, ``description``, ``duration``,
            ``timestamp`` and ``formats``.

        Raises:
            ExtractorError: If the API reports an error, or if the response
                carries no usable asset entry.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        # The asset is expected under result['0']; fail with a clear message
        # instead of a KeyError/TypeError when it is missing or malformed.
        result = response.get('result')
        video = result.get('0') if isinstance(result, dict) else None
        if not isinstance(video, dict):
            raise ExtractorError(
                '%s returned no asset details for %s' % (self.IE_NAME, video_id))

        # Apply the same "must be a dict" guard used for the nested levels
        # below, so a non-dict 'formats' value yields no formats rather than
        # an AttributeError.
        formats_by_type = video.get('formats')
        if not isinstance(formats_by_type, dict):
            formats_by_type = {}

        formats = []
        for format_type, formats_dict in formats_by_type.items():
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    if format_id.startswith('ism'):
                        formats.extend(self._extract_ism_formats(
                            video_url, video_id, 'mss', fatal=False))
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    elif format_id.startswith('hls'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    else:
                        # Plain HTTP download described directly by the entry.
                        http_f = {
                            'url': video_url,
                            'format_id': format_id,
                            'abr': float_or_none(f.get('audio_bitrate')),
                        }
                        if format_type == 'audio':
                            http_f['vcodec'] = 'none'
                        else:
                            http_f.update({
                                'height': int_or_none(f.get('vertical_resolution')),
                                'width': int_or_none(f.get('horizontal_resolution')),
                                'vbr': float_or_none(f.get('video_bitrate')),
                            })
                        formats.append(http_f)

        # Treat a missing, null or non-dict 'meta' as empty metadata.
        meta = video.get('meta')
        if not isinstance(meta, dict):
            meta = {}

        title = (self._og_search_title(
            webpage, default=None) if webpage else None) or meta['title']
        description = (self._og_search_description(
            webpage, default=None) if webpage else None) or meta.get('description')
        # 'lenght' (sic) is checked as a fallback key for the duration.
        duration = meta.get('length') or meta.get('lenght')
        # ' ' is passed as the date/time delimiter for addDate.
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
101 | ||
102 | ||
d31aa74f S |
class OnetMVPIE(OnetBaseIE):
    """Extractor for internal ``onetmvp:<id>`` URLs; the id is used as-is."""

    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'onetmvp:381027.1509591944',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Extract the asset whose id is embedded in the ``onetmvp:`` URL."""
        mvp_id = self._match_id(url)
        return self._extract_from_id(mvp_id)
113 | ||
114 | ||
class OnetIE(OnetBaseIE):
    """Extractor for single video pages matching the Onet base URL pattern."""

    IE_NAME = 'onet.tv'
    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': '436102770fb095c75b8bb0392d3da9ff',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page, resolve its mvp id and extract the asset.

        The ``id`` and ``display_id`` of the result are taken from the URL
        rather than from the mvp id used for the API lookup.
        """
        url_match = self._match_valid_url(url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')

        webpage = self._download_webpage(url, display_id)
        info = self._extract_from_id(self._search_mvp_id(webpage), webpage)

        # Override the id set by the base extraction and attach the slug.
        info['id'] = video_id
        info['display_id'] = display_id
        return info
151 | ||
152 | ||
class OnetChannelIE(OnetBaseIE):
    """Extractor for channel pages: yields the current clip or a playlist."""

    IE_NAME = 'onet.tv:channel'
    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival',
        'info_dict': {
            'id': 'openerfestival',
            'title': "Open'er Festival",
            'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
        },
        'playlist_mincount': 35,
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the channel playlist, or only its current clip.

        The page's ``currentClip`` JS object identifies the current clip.
        When ``_yes_playlist`` declines the playlist, only that clip is
        extracted; otherwise every video link on the page becomes an entry.
        """
        channel_id = self._match_id(url)
        webpage = self._download_webpage(url, channel_id)

        def clip_source_to_json(source):
            # Drop the "' + '" joins between adjacent string literals before
            # handing the object literal to js_to_json.
            return js_to_json(re.sub(r'\'\s*\+\s*\'', '', source))

        clip_source = self._search_regex(
            r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info')
        clip = self._parse_json(
            clip_source, channel_id, transform_source=clip_source_to_json)
        video_id = remove_start(clip['ckmId'], 'mvp:')
        video_name = url_basename(clip['url'])

        if not self._yes_playlist(channel_id, video_name, playlist_label='channel'):
            return self._extract_from_id(video_id, webpage)

        link_pattern = (
            r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE)
        entries = []
        for video_link in re.findall(link_pattern, webpage):
            entries.append(self.url_result(video_link, OnetIE.ie_key()))

        title = strip_or_none(get_element_by_class('o_channelName', webpage))
        description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
        return self.playlist_result(entries, channel_id, title, description)
43a3d9ed S |
194 | |
195 | ||
class OnetPlIE(InfoExtractor):
    """Extractor for article pages on onet.pl, businessinsider.com.pl and plejada.pl.

    Finds the mvp id in the article (or in its pulsembed embed page) and
    delegates the actual extraction to ``OnetMVPIE``.
    """

    IE_NAME = 'onet.pl'
    _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'

    _TESTS = [{
        'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
        'md5': 'b94021eb56214c3969380388b6e73cb0',
        'info_dict': {
            'id': '1561707.1685479',
            'ext': 'mp4',
            'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
            'description': 'md5:61fb0740084d2d702ea96512a03585b4',
            'upload_date': '20170214',
            'timestamp': 1487078046,
        },
    }, {
        # embedded via pulsembed
        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
        'info_dict': {
            'id': '501235.965429946',
            'ext': 'mp4',
            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
            'upload_date': '20170622',
            'timestamp': 1498159955,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
        'only_matching': True,
    }, {
        'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
        'only_matching': True,
    }, {
        'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
        'only_matching': True,
    }, {
        'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
        'only_matching': True,
    }]

    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
        """Return the ``data-mvp`` / ``data-params-mvp`` id found in *webpage*.

        *default* is forwarded to ``_search_regex``.
        """
        mvp_id_re = r'data-(?:params-)?mvp=["\'](\d+\.\d+)'
        return self._search_regex(mvp_id_re, webpage, 'mvp id', default=default)

    def _real_extract(self, url):
        """Locate the mvp id for the article and hand off to ``OnetMVPIE``."""
        video_id = self._match_id(url)
        article_page = self._download_webpage(url, video_id)

        mvp_id = self._search_mvp_id(article_page, default=None)
        if not mvp_id:
            # No id in the article itself: follow the pulsembed embed and
            # look for the id there (this second lookup has no default).
            embed_url = self._search_regex(
                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
                article_page, 'pulsembed url', group='url')
            embed_page = self._download_webpage(
                embed_url, video_id, 'Downloading pulsembed webpage')
            mvp_id = self._search_mvp_id(embed_page)

        mvp_url = 'onetmvp:%s' % mvp_id
        return self.url_result(mvp_url, OnetMVPIE.ie_key(), video_id=mvp_id)