]>
Commit | Line | Data |
---|---|---|
f8752b86 YCH |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | determine_ext, | |
6 | ExtractorError, | |
7 | float_or_none, | |
8 | get_element_by_class, | |
9 | int_or_none, | |
10 | js_to_json, | |
bd65f181 | 11 | NO_DEFAULT, |
f8752b86 YCH |
12 | parse_iso8601, |
13 | remove_start, | |
14 | strip_or_none, | |
15 | url_basename, | |
16 | ) | |
17 | ||
18 | ||
class OnetBaseIE(InfoExtractor):
    """Common base for the Onet extractors.

    Provides the shared URL prefix pattern, a helper that locates the
    ``mvp:`` id inside a web page, and the extraction routine that turns
    such an id into an info dict by querying the asset-detail endpoint.
    """

    _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'

    def _search_mvp_id(self, webpage):
        """Return the id captured from an ``id="mvp:<id>"`` attribute in *webpage*."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage=None):
        """Build the info dict for *video_id* from the asset-detail endpoint.

        :param video_id: id sent to the endpoint as both the request id and
            the ``ID_Publikacji`` parameter.
        :param webpage: optional page HTML; when given, its OpenGraph title
            and description take precedence over the endpoint metadata.
        :returns: dict with ``id``, ``title``, ``description``, ``duration``,
            ``timestamp`` and ``formats``.
        :raises ExtractorError: when the response carries an ``error`` member
            or contains no asset under ``result['0']``.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        # The asset is expected under the string key '0'. Fail with a clear
        # extractor error instead of a TypeError on None further down.
        video = (response.get('result') or {}).get('0')
        if not video:
            raise ExtractorError(
                '%s returned no asset for %s' % (self.IE_NAME, video_id))

        formats = []
        for format_type, formats_dict in video['formats'].items():
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    if format_id.startswith('ism'):
                        formats.extend(self._extract_ism_formats(
                            video_url, video_id, 'mss', fatal=False))
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    elif format_id.startswith('hls'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    else:
                        # Plain HTTP download: describe it from the entry itself.
                        http_f = {
                            'url': video_url,
                            'format_id': format_id,
                            'abr': float_or_none(f.get('audio_bitrate')),
                        }
                        if format_type == 'audio':
                            http_f['vcodec'] = 'none'
                        else:
                            http_f.update({
                                'height': int_or_none(f.get('vertical_resolution')),
                                'width': int_or_none(f.get('horizontal_resolution')),
                                'vbr': float_or_none(f.get('video_bitrate')),
                            })
                        formats.append(http_f)
        self._sort_formats(formats)

        # `or {}` also covers an explicit null "meta" member in the response.
        meta = video.get('meta') or {}

        title = (self._og_search_title(
            webpage, default=None) if webpage else None) or meta['title']
        description = (self._og_search_description(
            webpage, default=None) if webpage else None) or meta.get('description')
        # The misspelled key 'lenght' is tried as a fallback for 'length'.
        duration = meta.get('length') or meta.get('lenght')
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
102 | ||
103 | ||
d31aa74f S |
class OnetMVPIE(OnetBaseIE):
    """Extractor for ``onetmvp:<id>`` pseudo-URLs."""

    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'onetmvp:381027.1509591944',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Extract the asset whose id is matched by ``_VALID_URL`` in *url*."""
        mvp_id = self._match_id(url)
        return self._extract_from_id(mvp_id)
114 | ||
115 | ||
class OnetIE(OnetBaseIE):
    """Extractor for single video pages under the Onet URL prefix."""

    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.tv'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': '436102770fb095c75b8bb0392d3da9ff',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page, find its mvp id and extract the asset.

        The returned info dict uses the ids taken from *url* rather than
        the mvp id that was used to query the asset.
        """
        url_match = self._match_valid_url(url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')

        page = self._download_webpage(url, display_id)
        asset_id = self._search_mvp_id(page)

        info = self._extract_from_id(asset_id, page)
        # Override the asset id with the ids visible in the page URL.
        info['id'] = video_id
        info['display_id'] = display_id
        return info
152 | ||
153 | ||
class OnetChannelIE(OnetBaseIE):
    """Extractor for channel pages under the Onet URL prefix."""

    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
    IE_NAME = 'onet.tv:channel'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival',
        'info_dict': {
            'id': 'openerfestival',
            'title': "Open'er Festival",
            'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
        },
        'playlist_mincount': 35,
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the channel as a playlist, or only its current clip.

        The current clip is extracted on its own when ``_yes_playlist``
        returns a falsy value; otherwise every video link found on the
        page becomes a playlist entry handled by ``OnetIE``.
        """
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        def clip_source_to_json(source):
            # Collapse JS string concatenations ('a' + 'b') before converting.
            return js_to_json(re.sub(r'\'\s*\+\s*\'', '', source))

        clip_source = self._search_regex(
            r'var\s+currentClip\s*=\s*({[^}]+})', page, 'video info')
        clip = self._parse_json(
            clip_source, channel_id, transform_source=clip_source_to_json)
        video_id = remove_start(clip['ckmId'], 'mvp:')
        video_name = url_basename(clip['url'])

        if not self._yes_playlist(channel_id, video_name, playlist_label='channel'):
            return self._extract_from_id(video_id, page)

        link_pattern = (
            r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE)
        entries = []
        for link in re.findall(link_pattern, page):
            entries.append(self.url_result(link, OnetIE.ie_key()))

        title = strip_or_none(get_element_by_class('o_channelName', page))
        description = strip_or_none(get_element_by_class('o_channelDesc', page))
        return self.playlist_result(entries, channel_id, title, description)
43a3d9ed S |
195 | |
196 | ||
class OnetPlIE(InfoExtractor):
    """Extractor for article pages on onet.pl and the related sites in ``_VALID_URL``."""

    _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.pl'

    _TESTS = [{
        'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
        'md5': 'b94021eb56214c3969380388b6e73cb0',
        'info_dict': {
            'id': '1561707.1685479',
            'ext': 'mp4',
            'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
            'description': 'md5:61fb0740084d2d702ea96512a03585b4',
            'upload_date': '20170214',
            'timestamp': 1487078046,
        },
    }, {
        # embedded via pulsembed
        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
        'info_dict': {
            'id': '501235.965429946',
            'ext': 'mp4',
            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
            'upload_date': '20170622',
            'timestamp': 1498159955,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
        'only_matching': True,
    }, {
        'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
        'only_matching': True,
    }, {
        'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
        'only_matching': True,
    }, {
        'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
        'only_matching': True,
    }]

    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
        """Return the ``data-mvp`` / ``data-params-mvp`` id found in *webpage*.

        *default* is forwarded to ``_search_regex`` unchanged.
        """
        return self._search_regex(
            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
            default=default)

    def _real_extract(self, url):
        """Resolve the page to an ``onetmvp:`` URL handled by ``OnetMVPIE``.

        When the page has no mvp id of its own, the pulsembed page it
        references is downloaded and searched instead.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        mvp_id = self._search_mvp_id(page, default=None)
        if not mvp_id:
            # Fall back to the embedded player page referenced via data-src.
            embed_url = self._search_regex(
                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
                page, 'pulsembed url', group='url')
            embed_page = self._download_webpage(
                embed_url, video_id, 'Downloading pulsembed webpage')
            mvp_id = self._search_mvp_id(embed_page)

        target_url = 'onetmvp:%s' % mvp_id
        return self.url_result(target_url, OnetMVPIE.ie_key(), video_id=mvp_id)