]>
Commit | Line | Data |
---|---|---|
f8752b86 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | determine_ext, | |
9 | ExtractorError, | |
10 | float_or_none, | |
11 | get_element_by_class, | |
12 | int_or_none, | |
13 | js_to_json, | |
bd65f181 | 14 | NO_DEFAULT, |
f8752b86 YCH |
15 | parse_iso8601, |
16 | remove_start, | |
17 | strip_or_none, | |
18 | url_basename, | |
19 | ) | |
20 | ||
21 | ||
class OnetBaseIE(InfoExtractor):
    """Shared helpers for Onet extractors.

    Provides lookup of the "mvp" identifier inside a page and extraction of
    the info dict for such an identifier from the qi.ckm.onetapi.pl endpoint.
    """

    def _search_mvp_id(self, webpage):
        """Return the id found in an ``id="mvp:<id>"`` attribute of webpage."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage=None):
        """Build the info dict for the asset identified by video_id.

        video_id is sent to the ``get_asset_detail`` JSON-RPC method.
        webpage is optional; when given, its OpenGraph title and description
        take precedence over the values found in the API metadata.

        Raises ExtractorError when the API reports an error or returns no
        asset data.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        # The asset is expected under result['0']; fail with a clear message
        # instead of a TypeError/KeyError when it is absent.
        video = (response.get('result') or {}).get('0')
        if not video:
            raise ExtractorError(
                'Unable to find video data for %s' % video_id)

        formats = []
        # Layout consumed here: {<group>: {<format_id>: [<format>, ...]}}.
        # Entries that do not have this shape are skipped.
        for formats_dict in (video.get('formats') or {}).values():
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    if format_id == 'ism':
                        formats.extend(self._extract_ism_formats(
                            video_url, video_id, 'mss', fatal=False))
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    else:
                        formats.append({
                            'url': video_url,
                            'format_id': format_id,
                            'height': int_or_none(f.get('vertical_resolution')),
                            'width': int_or_none(f.get('horizontal_resolution')),
                            'abr': float_or_none(f.get('audio_bitrate')),
                            'vbr': float_or_none(f.get('video_bitrate')),
                        })
        self._sort_formats(formats)

        meta = video.get('meta') or {}

        title = (self._og_search_title(
            webpage, default=None) if webpage else None) or meta['title']
        description = (self._og_search_description(
            webpage, default=None) if webpage else None) or meta.get('description')
        # 'lenght' is checked as a fallback key (presumably a misspelling
        # present in some responses); the value is coerced to a number.
        duration = float_or_none(meta.get('length') or meta.get('lenght'))
        # NOTE(review): ' ' is presumably the date/time delimiter of addDate.
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
93 | ||
94 | ||
d31aa74f S |
class OnetMVPIE(OnetBaseIE):
    """Extractor for ``onetmvp:<id>`` URLs; delegates to _extract_from_id."""

    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'onetmvp:381027.1509591944',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Return the info dict for the id embedded in url."""
        mvp_id = self._match_id(url)
        return self._extract_from_id(mvp_id)
105 | ||
106 | ||
class OnetIE(OnetBaseIE):
    """Extractor for single onet.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.tv'

    _TEST = {
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': 'e3ffbf47590032ac3f27249204173d50',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }

    def _real_extract(self, url):
        """Download the page, find its mvp id and return the info dict.

        The 'id' and 'display_id' of the result are taken from the URL and
        override the id returned by _extract_from_id.
        """
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')

        page = self._download_webpage(url, display_id)
        mvp_id = self._search_mvp_id(page)

        result = self._extract_from_id(mvp_id, page)
        result['id'] = video_id
        result['display_id'] = display_id
        return result
140 | ||
141 | ||
class OnetChannelIE(OnetBaseIE):
    """Extractor for onet.tv channel pages (playlist of OnetIE entries)."""

    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)'
    IE_NAME = 'onet.tv:channel'

    _TEST = {
        'url': 'http://onet.tv/k/openerfestival',
        'info_dict': {
            'id': 'openerfestival',
            'title': 'Open\'er Festival Live',
            'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.',
        },
        'playlist_mincount': 46,
    }

    def _real_extract(self, url):
        """Return a playlist of the channel's videos.

        When the 'noplaylist' param is set, only the clip described by the
        page's ``currentClip`` variable is extracted instead.
        """
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        def _clip_source_to_json(source):
            # Drop "' + '" sequences between adjacent quoted fragments
            # before converting the JS object literal to JSON.
            return js_to_json(re.sub(r'\'\s*\+\s*\'', '', source))

        clip_source = self._search_regex(
            r'var\s+currentClip\s*=\s*({[^}]+})', page, 'video info')
        clip = self._parse_json(
            clip_source, channel_id, transform_source=_clip_source_to_json)

        video_id = remove_start(clip['ckmId'], 'mvp:')
        video_name = url_basename(clip['url'])

        if self._downloader.params.get('noplaylist'):
            self.to_screen(
                'Downloading just video %s because of --no-playlist' % video_name)
            return self._extract_from_id(video_id, page)

        self.to_screen(
            'Downloading channel %s - add --no-playlist to just download video %s' % (
                channel_id, video_name))

        # Every anchor pointing at a single-video URL becomes an entry.
        entries = []
        for link in re.findall(
                r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)',
                page):
            entries.append(self.url_result(link, OnetIE.ie_key()))

        title = strip_or_none(get_element_by_class('o_channelName', page))
        description = strip_or_none(get_element_by_class('o_channelDesc', page))
        return self.playlist_result(entries, channel_id, title, description)
43a3d9ed S |
185 | |
186 | ||
class OnetPlIE(InfoExtractor):
    """Extractor for article pages on onet.pl and related sites.

    Resolves the page to an ``onetmvp:<id>`` URL handled by OnetMVPIE.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
    IE_NAME = 'onet.pl'

    _TESTS = [{
        'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
        'md5': 'b94021eb56214c3969380388b6e73cb0',
        'info_dict': {
            'id': '1561707.1685479',
            'ext': 'mp4',
            'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
            'description': 'md5:61fb0740084d2d702ea96512a03585b4',
            'upload_date': '20170214',
            'timestamp': 1487078046,
        },
    }, {
        # embedded via pulsembed
        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
        'info_dict': {
            'id': '501235.965429946',
            'ext': 'mp4',
            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
            'upload_date': '20170622',
            'timestamp': 1498159955,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
        'only_matching': True,
    }, {
        'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
        'only_matching': True,
    }, {
        'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
        'only_matching': True,
    }, {
        'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
        'only_matching': True,
    }]

    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
        """Return the id from a ``data-mvp``/``data-params-mvp`` attribute.

        default is forwarded unchanged to _search_regex.
        """
        mvp_id = self._search_regex(
            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
            default=default)
        return mvp_id

    def _real_extract(self, url):
        """Find the mvp id of the page and hand over to OnetMVPIE.

        If the page itself carries no mvp id, the pulsembed.eu page it
        references is downloaded and searched instead.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        mvp_id = self._search_mvp_id(page, default=None)
        if not mvp_id:
            embed_url = self._search_regex(
                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
                page, 'pulsembed url', group='url')
            embed_page = self._download_webpage(
                embed_url, video_id, 'Downloading pulsembed webpage')
            # No default here, so the search uses NO_DEFAULT.
            mvp_id = self._search_mvp_id(embed_page)

        target = 'onetmvp:%s' % mvp_id
        return self.url_result(target, OnetMVPIE.ie_key(), video_id=mvp_id)