]>
Commit | Line | Data |
---|---|---|
f8752b86 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | determine_ext, | |
9 | ExtractorError, | |
10 | float_or_none, | |
11 | get_element_by_class, | |
12 | int_or_none, | |
13 | js_to_json, | |
14 | parse_iso8601, | |
15 | remove_start, | |
16 | strip_or_none, | |
17 | url_basename, | |
18 | ) | |
19 | ||
20 | ||
class OnetBaseIE(InfoExtractor):
    """Shared helpers for the onet.tv extractors.

    Provides lookup of the "mvp" asset id embedded in a page and
    construction of an info dict from the asset-detail JSON endpoint.
    """

    def _search_mvp_id(self, webpage):
        """Return the id captured from an ``id="mvp:<id>"`` attribute in *webpage*."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage):
        """Fetch asset details for *video_id* and build the info dict.

        *webpage* is used as the preferred source of title and description
        (via the og search helpers); the API ``meta`` mapping is the
        fallback.

        Raises ExtractorError when the API reports an error or returns no
        asset under the ``'0'`` key of the result.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        video = response['result'].get('0')
        if not video:
            # Without this guard the lookup below fails with an opaque
            # TypeError on None instead of a meaningful extractor error.
            raise ExtractorError(
                '%s returned no video data for %s' % (self.IE_NAME, video_id))

        formats = []
        # video['formats'] maps a group name to {format_id: [format, ...]};
        # only the nested values are needed, and entries of any other shape
        # are skipped.
        for formats_dict in video['formats'].values():
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    if format_id == 'ism':
                        formats.extend(self._extract_ism_formats(
                            video_url, video_id, 'mss', fatal=False))
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    else:
                        formats.append({
                            'url': video_url,
                            'format_id': format_id,
                            'height': int_or_none(f.get('vertical_resolution')),
                            'width': int_or_none(f.get('horizontal_resolution')),
                            'abr': float_or_none(f.get('audio_bitrate')),
                            'vbr': float_or_none(f.get('video_bitrate')),
                        })
        self._sort_formats(formats)

        meta = video.get('meta', {})

        title = self._og_search_title(webpage, default=None) or meta['title']
        description = self._og_search_description(webpage, default=None) or meta.get('description')
        # 'lenght' is checked as a fallback spelling of the same key
        # (presumably a typo present in some API responses).
        duration = meta.get('length') or meta.get('lenght')
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
90 | ||
91 | ||
class OnetIE(OnetBaseIE):
    """Extractor for a single onet.tv video page."""

    IE_NAME = 'onet.tv'
    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'

    _TEST = {
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': 'e3ffbf47590032ac3f27249204173d50',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and return the info dict for its video."""
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('display_id')
        video_id = url_match.group('id')

        webpage = self._download_webpage(url, display_id)

        # The API is queried with the mvp id found in the page, not the URL id.
        asset_id = self._search_mvp_id(webpage)
        info = self._extract_from_id(asset_id, webpage)

        # Report the ids taken from the URL in the final result.
        info['id'] = video_id
        info['display_id'] = display_id
        return info
125 | ||
126 | ||
class OnetChannelIE(OnetBaseIE):
    """Extractor for an onet.tv channel page (a playlist, or its current clip)."""

    IE_NAME = 'onet.tv:channel'
    _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)'

    _TEST = {
        'url': 'http://onet.tv/k/openerfestival',
        'info_dict': {
            'id': 'openerfestival',
            'title': 'Open\'er Festival Live',
            'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.',
        },
        'playlist_mincount': 46,
    }

    def _real_extract(self, url):
        """Return a playlist of the channel's videos.

        When the 'noplaylist' option is set, only the clip described by the
        page's ``currentClip`` variable is extracted instead.
        """
        channel_id = self._match_id(url)
        webpage = self._download_webpage(url, channel_id)

        def _clip_source_to_json(source):
            # Drop `' + '` joins so adjacent single-quoted literals merge
            # into one string before the JS-to-JSON conversion.
            return js_to_json(re.sub(r'\'\s*\+\s*\'', '', source))

        clip_source = self._search_regex(
            r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info')
        clip = self._parse_json(
            clip_source, channel_id, transform_source=_clip_source_to_json)

        video_id = remove_start(clip['ckmId'], 'mvp:')
        video_name = url_basename(clip['url'])

        if self._downloader.params.get('noplaylist'):
            self.to_screen(
                'Downloading just video %s because of --no-playlist' % video_name)
            return self._extract_from_id(video_id, webpage)

        self.to_screen(
            'Downloading channel %s - add --no-playlist to just download video %s' % (
                channel_id, video_name))

        # Every anchor pointing at a video page becomes one playlist entry.
        entries = []
        for video_link in re.findall(
                r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)',
                webpage):
            entries.append(self.url_result(video_link, OnetIE.ie_key()))

        title_html = get_element_by_class('o_channelName', webpage)
        description_html = get_element_by_class('o_channelDesc', webpage)
        return self.playlist_result(
            entries, channel_id,
            strip_or_none(title_html), strip_or_none(description_html))