]>
Commit | Line | Data |
---|---|---|
17f0eb66 M |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import itertools | |
5 | import re | |
6 | ||
7 | from .openload import PhantomJSwrapper | |
8 | ||
9 | from .common import InfoExtractor | |
10 | from ..utils import ( | |
11 | ExtractorError, | |
12 | RegexNotFoundError, | |
13 | strip_or_none, | |
14 | try_get | |
15 | ) | |
16 | ||
17 | ||
class RCTIPlusBaseIE(InfoExtractor):
    """Common base for the RCTI+ extractors: token retrieval and API access."""

    def _real_initialize(self):
        """Request an access token from the visitor endpoint and store it in ``_AUTH_KEY``."""
        visitor = self._download_json(
            'https://api.rctiplus.com/api/v1/visitor?platform=web',  # platform can be web, mweb, android, ios
            None, 'Fetching authorization key')
        self._AUTH_KEY = visitor['data']['access_token']

    def _call_api(self, url, video_id, note=None):
        """Download a JSON API response, sending ``_AUTH_KEY`` as the Authorization header.

        Returns a ``(data, meta)`` tuple taken from the response's ``data`` and
        ``meta`` keys (either may be None when the key is absent).

        Raises ExtractorError when the response carries a non-zero status code;
        the client-facing message of the API is included in the error text.
        """
        response = self._download_json(
            url, video_id, note=note, headers={'Authorization': self._AUTH_KEY})
        status = response.get('status', {})
        if status.get('code', 0) != 0:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, status['message_client']), cause=response)
        return response.get('data'), response.get('meta')
30 | ||
31 | ||
class RCTIPlusIE(RCTIPlusBaseIE):
    """Extractor for a single RCTI+ video (episode, clip or extra)."""

    _VALID_URL = r'https://www\.rctiplus\.com/programs/\d+?/.*?/(?P<type>episode|clip|extra)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
        'md5': '56ed45affad45fa18d5592a1bc199997',
        'info_dict': {
            'id': 'v_e22124',
            'title': 'Untuk Lola',
            'display_id': 'untuk-lola',
            'description': 'md5:2b809075c0b1e071e228ad6d13e41deb',
            'ext': 'mp4',
            'duration': 1400,
            'timestamp': 1615978800,
            'upload_date': '20210317',
            'series': 'Kiko : Untuk Lola',
            'season_number': 1,
            'episode_number': 1,
            'channel': 'RCTI',
        },
        'params': {
            'fixup': 'never',
        },
    }, {  # Clip; Series title doesn't appear on metadata JSON
        'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish',
        'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0',
        'info_dict': {
            'id': 'v_c3921',
            'title': 'Make A Wish',
            'display_id': 'make-a-wish',
            'description': 'Make A Wish',
            'ext': 'mp4',
            'duration': 288,
            'timestamp': 1571652600,
            'upload_date': '20191021',
            'series': 'Cahaya Terindah',
            'channel': 'RCTI',
        },
        'params': {
            'fixup': 'never',
        },
    }, {  # Extra
        'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden',
        'md5': 'c48106afdbce609749f5e0c007d9278a',
        'info_dict': {
            'id': 'v_ex9438',
            'title': 'md5:2ede828c0f8bde249e0912be150314ca',
            'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933',
            'description': 'md5:2ede828c0f8bde249e0912be150314ca',
            'ext': 'mp4',
            'duration': 93,
            'timestamp': 1587561540,
            'upload_date': '20200422',
            'series': 'iNews Malam',
            'channel': 'INews',
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _search_auth_key(self, webpage):
        """Replace ``_AUTH_KEY`` with the key embedded in the webpage, if one is found.

        The previously stored key is kept when the page contains no match.
        """
        auth_key = self._search_regex(
            r'\'Authorization\':"(?P<auth>[^"]+)"', webpage, 'auth-key', default=None)
        if auth_key:
            self._AUTH_KEY = auth_key

    def _real_extract(self, url):
        """Build the info dict for one video from the URL and metadata API endpoints."""
        video_type, video_id, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        self._search_auth_key(webpage)

        video_json = self._call_api(
            'https://api.rctiplus.com/api/v1/%s/%s/url?appierid=.1' % (video_type, video_id),
            display_id, 'Downloading video URL JSON')[0]
        video_url = video_json['url']
        if 'akamaized' in video_url:
            # Akamai's CDN requires a session to at least be made via Conviva's API
            # TODO: Reverse-engineer Conviva's heartbeat code to avoid phantomJS
            try:
                PhantomJSwrapper(self).get(url, webpage, display_id, note2='Initiating video session')
            except ExtractorError:
                # Best effort only: extraction continues without the session.
                self.report_warning(
                    'PhantomJS is highly recommended for this video, as it might load incredibly slowly otherwise. '
                    'You can also try opening the page in this device\'s browser first')

        video_meta, meta_paths = self._call_api(
            'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id),
            display_id, 'Downloading video metadata')
        # Both values come from optional keys of the API response.
        video_meta = video_meta or {}
        meta_paths = meta_paths or {}

        image_path = meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/')
        thumbnails = [{
            'id': image_key,
            # 2000px seems to be the highest resolution that can be given
            'url': '%s%d%s' % (image_path, 2000, video_meta[image_key]),
        } for image_key in ('portrait_image', 'landscape_image') if video_meta.get(image_key)]

        formats = self._extract_m3u8_formats(
            video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
        for fmt in formats:
            if 'akamaized' in fmt['url']:
                # Referer header is required for akamai CDNs
                fmt.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/'

        self._sort_formats(formats)

        genre = video_meta.get('genre')
        return {
            'id': video_meta.get('product_id') or video_json.get('product_id'),
            'title': video_meta.get('title') or video_json.get('content_name'),
            'display_id': display_id,
            'description': video_meta.get('summary'),
            'timestamp': video_meta.get('release_date'),
            'duration': video_meta.get('duration'),
            # Avoid emitting [None] when the metadata has no genre.
            'categories': [genre] if genre else None,
            'average_rating': video_meta.get('star_rating'),
            'series': video_meta.get('program_title') or video_json.get('program_title'),
            'season_number': video_meta.get('season'),
            'episode_number': video_meta.get('episode'),
            'channel': video_json.get('tv_name'),
            'channel_id': video_json.get('tv_id'),
            'formats': formats,
            'thumbnails': thumbnails,
        }
157 | ||
158 | ||
class RCTIPlusSeriesIE(RCTIPlusBaseIE):
    """Extractor for an RCTI+ program page, returned as a playlist of its videos."""

    _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:\W)*$'
    _TESTS = [{
        'url': 'https://www.rctiplus.com/programs/540/upin-ipin',
        'playlist_mincount': 417,
        'info_dict': {
            'id': '540',
            'title': 'Upin & Ipin',
            'description': 'md5:22cc912381f389664416844e1ec4f86b',
        },
    }, {
        'url': 'https://www.rctiplus.com/programs/540/upin-ipin/#',
        'only_matching': True,
    }]
    _AGE_RATINGS = {  # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
        'S-SU': 2,
        'SU': 2,
        'P': 2,
        'A': 7,
        'R': 13,
        'R-R/1': 17,  # Labelled as 17+ despite being R
        'D': 18,
    }

    @staticmethod
    def _collect_names(series_meta, *keys):
        """Return the stripped, non-empty ``name`` values of the lists stored under ``keys``."""
        names = []
        for key in keys:
            for item in series_meta.get(key) or []:
                name = strip_or_none(item.get('name'))
                if name:
                    names.append(name)
        return names

    def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata=None):
        """Yield url_result entries for every video of a paginated listing endpoint.

        ``url`` must already contain a query string, since paging parameters are
        appended with ``&``. ``metadata`` is merged into every yielded entry.
        Yields nothing when the API reports the listing as not found or when it
        has no pages; any other ExtractorError is propagated.
        """
        try:
            # Page 0 is requested only to learn the number of pages.
            total_pages = self._call_api(
                '%s&length=20&page=0' % url,
                display_id, note)[1]['pagination']['total_page']
        except ExtractorError as e:
            if 'not found' in str(e):
                return
            raise
        if total_pages <= 0:
            return

        for page_num in range(1, total_pages + 1):
            episode_list = self._call_api(
                '%s&length=20&page=%s' % (url, page_num),
                display_id, '%s page %s' % (note, page_num))[0] or []

            for video_json in episode_list:
                url_res = self.url_result(
                    video_json['share_link'], 'RCTIPlus',
                    video_json.get('product_id'), video_json.get('title'))
                url_res.update(metadata or {})
                yield url_res

    def _real_extract(self, url):
        """Return a playlist made of all season episodes, clips and extras of the program."""
        series_id, display_id = re.match(self._VALID_URL, url).groups()

        series_meta, meta_paths = self._call_api(
            'https://api.rctiplus.com/api/v1/program/%s/detail' % series_id,
            display_id, 'Downloading series metadata')
        series_meta = series_meta or {}

        metadata = {
            # try_get yields None when the rating is missing or its code is unknown.
            'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
            'cast': self._collect_names(series_meta, 'starring', 'creator', 'writer'),
            # The info-dict field is 'tags'; the API key it is read from is 'tag'.
            'tags': self._collect_names(series_meta, 'tag'),
        }

        seasons_list = self._call_api(
            'https://api.rctiplus.com/api/v1/program/%s/season' % series_id,
            display_id, 'Downloading seasons list JSON')[0] or []

        # Each element is a lazy generator; nothing is downloaded until iterated.
        entries = [
            self._entries(
                'https://api.rctiplus.com/api/v2/program/%s/episode?season=%s' % (series_id, season['season']),
                display_id, 'Downloading season %s episode entries' % season['season'], metadata)
            for season in seasons_list]
        entries.append(self._entries(
            'https://api.rctiplus.com/api/v2/program/%s/clip?content_id=0' % series_id,
            display_id, 'Downloading clip entries', metadata))
        entries.append(self._entries(
            'https://api.rctiplus.com/api/v2/program/%s/extra?content_id=0' % series_id,
            display_id, 'Downloading extra entries', metadata))

        return self.playlist_result(
            itertools.chain.from_iterable(entries), series_id,
            series_meta.get('title'), series_meta.get('summary'), **metadata)