]>
Commit | Line | Data |
---|---|---|
17f0eb66 M |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import itertools | |
d9e6e948 | 5 | import json |
3dbb2a9d | 6 | import random |
17f0eb66 | 7 | import re |
d9e6e948 | 8 | import time |
17f0eb66 M |
9 | |
10 | from .common import InfoExtractor | |
3dbb2a9d | 11 | from ..compat import compat_HTTPError |
17f0eb66 | 12 | from ..utils import ( |
3dbb2a9d | 13 | dict_get, |
17f0eb66 | 14 | ExtractorError, |
17f0eb66 M |
15 | strip_or_none, |
16 | try_get | |
17 | ) | |
18 | ||
19 | ||
class RCTIPlusBaseIE(InfoExtractor):
    """Common base for the RCTI+ extractors: obtains the API token and wraps API calls."""

    def _real_initialize(self):
        """Request a visitor access token from the API and store it in ``self._AUTH_KEY``."""
        visitor_response = self._download_json(
            'https://api.rctiplus.com/api/v1/visitor?platform=web',  # platform can be web, mweb, android, ios
            None, 'Fetching authorization key')
        self._AUTH_KEY = visitor_response['data']['access_token']

    def _call_api(self, url, video_id, note=None):
        """Download JSON from ``url`` using the stored token as the Authorization header.

        Returns a ``(data, meta)`` tuple taken from the response's ``data`` and
        ``meta`` keys (either may be None when the key is absent).
        Raises ExtractorError when the response reports a non-zero status code.
        """
        request_headers = {'Authorization': self._AUTH_KEY}
        response = self._download_json(url, video_id, note=note, headers=request_headers)

        status_code = response.get('status', {}).get('code', 0)
        if status_code != 0:
            # Surface the API's client-facing message; the raw response is attached as the cause
            raise ExtractorError('%s said: %s' % (self.IE_NAME, response["status"]["message_client"]), cause=response)

        return response.get('data'), response.get('meta')
32 | ||
33 | ||
class RCTIPlusIE(RCTIPlusBaseIE):
    """Extractor for single RCTI+ items: episodes, clips, extras, live events and missed events."""

    _VALID_URL = r'https://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola',
        'md5': '56ed45affad45fa18d5592a1bc199997',
        'info_dict': {
            'id': 'v_e22124',
            'title': 'Untuk Lola',
            'display_id': 'untuk-lola',
            'description': 'md5:2b809075c0b1e071e228ad6d13e41deb',
            'ext': 'mp4',
            'duration': 1400,
            'timestamp': 1615978800,
            'upload_date': '20210317',
            'series': 'Kiko : Untuk Lola',
            'season_number': 1,
            'episode_number': 1,
            'channel': 'RCTI',
        },
        'params': {
            'fixup': 'never',
        },
    }, {  # Clip; Series title doesn't appear on metadata JSON
        'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish',
        'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0',
        'info_dict': {
            'id': 'v_c3921',
            'title': 'Make A Wish',
            'display_id': 'make-a-wish',
            'description': 'Make A Wish',
            'ext': 'mp4',
            'duration': 288,
            'timestamp': 1571652600,
            'upload_date': '20191021',
            'series': 'Cahaya Terindah',
            'channel': 'RCTI',
        },
        'params': {
            'fixup': 'never',
        },
    }, {  # Extra
        'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden',
        'md5': 'c48106afdbce609749f5e0c007d9278a',
        'info_dict': {
            'id': 'v_ex9438',
            'title': 'md5:2ede828c0f8bde249e0912be150314ca',
            'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933',
            'description': 'md5:2ede828c0f8bde249e0912be150314ca',
            'ext': 'mp4',
            'duration': 93,
            'timestamp': 1587561540,
            'upload_date': '20200422',
            'series': 'iNews Malam',
            'channel': 'INews',
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {  # Missed event/replay
        'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib',
        'md5': '649c5f27250faed1452ca8b91e06922d',
        'info_dict': {
            'id': 'v_pe2507',
            'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB',
            'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib',
            'ext': 'mp4',
            'timestamp': 1627142400,
            'upload_date': '20210724',
            'was_live': True,
            'release_timestamp': 1627369200,
        },
        'params': {
            'fixup': 'never',
        },
    }, {  # Live event; Cloudfront CDN
        'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
        'info_dict': {
            'id': 'v_le2530',
            'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB',
            'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib',
            'ext': 'mp4',
            'timestamp': 1627898400,
            'upload_date': '20210802',
            'release_timestamp': 1628067600,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This live event has ended.',
    }, {  # TV; live_at is null
        'url': 'https://www.rctiplus.com/live-event/1/rcti',
        'info_dict': {
            'id': 'v_lt1',
            'title': 'RCTI',
            'display_id': 'rcti',
            'ext': 'mp4',
            'timestamp': 1546344000,
            'upload_date': '20190101',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo',
        },
    }]
    # Base payload posted to Conviva to open a session; 'url' and 'sst' are added per request
    _CONVIVA_JSON_TEMPLATE = {
        't': 'CwsSessionHb',
        'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f',
        'clid': '0',
        'sid': 0,
        'seq': 0,
        'caps': 0,
        'sf': 7,
        'sdk': True,
    }

    def _real_extract(self, url):
        """Build the info dict for one video, clip, extra or (missed) live event.

        Downloads the stream URL JSON and the metadata JSON through the API,
        optionally opens a Conviva session for Akamai-hosted streams, then
        extracts the HLS formats. An HTTP 403 on the manifest is reported as a
        geo restriction (Indonesia only).
        """
        match = re.match(self._VALID_URL, url).groupdict()
        video_type, video_id, display_id = match['type'], match['id'], match['display_id']

        # Missed events are served from the v2 endpoint, everything else from v1
        url_api_version = 'v2' if video_type == 'missed-event' else 'v1'
        appier_id = '23984824_' + str(random.randint(0, 10000000000))  # Based on the webpage's uuidRandom generator
        video_json = self._call_api(
            f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0]
        video_url = video_json['url']

        # try_get yields None when a date is missing or null, in which case start_date is tried instead of live_at
        is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at'])
        if is_upcoming is None:
            is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date'])
        if is_upcoming:
            self.raise_no_formats(
                'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True)
        if 'akamaized' in video_url:
            # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API
            conviva_json_data = {
                **self._CONVIVA_JSON_TEMPLATE,
                'url': video_url,
                'sst': int(time.time())
            }
            conviva_json_res = self._download_json(
                'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id,
                'Creating Conviva session', 'Failed to create Conviva session',
                fatal=False, data=json.dumps(conviva_json_data).encode('utf-8'))
            if conviva_json_res and conviva_json_res.get('err') != 'ok':
                self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err')))

        video_meta, meta_paths = self._call_api(
            'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata')
        # _call_api returns None for a missing 'data'/'meta' key; fall back to
        # empty dicts so the video_json fallbacks below can still be used
        video_meta = video_meta or {}
        meta_paths = meta_paths or {}

        image_path = meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/')
        thumbnails = [{
            'id': image_key,
            'url': '%s%d%s' % (image_path, 2000, video_meta[image_key])  # 2000px seems to be the highest resolution that can be given
        } for image_key in ('portrait_image', 'landscape_image') if video_meta.get(image_key)]

        # Pre-initialise: raise_geo_restricted(metadata_available=True) may return
        # instead of raising, and the code below must still see a bound list
        formats = []
        try:
            formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'})
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                self.raise_geo_restricted(countries=['ID'], metadata_available=True)
            else:
                raise
        for f in formats:
            if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
                f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/'  # Referer header is required for akamai/cloudfront CDNs

        self._sort_formats(formats)

        return {
            'id': video_meta.get('product_id') or video_json.get('product_id'),
            'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
            'display_id': display_id,
            'description': video_meta.get('summary'),
            'timestamp': video_meta.get('release_date') or video_json.get('start_date'),
            'duration': video_meta.get('duration'),
            'categories': [video_meta['genre']] if video_meta.get('genre') else None,
            'average_rating': video_meta.get('star_rating'),
            'series': video_meta.get('program_title') or video_json.get('program_title'),
            'season_number': video_meta.get('season'),
            'episode_number': video_meta.get('episode'),
            'channel': video_json.get('tv_name'),
            'channel_id': video_json.get('tv_id'),
            'formats': formats,
            'thumbnails': thumbnails,
            'is_live': video_type == 'live-event' and not is_upcoming,
            'was_live': video_type == 'missed-event',
            'live_status': 'is_upcoming' if is_upcoming else None,
            'release_timestamp': video_json.get('live_at'),
        }
228 | ||
229 | ||
class RCTIPlusSeriesIE(RCTIPlusBaseIE):
    """Playlist extractor for an RCTI+ program page: all seasons' episodes, then clips and extras."""

    _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.rctiplus.com/programs/540/upin-ipin',
        'playlist_mincount': 417,
        'info_dict': {
            'id': '540',
            'title': 'Upin & Ipin',
            'description': 'md5:22cc912381f389664416844e1ec4f86b',
        },
    }, {
        'url': 'https://www.rctiplus.com/programs/540/upin-ipin/episodes?utm_source=Rplusdweb&utm_medium=share_copy&utm_campaign=programsupin-ipin',
        'only_matching': True,
    }]
    _AGE_RATINGS = {  # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
        'S-SU': 2,
        'SU': 2,
        'P': 2,
        'A': 7,
        'R': 13,
        'R-R/1': 17,  # Labelled as 17+ despite being R
        'D': 18,
    }

    @classmethod
    def suitable(cls, url):
        """Reject URLs that RCTIPlusIE already handles (single videos under /programs/)."""
        return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url)

    @staticmethod
    def _collect_names(series_meta, *keys):
        """Return the stripped, non-empty 'name' values found under each of ``keys``.

        A key that is missing or null in ``series_meta`` contributes nothing.
        """
        names = []
        for key in keys:
            for item in series_meta.get(key) or []:
                name = strip_or_none(item.get('name'))
                if name:
                    names.append(name)
        return names

    def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata=None):
        """Yield a url_result for every item of a paginated listing endpoint.

        ``url`` must already contain a query string, since the paging parameters
        are appended with '&'. ``metadata`` is merged into every yielded entry.
        A listing that the API reports as "not found" yields nothing.
        """
        metadata = metadata or {}
        try:
            # Page 0 is requested only to learn the page count
            total_pages = self._call_api(
                '%s&length=20&page=0' % url,
                display_id, note)[1]['pagination']['total_page']
        except ExtractorError as e:
            if 'not found' in str(e):
                return
            raise
        if total_pages <= 0:
            return

        for page_num in range(1, total_pages + 1):
            episode_list = self._call_api(
                '%s&length=20&page=%s' % (url, page_num),
                display_id, '%s page %s' % (note, page_num))[0] or []

            for video_json in episode_list:
                link = video_json['share_link']
                url_res = self.url_result(link, 'RCTIPlus', video_json.get('product_id'), video_json.get('title'))
                url_res.update(metadata)
                yield url_res

    def _real_extract(self, url):
        """Return a playlist of every episode, clip and extra of the program at ``url``."""
        series_id, display_id = re.match(self._VALID_URL, url).groups()

        series_meta, meta_paths = self._call_api(
            'https://api.rctiplus.com/api/v1/program/%s/detail' % series_id, display_id, 'Downloading series metadata')
        metadata = {
            'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
            'cast': self._collect_names(series_meta, 'starring', 'creator', 'writer'),
            # The info dict field is 'tags'; the API key it is read from is 'tag'
            'tags': self._collect_names(series_meta, 'tag'),
        }

        entries = []
        seasons_list = self._call_api(
            'https://api.rctiplus.com/api/v1/program/%s/season' % series_id, display_id, 'Downloading seasons list JSON')[0] or []
        for season in seasons_list:
            entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/episode?season=%s' % (series_id, season['season']),
                                         display_id, 'Downloading season %s episode entries' % season['season'], metadata))

        entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/clip?content_id=0' % series_id,
                                     display_id, 'Downloading clip entries', metadata))
        entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/extra?content_id=0' % series_id,
                                     display_id, 'Downloading extra entries', metadata))

        # The generators are chained lazily, so listing pages are fetched only as the playlist is consumed
        return self.playlist_result(
            itertools.chain.from_iterable(entries), series_id,
            series_meta.get('title'), series_meta.get('summary'), **metadata)
3dbb2a9d M |
318 | |
319 | ||
class RCTIPlusTVIE(RCTIPlusBaseIE):
    """Resolve a TV channel page or the bare live-event/missed-event page to a concrete video URL."""

    _VALID_URL = r'https://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))'
    _TESTS = [{
        'url': 'https://www.rctiplus.com/tv/rcti',
        'info_dict': {
            'id': 'v_lt1',
            'title': 'RCTI',
            'ext': 'mp4',
            'timestamp': 1546344000,
            'upload_date': '20190101',
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo',
        }
    }, {
        # Returned video will always change
        'url': 'https://www.rctiplus.com/live-event',
        'only_matching': True,
    }, {
        # Returned video will also always change
        'url': 'https://www.rctiplus.com/missed-event',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to RCTIPlusIE whenever it can handle the URL itself."""
        if RCTIPlusIE.suitable(url):
            return False
        return super(RCTIPlusTVIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the API video link embedded in the page and hand it over to RCTIPlusIE."""
        url_groups = re.match(self._VALID_URL, url).groupdict()
        # Exactly one of the two named groups matches; it doubles as the display id
        tv_id = url_groups.get('tvname') or url_groups.get('eventname')

        page_html = self._download_webpage(url, tv_id)
        api_link_pattern = r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url'
        video_type, video_id = self._search_regex(
            api_link_pattern, page_html, 'video link', group=('type', 'id'))

        target_url = f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}'
        return self.url_result(target_url, 'RCTIPlus')