]>
Commit | Line | Data |
---|---|---|
77c8ebe6 RA |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import base64 | |
5 | import json | |
6 | import hashlib | |
7 | import hmac | |
8 | import random | |
9 | import string | |
10 | import time | |
11 | ||
12 | from .common import InfoExtractor | |
13 | from ..compat import ( | |
4b85f0f9 | 14 | compat_HTTPError, |
77c8ebe6 | 15 | compat_urllib_parse_urlencode, |
16132cff | 16 | compat_urllib_parse, |
77c8ebe6 RA |
17 | ) |
18 | from ..utils import ( | |
4b85f0f9 | 19 | ExtractorError, |
77c8ebe6 RA |
20 | float_or_none, |
21 | int_or_none, | |
7bb64347 | 22 | traverse_obj, |
77c8ebe6 RA |
23 | ) |
24 | ||
25 | ||
class VRVBaseIE(InfoExtractor):
    """Common plumbing for the VRV extractors.

    Provides signed calls to the ``/core/`` API (request signing follows
    RFC 5849 with HMAC-SHA1), calls to CMS paths that carry the signing
    parameters as query arguments, and the bootstrap that reads the API
    parameters from the site's front page.
    """

    # Both are filled in by _real_initialize() from the page's app config.
    _API_DOMAIN = None
    _API_PARAMS = {}
    # Query parameters sent with every CMS request; fetched on first use.
    _CMS_SIGNING = {}
    # Token credentials; empty unless a subclass stores them.
    _TOKEN = None
    _TOKEN_SECRET = ''

    def _call_api(self, path, video_id, note, data=None):
        """Perform a signed request against ``<api domain>/core/<path>``.

        The request is a POST with a JSON body when ``data`` is truthy and a
        GET otherwise.  ``note`` is interpolated into the progress message.
        Returns the decoded JSON response.

        Raises ExtractorError (expected) carrying the server's ``message``
        when the API answers with HTTP 401; any other failure propagates
        unchanged.
        """
        # https://tools.ietf.org/html/rfc5849#section-3
        base_url = ''.join((self._API_DOMAIN, '/core/', path))

        oauth_params = [
            ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
            ('oauth_nonce', ''.join(
                random.choice(string.ascii_letters) for _ in range(32))),
            ('oauth_signature_method', 'HMAC-SHA1'),
            ('oauth_timestamp', int(time.time())),
        ]
        if self._TOKEN:
            oauth_params.append(('oauth_token', self._TOKEN))
        encoded_query = compat_urllib_parse_urlencode(oauth_params)

        headers = self.geo_verification_headers()
        http_method = 'GET'
        if data:
            data = json.dumps(data).encode()
            headers['Content-Type'] = 'application/json'
            http_method = 'POST'

        # Signature base string: method, URL and parameters, each
        # percent-encoded with no safe characters, joined by '&'.
        base_string = '&'.join([
            http_method,
            compat_urllib_parse.quote(base_url, ''),
            compat_urllib_parse.quote(encoded_query, ''),
        ])
        signing_key = '%s&%s' % (
            self._API_PARAMS['oAuthSecret'], self._TOKEN_SECRET)
        digest = hmac.new(
            signing_key.encode('ascii'), base_string.encode(),
            hashlib.sha1).digest()
        oauth_signature = base64.b64encode(digest).decode()
        encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(
            oauth_signature, '')

        try:
            return self._download_json(
                base_url + '?' + encoded_query, video_id,
                note='Downloading %s JSON metadata' % note,
                headers=headers, data=data)
        except ExtractorError as e:
            cause = e.cause
            if not isinstance(cause, compat_HTTPError) or cause.code != 401:
                raise
            # Surface the API's own explanation for the 401.
            message = json.loads(cause.read().decode())['message']
            raise ExtractorError(message, expected=True)

    def _call_cms(self, path, video_id, note):
        """Download JSON from a CMS ``path`` on the API domain.

        On first use the signing parameters are fetched from the ``index``
        API call: the ``cms_signing`` mapping when present and non-empty,
        otherwise the name/value pairs of every ``signing_policies`` entry
        whose path starts with ``/cms/``.  They are stored on the instance
        and passed as the query of each CMS request.
        """
        if not self._CMS_SIGNING:
            index = self._call_api('index', video_id, 'CMS Signing')
            self._CMS_SIGNING = signing = index.get('cms_signing') or {}
            if not signing:
                for policy in index.get('signing_policies', []):
                    policy_path = policy.get('path')
                    if not policy_path or not policy_path.startswith('/cms/'):
                        continue
                    name = policy.get('name')
                    value = policy.get('value')
                    if name and value:
                        signing[name] = value
        return self._download_json(
            self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
            note='Downloading %s JSON metadata' % note,
            headers=self.geo_verification_headers())

    def _get_cms_resource(self, resource_key, video_id):
        """Resolve ``resource_key`` to a CMS path via the ``cms_resource`` API call."""
        response = self._call_api(
            'cms_resource', video_id, 'resource path',
            data={'resource_key': resource_key})
        return response['__links__']['cms_resource']['href']

    def _real_initialize(self):
        """Read ``cxApiParams`` from the front page's ``window.__APP_CONFIG__``."""
        webpage = self._download_webpage(
            'https://vrv.co/', None, headers=self.geo_verification_headers())
        app_config = self._parse_json(self._search_regex(
            [
                r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
                r'window\.__APP_CONFIG__\s*=\s*({.+})'
            ], webpage, 'app config'), None)
        self._API_PARAMS = app_config['cxApiParams']
        self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
48ab554f RA |
97 | |
class VRVIE(VRVBaseIE):
    """Extractor for ``vrv.co/watch/<id>`` pages (single videos and movie listings)."""

    IE_NAME = 'vrv'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
        'info_dict': {
            'id': 'GR9PNZ396',
            'ext': 'mp4',
            'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
            'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
            'uploader_id': 'seeso',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # movie listing
        'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
        'info_dict': {
            'id': 'G6NQXZ1J6',
            'title': 'Lily C.A.T',
            'description': 'md5:988b031e7809a6aeb60968be4af7db07',
        },
        'playlist_count': 2,
    }]
    _NETRC_MACHINE = 'vrv'

    def _real_initialize(self):
        """Run the base bootstrap, then log in when credentials are available.

        On login the returned token and token secret are stored on the
        instance so later API calls are signed with them.
        """
        super(VRVIE, self)._real_initialize()

        email, password = self._get_login_info()
        if email is None:
            return

        credentials = {
            'email': email,
            'password': password,
        }
        token_credentials = self._call_api(
            'authenticate/by:credentials', None, 'Token Credentials',
            data=credentials)
        self._TOKEN = token_credentials['oauth_token']
        self._TOKEN_SECRET = token_credentials['oauth_token_secret']

    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
        """Return the formats of one adaptive stream.

        Yields an empty list when ``url`` is falsy or ``stream_format`` is
        not one of ``hls``, ``dash`` or ``adaptive_hls``.  The format id is
        the stream format followed by optional ``audio-<lang>`` and
        ``hardsub-<lang>`` parts; formats that carry audio are tagged with
        ``audio_lang`` when it is given.
        """
        if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
            return []

        id_parts = [stream_format]
        if audio_lang:
            id_parts.append('audio-%s' % audio_lang)
        if hardsub_lang:
            id_parts.append('hardsub-%s' % hardsub_lang)
        format_id = '-'.join(id_parts)
        progress_note = 'Downloading %s information' % format_id

        # After the guard above the only non-HLS value left is 'dash'.
        if stream_format == 'dash':
            adaptive_formats = self._extract_mpd_formats(
                url, video_id, mpd_id=format_id,
                note=progress_note, fatal=False)
        else:
            adaptive_formats = self._extract_m3u8_formats(
                url, video_id, 'mp4', m3u8_id=format_id,
                note=progress_note, fatal=False)

        if audio_lang:
            for fmt in adaptive_formats:
                if fmt.get('acodec') != 'none':
                    fmt['language'] = audio_lang
        return adaptive_formats

    def _real_extract(self, url):
        """Build the info dict for a watch page.

        A ``movie_listing`` whose movie count is not exactly one is returned
        as a playlist of watch-page URLs; a listing with a single movie is
        extracted as that movie.  When the video has no streams link,
        ``raise_login_required()`` is called.
        """
        video_id = self._match_id(url)

        object_path = self._get_cms_resource(
            'cms:/objects/' + video_id, video_id)
        object_data = self._call_cms(object_path, video_id, 'object')['items'][0]
        resource_path = object_data['__links__']['resource']['href']
        video_data = self._call_cms(resource_path, video_id, 'video')
        title = video_data['title']
        description = video_data.get('description')

        if video_data.get('__class__') == 'movie_listing':
            movies_path = video_data['__links__']['movie_listing/movies']['href']
            items = self._call_cms(
                movies_path, video_id, 'movie listing').get('items') or []
            if len(items) != 1:
                entries = []
                for item in items:
                    item_id = item.get('id')
                    if item_id:
                        entries.append(self.url_result(
                            'https://vrv.co/watch/' + item_id,
                            self.ie_key(), item_id, item.get('title')))
                return self.playlist_result(entries, video_id, title, description)
            video_data = items[0]

        streams_path = video_data['__links__'].get('streams', {}).get('href')
        if not streams_path:
            self.raise_login_required()
        streams_json = self._call_cms(streams_path, video_id, 'streams')

        audio_locale = streams_json.get('audio_locale')
        formats = []
        for stream_type, streams in streams_json.get('streams', {}).items():
            if stream_type not in ('adaptive_hls', 'adaptive_dash'):
                continue
            # 'adaptive_hls' -> 'hls', 'adaptive_dash' -> 'dash'
            stream_format = stream_type.split('_')[1]
            for stream in streams.values():
                formats.extend(self._extract_vrv_formats(
                    stream.get('url'), video_id, stream_format,
                    audio_locale, stream.get('hardsub_locale')))
        self._sort_formats(formats)

        subtitles = {}
        for track_kind in ('captions', 'subtitles'):
            for track in streams_json.get(track_kind, {}).values():
                track_url = track.get('url')
                if not track_url:
                    continue
                locale = track.get('locale', 'en-US')
                subtitles.setdefault(locale, []).append({
                    'url': track_url,
                    'ext': track.get('format', 'ass'),
                })

        thumbnails = []
        for image in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
            image_url = image.get('source')
            if image_url:
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'description': description,
            'duration': float_or_none(video_data.get('duration_ms'), 1000),
            'uploader_id': video_data.get('channel_id'),
            'series': video_data.get('series_title'),
            'season': video_data.get('season_title'),
            'season_number': int_or_none(video_data.get('season_number')),
            'season_id': video_data.get('season_id'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('episode_number')),
            'episode_id': video_data.get('production_episode_id'),
        }
48ab554f RA |
248 | |
249 | ||
class VRVSeriesIE(VRVBaseIE):
    """Extractor for ``vrv.co/series/<id>`` pages; yields a playlist of episodes."""

    IE_NAME = 'vrv:series'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
    _TEST = {
        'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
        'info_dict': {
            'id': 'G68VXG3G6',
        },
        'playlist_mincount': 11,
    }

    def _real_extract(self, url):
        """Return a playlist with one watch-page entry per episode of the series.

        Seasons are listed through the CMS and each season's episodes link
        is followed.  Episodes without an ``id`` are skipped rather than
        aborting the whole playlist, and a missing or null ``items`` list is
        treated as empty.
        """
        series_id = self._match_id(url)

        seasons_path = self._get_cms_resource(
            'cms:/seasons?series_id=' + series_id, series_id)
        seasons_data = self._call_cms(seasons_path, series_id, 'seasons')

        entries = []
        # `or []` also covers an explicit JSON null, which .get(key, [])
        # would hand back as None.
        for season in seasons_data.get('items') or []:
            episodes_path = season['__links__']['season/episodes']['href']
            episodes = self._call_cms(episodes_path, series_id, 'episodes')
            for episode in episodes.get('items') or []:
                episode_id = episode.get('id')
                if not episode_id:
                    # No id means no watch URL can be built for this entry.
                    continue
                entries.append(self.url_result(
                    'https://vrv.co/watch/' + episode_id,
                    'VRV', episode_id, episode.get('title')))

        return self.playlist_result(entries, series_id)