]>
Commit | Line | Data |
---|---|---|
77c8ebe6 RA |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import base64 | |
5 | import json | |
6 | import hashlib | |
7 | import hmac | |
8 | import random | |
9 | import string | |
10 | import time | |
11 | ||
12 | from .common import InfoExtractor | |
13 | from ..compat import ( | |
14 | compat_urllib_parse_urlencode, | |
16132cff | 15 | compat_urllib_parse, |
77c8ebe6 RA |
16 | ) |
17 | from ..utils import ( | |
18 | float_or_none, | |
19 | int_or_none, | |
20 | ) | |
21 | ||
22 | ||
class VRVBaseIE(InfoExtractor):
    """Shared helpers for the VRV extractors: signed API calls and CMS lookups."""

    # Base URL of the API; assigned by _set_api_params().
    _API_DOMAIN = None
    # The 'cxApiParams' object parsed from the page's window.__APP_CONFIG__.
    _API_PARAMS = {}
    # The 'cms_signing' object of the 'index' API response; sent as the query
    # string of every CMS request.
    _CMS_SIGNING = {}

    def _call_api(self, path, video_id, note, data=None):
        """Perform an OAuth 1.0 (HMAC-SHA1) signed request to '/core/<path>'.

        _set_api_params() must have been called first, since both
        _API_DOMAIN and _API_PARAMS are read here.

        path     -- API path appended to '<_API_DOMAIN>/core/'
        video_id -- identifier passed through to _download_json
        note     -- short label inserted into the download note
        data     -- optional object; when truthy it is JSON-encoded and sent
                    as the request body, and the request is signed as a POST

        Returns the decoded JSON response.
        """
        base_url = self._API_DOMAIN + '/core/' + path
        # A sequence of pairs instead of a dict: the signature base string
        # needs the parameters sorted by name (RFC 5849, section 3.4.1.3.2)
        # and dict iteration order is not guaranteed before Python 3.7.
        # The pairs below are listed in sorted order.
        encoded_query = compat_urllib_parse_urlencode([
            ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
            ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
            ('oauth_signature_method', 'HMAC-SHA1'),
            ('oauth_timestamp', int(time.time())),
            ('oauth_version', '1.0'),
        ])
        headers = self.geo_verification_headers()
        if data:
            data = json.dumps(data).encode()
            headers['Content-Type'] = 'application/json'
        method = 'POST' if data else 'GET'
        # Signature base string: METHOD & quoted URL & quoted parameters.
        base_string = '&'.join([
            method,
            compat_urllib_parse.quote(base_url, ''),
            compat_urllib_parse.quote(encoded_query, ''),
        ])
        # The signing key is the consumer secret followed by '&'; nothing is
        # appended after the '&' here.
        oauth_signature = base64.b64encode(hmac.new(
            (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
            base_string.encode(), hashlib.sha1).digest()).decode()
        encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
        return self._download_json(
            '?'.join([base_url, encoded_query]), video_id,
            note='Downloading %s JSON metadata' % note, headers=headers, data=data)

    def _call_cms(self, path, video_id, note):
        """Download JSON from '<_API_DOMAIN><path>' with the CMS signing query.

        The signing parameters are fetched once through the 'index' API call
        and then reused for subsequent requests on this instance.
        """
        if not self._CMS_SIGNING:
            self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
        return self._download_json(
            self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
            note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())

    def _set_api_params(self, webpage, video_id):
        """Read the API configuration from the webpage, unless already set.

        Parses the window.__APP_CONFIG__ JSON embedded in the page and keeps
        its 'cxApiParams' object; the API domain falls back to
        'https://api.vrv.co' when 'apiDomain' is absent.
        """
        if not self._API_PARAMS:
            self._API_PARAMS = self._parse_json(self._search_regex(
                r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
                webpage, 'api config'), video_id)['cxApiParams']
            self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')

    def _get_cms_resource(self, resource_key, video_id):
        """Return the 'href' of the CMS resource link for resource_key."""
        return self._call_api(
            'cms_resource', video_id, 'resource path', data={
                'resource_key': resource_key,
            })['__links__']['cms_resource']['href']
70 | ||
71 | ||
class VRVIE(VRVBaseIE):
    """Extractor for single VRV watch pages (URLs matching _VALID_URL)."""

    IE_NAME = 'vrv'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
        'info_dict': {
            'id': 'GR9PNZ396',
            'ext': 'mp4',
            'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
            'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
            'uploader_id': 'seeso',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
        """Extract the formats of one adaptive stream.

        url           -- manifest URL; a falsy value yields no formats
        video_id      -- identifier passed through to the manifest extractors
        stream_format -- 'hls' or 'dash'; anything else yields no formats
        audio_lang    -- audio locale; when set it is stored as 'language' on
                         every format whose 'acodec' is not 'none'
        hardsub_lang  -- hardsub locale; takes precedence over audio_lang in
                         the format id and the download note

        Returns a list of format dicts (possibly empty).
        """
        if not url or stream_format not in ('hls', 'dash'):
            return []
        stream_id = hardsub_lang or audio_lang
        format_id = '%s-%s' % (stream_format, stream_id)
        if stream_format == 'hls':
            adaptive_formats = self._extract_m3u8_formats(
                url, video_id, 'mp4', m3u8_id=format_id,
                note='Downloading %s m3u8 information' % stream_id,
                fatal=False)
        elif stream_format == 'dash':
            adaptive_formats = self._extract_mpd_formats(
                url, video_id, mpd_id=format_id,
                note='Downloading %s MPD information' % stream_id,
                fatal=False)
        if audio_lang:
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = audio_lang
        return adaptive_formats

    def _real_extract(self, url):
        """Build the info dict for a watch page.

        Metadata and stream lists embedded in the page's
        window.__INITIAL_STATE__ are used when present; otherwise they are
        fetched through the signed API / CMS endpoints.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id,
            headers=self.geo_verification_headers())
        initial_state = self._parse_json(self._search_regex(
            r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
            webpage, 'initial state'), video_id)
        # 'or {}' rather than a .get() default: a key that is present with a
        # null value would otherwise make the chained lookup raise.
        media_resource = (initial_state.get('watch') or {}).get('mediaResource') or {}

        video_data = media_resource.get('json')
        if not video_data:
            self._set_api_params(webpage, video_id)
            episode_path = self._get_cms_resource(
                'cms:/episodes/' + video_id, video_id)
            video_data = self._call_cms(episode_path, video_id, 'video')
        title = video_data['title']

        streams_json = (media_resource.get('streams') or {}).get('json')
        if not streams_json:
            self._set_api_params(webpage, video_id)
            streams_path = video_data['__links__']['streams']['href']
            streams_json = self._call_cms(streams_path, video_id, 'streams')

        audio_locale = streams_json.get('audio_locale')
        formats = []
        for stream_type, streams in (streams_json.get('streams') or {}).items():
            if stream_type in ('adaptive_hls', 'adaptive_dash'):
                for stream in streams.values():
                    # 'adaptive_hls' -> 'hls', 'adaptive_dash' -> 'dash'
                    formats.extend(self._extract_vrv_formats(
                        stream.get('url'), video_id, stream_type.split('_')[1],
                        audio_locale, stream.get('hardsub_locale')))
        self._sort_formats(formats)

        subtitles = {}
        for subtitle in (streams_json.get('subtitles') or {}).values():
            subtitle_url = subtitle.get('url')
            if not subtitle_url:
                continue
            subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
                'url': subtitle_url,
                'ext': subtitle.get('format', 'ass'),
            })

        thumbnails = []
        for thumbnail in (video_data.get('images') or {}).get('thumbnails') or []:
            thumbnail_url = thumbnail.get('source')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'description': video_data.get('description'),
            'duration': float_or_none(video_data.get('duration_ms'), 1000),
            'uploader_id': video_data.get('channel_id'),
            'series': video_data.get('series_title'),
            'season': video_data.get('season_title'),
            'season_number': int_or_none(video_data.get('season_number')),
            'season_id': video_data.get('season_id'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('episode_number')),
            'episode_id': video_data.get('production_episode_id'),
        }
48ab554f RA |
182 | |
183 | ||
class VRVSeriesIE(VRVBaseIE):
    """Extractor for VRV series pages; yields a playlist of watch-page URLs."""

    IE_NAME = 'vrv:series'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
    _TEST = {
        'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
        'info_dict': {
            'id': 'G68VXG3G6',
        },
        'playlist_mincount': 11,
    }

    def _real_extract(self, url):
        """Collect every episode of every season of the series as a playlist."""
        series_id = self._match_id(url)
        page = self._download_webpage(
            url, series_id, headers=self.geo_verification_headers())
        self._set_api_params(page, series_id)

        # Resolve the seasons listing through the CMS resource lookup.
        seasons_href = self._get_cms_resource(
            'cms:/seasons?series_id=' + series_id, series_id)
        seasons = self._call_cms(seasons_href, series_id, 'seasons').get('items', [])

        playlist = []
        for season in seasons:
            # One CMS request per season to list its episodes.
            episodes_href = season['__links__']['season/episodes']['href']
            listing = self._call_cms(episodes_href, series_id, 'episodes')
            playlist.extend(
                self.url_result(
                    'https://vrv.co/watch/' + item['id'],
                    'VRV', item['id'], item.get('title'))
                for item in listing.get('items', []))

        return self.playlist_result(playlist, series_id)