]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals, division | |
3 | ||
4 | import hashlib | |
5 | import hmac | |
6 | import re | |
7 | import time | |
8 | ||
9 | from .common import InfoExtractor | |
10 | from ..compat import compat_HTTPError | |
11 | from ..utils import ( | |
12 | determine_ext, | |
13 | float_or_none, | |
14 | int_or_none, | |
15 | orderedSet, | |
16 | parse_age_limit, | |
17 | parse_duration, | |
18 | url_or_none, | |
19 | ExtractorError | |
20 | ) | |
21 | ||
22 | ||
class CrackleIE(InfoExtractor):
    """Information extractor for Crackle / Sony Crackle video pages.

    Matches ``crackle:<id>`` pseudo-URLs as well as ``crackle.com`` and
    ``sonycrackle.com`` page URLs that end in a numeric media id.
    """

    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TESTS = [{
        # geo restricted to CA
        'url': 'https://www.crackle.com/andromeda/2502343',
        'info_dict': {
            'id': '2502343',
            'ext': 'mp4',
            'title': 'Under The Night',
            'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
            'duration': 2583,
            'view_count': int,
            'average_rating': 0,
            'age_limit': 14,
            'genre': 'Action, Sci-Fi',
            'creator': 'Allan Kroeker',
            'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
            'release_year': 2000,
            'series': 'Andromeda',
            'episode': 'Under The Night',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'https://www.sonycrackle.com/andromeda/2502343',
        'only_matching': True,
    }]

    # Known progressive-download slots, keyed by the ``Type`` value of a
    # ``MediaURLs`` entry, mapped to the frame size reported for that slot.
    _MEDIA_FILE_SLOTS = {
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 768,
            'height': 432,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 480,
        },
    }

    def _download_json(self, url, *args, **kwargs):
        """Download JSON from the Crackle API with a signed Authorization header.

        Wraps ``InfoExtractor._download_json``; all positional and keyword
        arguments are forwarded unchanged. Any ``headers`` keyword supplied
        by the caller is merged with the generated ``Accept`` and
        ``Authorization`` headers (the generated ones win on conflict).
        """
        # Authorization generation algorithm is reverse engineered from:
        # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
        timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
        # HMAC-SHA1 over "<url>|<UTC timestamp>", upper-cased hex digest.
        h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
        # Merge instead of passing ``headers=`` next to ``**kwargs`` so a
        # caller-supplied ``headers`` keyword does not raise TypeError.
        headers = dict(kwargs.pop('headers', None) or {})
        headers.update({
            'Accept': 'application/json',
            # NOTE(review): the trailing '117' and '1' fields are copied from
            # the reverse-engineered client; their meaning is not visible here.
            'Authorization': '|'.join([h, timestamp, '117', '1']),
        })
        return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)

    def _real_extract(self, url):
        """Extract formats, subtitles, thumbnails and metadata for one video.

        Queries the media-details endpoint once per candidate country code
        (the user's ``geo_bypass_country`` first, then a hard-coded list, and
        finally the country reported by the API's geo endpoint) until a
        response containing a ``MediaURLs`` list is found.

        Raises ExtractorError when the API reports a non-zero message code,
        when no usable media JSON is obtained, or when every format is DRM
        protected (unless the corresponding ignore/allow params are set).
        """
        video_id = self._match_id(url)

        geo_bypass_country = self.get_param('geo_bypass_country', None)
        countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
        # Index of the trailing '' placeholder, which triggers the geo lookup.
        num_countries = len(countries) - 1

        media = {}
        for num, country in enumerate(countries):
            if num == 1:  # start hard-coded list
                self.report_warning('%s. Trying with a list of known countries' % (
                    'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country
                    else 'No country code was given using --geo-bypass-country'))
            elif num == num_countries:  # end of list
                geo_info = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/geo/country',
                    video_id, fatal=False, note='Downloading geo-location information from crackle API',
                    errnote='Unable to fetch geo-location information from crackle') or {}
                country = geo_info.get('CountryCode')
                if country is None:
                    continue
                self.to_screen('%s identified country as %s' % (self.IE_NAME, country))
                if country in countries:
                    self.to_screen('Downloading from %s API was already attempted. Skipping...' % country)
                    continue

            # ``geo_bypass_country`` is None when the option was not given.
            if country is None:
                continue
            try:
                media = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country),
                    video_id, note='Downloading media JSON from %s API' % country,
                    errnote='Unable to download media JSON')
            except ExtractorError as err:
                # 401 means geo restriction, trying next country
                if isinstance(err.cause, compat_HTTPError) and err.cause.code == 401:
                    continue
                raise

            # A missing status object is treated like a failing one instead
            # of crashing with AttributeError on ``None.get``.
            status = media.get('status') or {}
            if status.get('messageCode') != '0':
                raise ExtractorError(
                    '%s said: %s %s - %s' % (
                        self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
                    expected=True)

            # Found video formats
            if isinstance(media.get('MediaURLs'), list):
                break

        ignore_no_formats = self.get_param('ignore_no_formats_error')
        allow_unplayable_formats = self.get_param('allow_unplayable_formats')

        if not media or (not media.get('MediaURLs') and not ignore_no_formats):
            raise ExtractorError(
                'Unable to access the crackle API. Try passing your country code '
                'to --geo-bypass-country. If it still does not work and the '
                'video is available in your country')
        title = media['Title']

        formats, subtitles = [], {}
        has_drm = False
        for media_url in media.get('MediaURLs') or []:
            # Skip malformed entries, mirroring the ClosedCaptionFiles handling.
            if not isinstance(media_url, dict):
                continue
            if media_url.get('UseDRM'):
                has_drm = True
                if not allow_unplayable_formats:
                    continue
            format_url = url_or_none(media_url.get('Path'))
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif format_url.endswith('.ism/Manifest'):
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            else:
                # Plain file: only accepted when its slot type is known.
                mfs_path = media_url.get('Type')
                mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
                if not mfs_info:
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
        if not formats and has_drm and not ignore_no_formats:
            raise ExtractorError('The video is DRM protected', expected=True)
        self._sort_formats(formats)

        description = media.get('Description')
        duration = int_or_none(media.get(
            'DurationInSeconds')) or parse_duration(media.get('Duration'))
        view_count = int_or_none(media.get('CountViews'))
        average_rating = float_or_none(media.get('UserRating'))
        age_limit = parse_age_limit(media.get('Rating'))
        genre = media.get('Genre')
        release_year = int_or_none(media.get('ReleaseYear'))
        creator = media.get('Directors')
        artist = media.get('Cast')

        if media.get('MediaTypeDisplayValue') == 'Full Episode':
            series = media.get('ShowName')
            episode = title
            season_number = int_or_none(media.get('Season'))
            episode_number = int_or_none(media.get('Episode'))
        else:
            series = episode = season_number = episode_number = None

        cc_files = media.get('ClosedCaptionFiles')
        if isinstance(cc_files, list):
            for cc_file in cc_files:
                if not isinstance(cc_file, dict):
                    continue
                cc_url = url_or_none(cc_file.get('Path'))
                if not cc_url:
                    continue
                lang = cc_file.get('Locale') or 'en'
                subtitles.setdefault(lang, []).append({'url': cc_url})

        thumbnails = []
        images = media.get('Images')
        # ``.items()`` below requires a mapping; the previous ``list`` check
        # could never yield thumbnails and raised AttributeError for a list.
        if isinstance(images, dict):
            for image_key, image_url in images.items():
                # Keys are expected to look like ``Img_<width>x<height>``.
                mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
                if not mobj:
                    continue
                image_url = url_or_none(image_url)
                if not image_url:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'age_limit': age_limit,
            'genre': genre,
            'creator': creator,
            'artist': artist,
            'release_year': release_year,
            'series': series,
            'episode': episode,
            'season_number': season_number,
            'episode_number': episode_number,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }