]>
Commit | Line | Data |
---|---|---|
645c4885 RA |
1 | import hashlib |
2 | import hmac | |
b9f5a412 | 3 | import re |
645c4885 | 4 | import time |
b9f5a412 | 5 | |
80f772c2 | 6 | from .common import InfoExtractor |
3052a30d | 7 | from ..compat import compat_HTTPError |
b9f5a412 S |
8 | from ..utils import ( |
9 | determine_ext, | |
10 | float_or_none, | |
11 | int_or_none, | |
07e4a40a | 12 | orderedSet, |
b9f5a412 S |
13 | parse_age_limit, |
14 | parse_duration, | |
3052a30d | 15 | url_or_none, |
7d34016f | 16 | ExtractorError |
b9f5a412 | 17 | ) |
80f772c2 | 18 | |
19 | ||
class CrackleIE(InfoExtractor):
    """Extractor for single videos on crackle.com / sonycrackle.com.

    The media details API is queried per country code: first the country
    given via the ``geo_bypass_country`` param (if any), then a hard-coded
    list of known countries and finally the country reported by the
    service's own geo-location endpoint.
    """

    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TESTS = [{
        # Crackle is available in the United States and territories
        'url': 'https://www.crackle.com/thanksgiving/2510064',
        'info_dict': {
            'id': '2510064',
            'ext': 'mp4',
            'title': 'Touch Football',
            'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
            'duration': 1398,
            'view_count': int,
            'average_rating': 0,
            'age_limit': 17,
            'genre': 'Comedy',
            'creator': 'Daniel Powell',
            'artist': 'Chris Elliott, Amy Sedaris',
            'release_year': 2016,
            'series': 'Thanksgiving',
            'episode': 'Touch Football',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': [
            'Trying with a list of known countries'
        ],
    }, {
        'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
        'only_matching': True,
    }]

    # Known progressive-download slots, keyed by the 'Type' value of a
    # MediaURLs entry, mapped to the frame size used for that slot.
    _MEDIA_FILE_SLOTS = {
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 768,
            'height': 432,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 480,
        },
    }

    def _download_json(self, url, *args, **kwargs):
        """Download JSON from the crackle API with a signed Authorization header.

        Accepts the same arguments as ``InfoExtractor._download_json``. Any
        ``headers`` supplied by the caller are kept, except that ``Accept``
        and ``Authorization`` are always set by this method.
        """
        # Authorization generation algorithm is reverse engineered from:
        # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
        timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
        h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
        # Merge instead of passing a second ``headers`` keyword, which would
        # raise TypeError whenever the caller already supplied one.
        headers = dict(kwargs.pop('headers', None) or {})
        headers.update({
            'Accept': 'application/json',
            'Authorization': '|'.join([h, timestamp, '117', '1']),
        })
        return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for a crackle video URL.

        Returns the info dict for the video whose numeric id is matched by
        ``_VALID_URL``.

        Raises ExtractorError when the API reports a non-zero message code,
        when no country yields usable media, or when a download fails with
        anything other than HTTP 401.
        """
        video_id = self._match_id(url)

        geo_bypass_country = self.get_param('geo_bypass_country', None)
        # The trailing '' is a placeholder: when reached, the country is
        # looked up through the geo-location endpoint instead.
        countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
        num_countries = len(countries) - 1

        media = {}
        for num, country in enumerate(countries):
            if num == 1:  # start hard-coded list
                self.report_warning('%s. Trying with a list of known countries' % (
                    'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country
                    else 'No country code was given using --geo-bypass-country'))
            elif num == num_countries:  # end of list
                geo_info = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/geo/country',
                    video_id, fatal=False, note='Downloading geo-location information from crackle API',
                    errnote='Unable to fetch geo-location information from crackle') or {}
                country = geo_info.get('CountryCode')
                if country is None:
                    continue
                self.to_screen('%s identified country as %s' % (self.IE_NAME, country))
                if country in countries:
                    self.to_screen('Downloading from %s API was already attempted. Skipping...' % country)
                    continue

            # No --geo-bypass-country given: the first entry is None
            if country is None:
                continue
            try:
                media = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country),
                    video_id, note='Downloading media JSON from %s API' % country,
                    errnote='Unable to download media JSON')
            except ExtractorError as e:
                # 401 means geo restriction, trying next country
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                    continue
                raise

            # A response without a 'status' object is reported through the
            # same ExtractorError instead of failing with AttributeError.
            status = media.get('status') or {}
            if status.get('messageCode') != '0':
                raise ExtractorError(
                    '%s said: %s %s - %s' % (
                        self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
                    expected=True)

            # Found video formats
            if isinstance(media.get('MediaURLs'), list):
                break

        ignore_no_formats = self.get_param('ignore_no_formats_error')

        if not media or (not media.get('MediaURLs') and not ignore_no_formats):
            # NOTE(review): the message ends mid-sentence; presumably it is
            # completed by the generic suffix ExtractorError appends to
            # unexpected errors - confirm before rewording.
            raise ExtractorError(
                'Unable to access the crackle API. Try passing your country code '
                'to --geo-bypass-country. If it still does not work and the '
                'video is available in your country')
        title = media['Title']

        formats, subtitles = [], {}
        has_drm = False
        for media_url in media.get('MediaURLs') or []:
            # Same defensive check as for the closed caption entries below
            if not isinstance(media_url, dict):
                continue
            if media_url.get('UseDRM'):
                has_drm = True
                format_url = url_or_none(media_url.get('DRMPath'))
            else:
                format_url = url_or_none(media_url.get('Path'))
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif format_url.endswith('.ism/Manifest'):
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            else:
                # Progressive file: only slots listed in _MEDIA_FILE_SLOTS
                # are turned into formats
                mfs_path = media_url.get('Type')
                mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
                if not mfs_info:
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
        if not formats and has_drm:
            self.report_drm(video_id)
        self._sort_formats(formats)

        description = media.get('Description')
        duration = int_or_none(media.get(
            'DurationInSeconds')) or parse_duration(media.get('Duration'))
        view_count = int_or_none(media.get('CountViews'))
        average_rating = float_or_none(media.get('UserRating'))
        age_limit = parse_age_limit(media.get('Rating'))
        genre = media.get('Genre')
        release_year = int_or_none(media.get('ReleaseYear'))
        creator = media.get('Directors')
        artist = media.get('Cast')

        # Series/episode fields are only filled in for full episodes
        if media.get('MediaTypeDisplayValue') == 'Full Episode':
            series = media.get('ShowName')
            episode = title
            season_number = int_or_none(media.get('Season'))
            episode_number = int_or_none(media.get('Episode'))
        else:
            series = episode = season_number = episode_number = None

        cc_files = media.get('ClosedCaptionFiles')
        if isinstance(cc_files, list):
            for cc_file in cc_files:
                if not isinstance(cc_file, dict):
                    continue
                cc_url = url_or_none(cc_file.get('Path'))
                if not cc_url:
                    continue
                lang = cc_file.get('Locale') or 'en'
                subtitles.setdefault(lang, []).append({'url': cc_url})

        thumbnails = []
        images = media.get('Images')
        # The loop needs a key -> URL mapping; the previous ``list`` check
        # could never succeed together with ``.items()``.
        if isinstance(images, dict):
            for image_key, image_url in images.items():
                # Keys look like 'Img_<width>x<height>'; others are skipped
                mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
                if not mobj:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'age_limit': age_limit,
            'genre': genre,
            'creator': creator,
            'artist': artist,
            'release_year': release_year,
            'series': series,
            'episode': episode,
            'season_number': season_number,
            'episode_number': episode_number,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }