jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/crackle.py
[TVer] Fix extractor (#3268)
[yt-dlp.git] / yt_dlp / extractor / crackle.py
1 # coding: utf-8
2 from __future__ import unicode_literals, division
3
4 import hashlib
5 import hmac
6 import re
7 import time
8
9 from .common import InfoExtractor
10 from ..compat import compat_HTTPError
11 from ..utils import (
12 determine_ext,
13 float_or_none,
14 int_or_none,
15 orderedSet,
16 parse_age_limit,
17 parse_duration,
18 url_or_none,
19 ExtractorError
20 )
21
22
class CrackleIE(InfoExtractor):
    """Extractor for single videos on crackle.com / sonycrackle.com.

    Media metadata is requested from the web-api-us.crackle.com service once
    per candidate country code until a response carries a ``MediaURLs`` list.
    Every API request is signed by the overridden ``_download_json``.
    """

    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TESTS = [{
        # Crackle is available in the United States and territories
        'url': 'https://www.crackle.com/thanksgiving/2510064',
        'info_dict': {
            'id': '2510064',
            'ext': 'mp4',
            'title': 'Touch Football',
            'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
            'duration': 1398,
            'view_count': int,
            'average_rating': 0,
            'age_limit': 17,
            'genre': 'Comedy',
            'creator': 'Daniel Powell',
            'artist': 'Chris Elliott, Amy Sedaris',
            'release_year': 2016,
            'series': 'Thanksgiving',
            'episode': 'Touch Football',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': [
            'Trying with a list of known countries'
        ],
    }, {
        'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
        'only_matching': True,
    }]

    # Dimensions of the known progressive-download variants, keyed by the
    # "Type" value of a MediaURLs entry.
    _MEDIA_FILE_SLOTS = {
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 768,
            'height': 432,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 480,
        },
    }

    def _download_json(self, url, *args, **kwargs):
        """Download JSON from the crackle API with a signed Authorization header.

        The signature is the upper-cased hex HMAC-SHA1 of "<url>|<timestamp>",
        where the timestamp is the current UTC time at minute resolution.
        Headers passed by the caller through the ``headers`` keyword are kept,
        but ``Accept`` and ``Authorization`` are always set here. All other
        arguments are forwarded unchanged to ``InfoExtractor._download_json``.
        """
        # Authorization generation algorithm is reverse engineered from:
        # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
        timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
        h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
        # Merge instead of passing a second ``headers`` keyword, which would
        # raise TypeError whenever a caller supplies its own headers.
        headers = dict(kwargs.pop('headers', None) or {})
        headers.update({
            'Accept': 'application/json',
            'Authorization': '|'.join([h, timestamp, '117', '1']),
        })
        return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)

    def _fetch_crackle_media(self, video_id):
        """Return the media JSON for video_id, trying one country API after another.

        The order is: the --geo-bypass-country value (if any), a hard-coded
        list of countries, and finally the country reported by the geo
        endpoint. The loop stops at the first response whose ``MediaURLs`` is
        a list. If no response qualifies, the last downloaded response (or an
        empty dict when nothing was downloaded) is returned.

        Raises ExtractorError when the API reports a non-zero message code,
        when the response has no status object, or on any download error other
        than HTTP 401.
        """
        geo_bypass_country = self.get_param('geo_bypass_country', None)
        # The trailing '' is only a placeholder slot: when it is reached, the
        # country is looked up through the geo endpoint instead.
        countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
        num_countries = len(countries) - 1

        media = {}
        for num, country in enumerate(countries):
            if num == 1:  # start hard-coded list
                self.report_warning('%s. Trying with a list of known countries' % (
                    'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country
                    else 'No country code was given using --geo-bypass-country'))
            elif num == num_countries:  # end of list
                geo_info = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/geo/country',
                    video_id, fatal=False, note='Downloading geo-location information from crackle API',
                    errnote='Unable to fetch geo-location information from crackle') or {}
                country = geo_info.get('CountryCode')
                if country is None:
                    continue
                self.to_screen('%s identified country as %s' % (self.IE_NAME, country))
                if country in countries:
                    self.to_screen('Downloading from %s API was already attempted. Skipping...' % country)
                    continue

            # No --geo-bypass-country was given, so the first slot is empty
            if country is None:
                continue
            try:
                media = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country),
                    video_id, note='Downloading media JSON from %s API' % country,
                    errnote='Unable to download media JSON')
            except ExtractorError as e:
                # 401 means geo restriction, trying next country
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                    continue
                raise

            status = media.get('status')
            if not isinstance(status, dict):
                # Fail with a readable error instead of an AttributeError
                raise ExtractorError(
                    '%s returned a media response without a status object' % self.IE_NAME)
            if status.get('messageCode') != '0':
                raise ExtractorError(
                    '%s said: %s %s - %s' % (
                        self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
                    expected=True)

            # Found video formats
            if isinstance(media.get('MediaURLs'), list):
                break

        return media

    def _extract_crackle_formats(self, media, video_id):
        """Build (formats, subtitles) from the ``MediaURLs`` entries of media.

        HLS, DASH and ISM manifests are expanded through the corresponding
        InfoExtractor helpers. Any other URL is kept only when its "Type" is
        listed in ``_MEDIA_FILE_SLOTS``. Reports DRM when no format could be
        built and at least one entry was flagged ``UseDRM``.
        """
        formats, subtitles = [], {}
        has_drm = False
        for e in media.get('MediaURLs') or []:
            if e.get('UseDRM'):
                has_drm = True
                format_url = url_or_none(e.get('DRMPath'))
            else:
                format_url = url_or_none(e.get('Path'))
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif format_url.endswith('.ism/Manifest'):
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            else:
                mfs_path = e.get('Type')
                mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
                if not mfs_info:
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
        if not formats and has_drm:
            self.report_drm(video_id)
        self._sort_formats(formats)
        return formats, subtitles

    @staticmethod
    def _extract_crackle_thumbnails(images):
        """Return thumbnail dicts built from the ``Images`` value of a media response.

        Only dict values are processed (anything else yields an empty list).
        An entry is kept when its key contains "Img_<width>x<height>" and its
        value is a valid URL.
        """
        thumbnails = []
        # NOTE(review): presumably the API sends a mapping of image keys to
        # URLs, as the key regex and the .items() iteration imply - confirm.
        if not isinstance(images, dict):
            return thumbnails
        for image_key, image_url in images.items():
            mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
            image_url = url_or_none(image_url)
            if not mobj or not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int(mobj.group(1)),
                'height': int(mobj.group(2)),
            })
        return thumbnails

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by url.

        Raises ExtractorError when no usable media response could be obtained
        (unless ``ignore_no_formats_error`` is set and some response exists).
        """
        video_id = self._match_id(url)

        media = self._fetch_crackle_media(video_id)

        ignore_no_formats = self.get_param('ignore_no_formats_error')

        # Deliberately not marked as expected and left open-ended; presumably
        # the framework appends its bug-report hint to the message.
        if not media or (not media.get('MediaURLs') and not ignore_no_formats):
            raise ExtractorError(
                'Unable to access the crackle API. Try passing your country code '
                'to --geo-bypass-country. If it still does not work and the '
                'video is available in your country')
        title = media['Title']

        formats, subtitles = self._extract_crackle_formats(media, video_id)

        description = media.get('Description')
        duration = int_or_none(media.get(
            'DurationInSeconds')) or parse_duration(media.get('Duration'))
        view_count = int_or_none(media.get('CountViews'))
        average_rating = float_or_none(media.get('UserRating'))
        age_limit = parse_age_limit(media.get('Rating'))
        genre = media.get('Genre')
        release_year = int_or_none(media.get('ReleaseYear'))
        creator = media.get('Directors')
        artist = media.get('Cast')

        # Series metadata is only filled in for full episodes
        if media.get('MediaTypeDisplayValue') == 'Full Episode':
            series = media.get('ShowName')
            episode = title
            season_number = int_or_none(media.get('Season'))
            episode_number = int_or_none(media.get('Episode'))
        else:
            series = episode = season_number = episode_number = None

        # Closed-caption files are added on top of the manifest subtitles
        cc_files = media.get('ClosedCaptionFiles')
        if isinstance(cc_files, list):
            for cc_file in cc_files:
                if not isinstance(cc_file, dict):
                    continue
                cc_url = url_or_none(cc_file.get('Path'))
                if not cc_url:
                    continue
                lang = cc_file.get('Locale') or 'en'
                subtitles.setdefault(lang, []).append({'url': cc_url})

        thumbnails = self._extract_crackle_thumbnails(media.get('Images'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'age_limit': age_limit,
            'genre': genre,
            'creator': creator,
            'artist': artist,
            'release_year': release_year,
            'series': series,
            'episode': episode,
            'season_number': season_number,
            'episode_number': episode_number,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }