]>
Commit | Line | Data |
---|---|---|
979568f2 M |
1 | import json |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | classproperty, | |
7 | int_or_none, | |
8 | traverse_obj, | |
9 | urljoin | |
10 | ) | |
11 | ||
12 | ||
class BrainPOPBaseIE(InfoExtractor):
    # Shared URL matching, format assembly and login handling for the BrainPOP
    # extractors. Concrete subclasses override _ORIGIN and define _VIDEO_URL and
    # _HLS_URL, which the helpers below read from ``self``.
    _NETRC_MACHINE = 'brainpop'
    _ORIGIN = ''  # So that _VALID_URL doesn't crash
    _LOGIN_ERRORS = {
        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
    }

    @classproperty
    def _VALID_URL(cls):
        """Build the URL regex from ``_ORIGIN``.

        The scheme is relaxed to http/https and a ``www.`` prefix is made
        optional. The ``slug`` group captures three path segments and the
        ``id`` group captures the last of them.
        """
        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'

    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields=None):
        """Return the HLS formats plus one direct-URL format for a media slug.

        HLS extraction is non-fatal (``fatal=False``). ``token`` is appended as
        the query string of both URLs, and ``extra_fields`` (optional dict) is
        merged into every returned format dict.
        """
        formats = self._extract_m3u8_formats(
            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
        formats.append({
            'format_id': format_id,
            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
        })
        # None replaces the former shared ``{}`` default; behaviour for callers is unchanged
        extra_fields = extra_fields or {}
        for f in formats:
            f.update(extra_fields)
        return formats

    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields=None):
        """Collect formats for the high/low and audio-description variants in ``data``.

        Keys are built as ``key_format % 'high'`` / ``key_format % 'low'``, both
        plain and with an ``ad_`` prefix. Only keys holding a truthy value are
        used. 'high' gets quality -1 and 'low' gets quality -2; the ``ad_``
        variants are additionally marked with a format note and a source
        preference of -2. ``extra_fields`` (optional dict) is applied last.
        """
        formats = []
        extra_fields = extra_fields or {}
        additional_key_formats = {
            '%s': {},
            'ad_%s': {
                'format_note': 'Audio description',
                'source_preference': -2,
            },
        }
        for additional_key_format, additional_key_fields in additional_key_formats.items():
            for key_quality, key_index in enumerate(('high', 'low')):
                full_key_index = additional_key_format % (key_format % key_index)
                if data.get(full_key_index):
                    formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                        'quality': -1 - key_quality,
                        **additional_key_fields,
                        **extra_fields,
                    }))
        return formats

    def _perform_login(self, username, password):
        """POST the credentials to the login API and warn if the login is rejected.

        Any ``status_code`` other than 1505 is reported as a failed login via a
        warning; nothing is raised here for a rejected login.
        """
        login_res = self._download_json(
            'https://api.brainpop.com/api/login', None,
            data=json.dumps({'username': username, 'password': password}).encode(),
            headers={
                'Content-Type': 'application/json',
                'Referer': self._ORIGIN,
            }, note='Logging in', errnote='Unable to log in', expected_status=400)
        status_code = int_or_none(login_res['status_code'])
        if status_code != 1505:
            # Resolve the reason before formatting: a non-empty f-string is
            # always truthy, so an ``or`` placed after it can never take effect.
            reason = (
                self._LOGIN_ERRORS.get(status_code)
                or login_res.get('message')
                or f'Got status code {status_code}')
            self.report_warning(f'Unable to login: {reason}')
82 | ||
83 | ||
class BrainPOPIE(BrainPOPBaseIE):
    # Extractor for www.brainpop.com pages; metadata is read from the
    # api.brainpop.com content API rather than from the web page.
    _ORIGIN = 'https://www.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com'
    _TESTS = [{
        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
        'md5': '3ead374233ae74c7f1b0029a01c972f0',
        'info_dict': {
            'id': '1f3259fa457292b4',
            'ext': 'mp4',
            'title': 'Martin Luther King, Jr.',
            'display_id': 'martinlutherkingjr',
            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
        },
    }, {
        'url': 'https://www.brainpop.com/science/space/bigbang/',
        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
        'info_dict': {
            'id': 'acae52cd48c99acf',
            'ext': 'mp4',
            'title': 'Big Bang',
            'display_id': 'bigbang',
            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
        },
        'skip': 'Requires login',
    }]

    def _real_extract(self, url):
        """Extract formats, subtitles and topic metadata for a BrainPOP movie URL.

        Downloads the movie JSON (required) and the topic JSON (optional; falls
        back to ``movie_data['topic']``). If access is not allowed, a login
        requirement or a no-formats error is reported depending on whether the
        reason text mentions 'logged'.
        """
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        movie_data = self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
            'Downloading movie data JSON', 'Unable to download movie data')['data']
        topic_data = traverse_obj(self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
            ('data', 'topic'), expected_type=dict) or movie_data['topic']

        if not traverse_obj(movie_data, ('access', 'allow')):
            # The reason may be absent or not a string; normalise it so the
            # substring test below cannot raise TypeError.
            reason = traverse_obj(movie_data, ('access', 'reason'), expected_type=str) or ''
            if 'logged' in reason:
                self.raise_login_required(reason, metadata_available=True)
            else:
                self.raise_no_formats(reason or 'This video is not accessible', video_id=display_id)
        movie_feature = movie_data['feature']
        movie_feature_data = movie_feature['data']

        formats, subtitles = [], {}
        # Formats of the main feature are preferred over the localized ones below
        formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
            'language': movie_feature.get('language') or 'en',
            'language_preference': 10,
        }))
        for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
            formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                'language': lang,
                'language_preference': -10,
            }))

        # TODO: Do localization fields also have subtitles?
        # Subtitle entries are the feature-data keys named ``subtitles_<lang>``
        for name, sub_path in movie_feature_data.items():
            lang = self._search_regex(
                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
            if lang and sub_path:
                subtitles.setdefault(lang, []).append({
                    'url': urljoin(self._CDN_URL, sub_path),
                })

        return {
            'id': topic_data['topic_id'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
            'subtitles': subtitles,
        }
159 | ||
160 | ||
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
    # Base for the BrainPOP sites whose pages embed their data in a
    # ``var content = {...};`` script assignment.

    def _parse_js_topic_data(self, topic_data, display_id, token):
        """Turn the embedded topic object into an info dict.

        Formats are built from ``topic_data['movies']`` before any metadata
        key is read.
        """
        # TODO: Are there non-burned subtitles?
        topic_formats = self._extract_adaptive_formats(topic_data['movies'], token, display_id)

        info = {
            'id': topic_data['EntryID'],
            'display_id': display_id,
        }
        info['title'] = topic_data.get('name')
        info['alt_title'] = topic_data.get('title')
        info['description'] = topic_data.get('synopsis')
        info['formats'] = topic_formats
        return info

    def _real_extract(self, url):
        """Download the page, then read the embedded topic JSON and the ``ec_token`` value from it."""
        display_id = self._match_valid_url(url).group('slug', 'id')[1]
        page = self._download_webpage(url, display_id)

        content = self._search_json(
            r'var\s+content\s*=\s*', page, 'content data',
            display_id, end_pattern=';')
        topic = content['category']['unit']['topic']

        video_token = self._search_regex(
            r'ec_token\s*:\s*[\'"]([^\'"]+)', page, 'video token')
        return self._parse_js_topic_data(topic, display_id, video_token)
184 | ||
185 | ||
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
    # Extractor for jr.brainpop.com; all extraction logic is inherited.

    # Site root used to build _VALID_URL
    _ORIGIN = 'https://jr.brainpop.com'

    # Media and asset hosts for this site
    _HLS_URL = 'https://hls-jr.brainpop.com'
    _VIDEO_URL = 'https://svideos-jr.brainpop.com'
    _CDN_URL = 'https://cdn-jr.brainpop.com'

    _TESTS = [
        {
            'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
            'md5': '04e0561bb21770f305a0ce6cf0d869ab',
            'info_dict': {
                'id': '347',
                'ext': 'mp4',
                'title': 'Emotions',
                'display_id': 'emotions',
            },
        },
        {
            'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
            'md5': 'b0ed063bbd1910df00220ee29340f5d6',
            'info_dict': {
                'id': '29',
                'ext': 'mp4',
                'title': 'Arctic Habitats',
                'display_id': 'arctichabitats',
            },
            'skip': 'Requires login',
        },
    ]
211 | ||
212 | ||
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
    # Extractor for ell.brainpop.com; all extraction logic is inherited.

    # Site root used to build _VALID_URL
    _ORIGIN = 'https://ell.brainpop.com'

    # Note: the media hosts use an ``-esl`` suffix although the site is ``ell``
    _HLS_URL = 'https://hls-esl.brainpop.com'
    _VIDEO_URL = 'https://svideos-esl.brainpop.com'
    _CDN_URL = 'https://cdn-esl.brainpop.com'

    _TESTS = [
        {
            'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
            'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
            'info_dict': {
                'id': '1',
                'ext': 'mp4',
                'title': 'Lesson 1',
                'display_id': 'lesson1',
                'alt_title': 'Personal Pronouns',
            },
        },
        {
            'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
            'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
            'info_dict': {
                'id': '101',
                'ext': 'mp4',
                'title': 'Lesson 5',
                'display_id': 'lesson5',
                'alt_title': 'Review: Unit 6',
            },
            'skip': 'Requires login',
        },
    ]
240 | ||
241 | ||
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
    # Extractor for esp.brainpop.com; all extraction logic is inherited.
    IE_DESC = 'BrainPOP Español'

    # Site root used to build _VALID_URL
    _ORIGIN = 'https://esp.brainpop.com'

    # Media hosts are the same as the main site; only the CDN path differs (``/mx``)
    _HLS_URL = 'https://hls.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/mx'

    _TESTS = [
        {
            'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
            'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
            'info_dict': {
                'id': '3893',
                'ext': 'mp4',
                'title': 'Ecosistemas',
                'display_id': 'ecosistemas',
                'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
            },
        },
        {
            'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
            'md5': '98c1b9559e0e33777209c425cda7dac4',
            'info_dict': {
                'id': '7146',
                'ext': 'mp4',
                'title': 'Emily Dickinson',
                'display_id': 'emily_dickinson',
                'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
            },
            'skip': 'Requires login',
        },
    ]
270 | ||
271 | ||
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
    # Extractor for fr.brainpop.com; all extraction logic is inherited.
    IE_DESC = 'BrainPOP Français'

    # Site root used to build _VALID_URL
    _ORIGIN = 'https://fr.brainpop.com'

    # Media hosts are the same as the main site; only the CDN path differs (``/fr``)
    _HLS_URL = 'https://hls.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/fr'

    _TESTS = [
        {
            'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
            'md5': '97e7f48af8af93f8a2be11709f239371',
            'info_dict': {
                'id': '1651',
                'ext': 'mp4',
                'title': 'Sources d\'énergie',
                'display_id': 'sourcesdenergie',
                'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
            },
        },
        {
            'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
            'md5': '0cf2b4f89804d0dd4a360a51310d445a',
            'info_dict': {
                'id': '5803',
                'ext': 'mp4',
                'title': 'Plagiat',
                'display_id': 'plagiat',
                'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
            },
            'skip': 'Requires login',
        },
    ]
300 | ||
301 | ||
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
    # Extractor for il.brainpop.com; all extraction logic is inherited.
    IE_DESC = 'BrainPOP Hebrew'

    # Site root used to build _VALID_URL
    _ORIGIN = 'https://il.brainpop.com'

    # Media hosts are the same as the main site; only the CDN path differs (``/he``)
    _HLS_URL = 'https://hls.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/he'

    _TESTS = [
        {
            'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
            'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
            'info_dict': {
                'id': '3782',
                'ext': 'mp4',
                'title': 'md5:e993632fcda0545d9205602ec314ad67',
                'display_id': 'subjects_3782',
                'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
            },
        },
    ]