]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/brainpop.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / brainpop.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6 classproperty,
7 int_or_none,
8 traverse_obj,
9 urljoin,
10 )
11
12
class BrainPOPBaseIE(InfoExtractor):
    """Shared URL matching, login and format assembly for the BrainPOP extractors.

    Subclasses set ``_ORIGIN`` (used to build ``_VALID_URL`` and sent as the
    login ``Referer``) and the ``_VIDEO_URL`` / ``_HLS_URL`` hosts that
    ``_assemble_formats`` joins media slugs onto.
    """
    _NETRC_MACHINE = 'brainpop'
    _ORIGIN = ''  # So that _VALID_URL doesn't crash
    # Maps the login API's numeric ``status_code`` to a human-readable message
    _LOGIN_ERRORS = {
        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
    }

    @classproperty
    def _VALID_URL(cls):
        """Regex derived from ``_ORIGIN``: scheme may be http or https, ``www.`` is optional.

        Captures ``slug`` (three path segments) and ``id`` (the last of them).
        """
        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'

    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields=None):
        """Return the HLS formats for ``slug`` plus one direct-URL format.

        @param slug          media path joined onto ``_HLS_URL`` and ``_VIDEO_URL``
        @param format_id     id of the direct format; HLS formats use ``<format_id>-hls``
        @param display_id    id passed to the m3u8 download for logging
        @param token         query string appended to both URLs
        @param extra_fields  optional dict merged into every returned format
        """
        # fatal=False: an HLS failure still leaves the direct format available
        formats = self._extract_m3u8_formats(
            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
        formats.append({
            'format_id': format_id,
            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
        })
        if extra_fields:
            for f in formats:
                f.update(extra_fields)
        return formats

    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields=None):
        """Collect formats for the ``high``/``low`` keys of ``data`` and their ``ad_`` variants.

        @param data          mapping whose values are media slugs
        @param token         query string forwarded to ``_assemble_formats``
        @param display_id    id used for logging
        @param key_format    %-template applied to ``high``/``low`` to build the key name
        @param extra_fields  optional dict merged into every format (overrides the defaults below)
        """
        formats = []
        additional_key_formats = {
            '%s': {},
            'ad_%s': {
                'format_note': 'Audio description',
                'source_preference': -2,
            },
        }
        for additional_key_format, additional_key_fields in additional_key_formats.items():
            # enumerate() index doubles as a quality penalty: high -> -1, low -> -2
            for key_quality, key_index in enumerate(('high', 'low')):
                full_key_index = additional_key_format % (key_format % key_index)
                if data.get(full_key_index):
                    formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                        'quality': -1 - key_quality,
                        **additional_key_fields,
                        **(extra_fields or {}),
                    }))
        return formats

    def _perform_login(self, username, password):
        """POST the credentials to the login API and warn if the login did not succeed.

        Any ``status_code`` other than 1505 is reported as a failed login; the
        failure is a warning, not an error, so extraction continues.
        """
        login_res = self._download_json(
            'https://api.brainpop.com/api/login', None,
            data=json.dumps({'username': username, 'password': password}).encode(),
            headers={
                'Content-Type': 'application/json',
                'Referer': self._ORIGIN,
            }, note='Logging in', errnote='Unable to log in', expected_status=400)
        status_code = int_or_none(login_res.get('status_code'))
        if status_code != 1505:
            # The fallback chain must live inside the message: a non-empty f-string
            # is always truthy, so an `or` placed after it would never be evaluated
            reason = (
                self._LOGIN_ERRORS.get(status_code)
                or login_res.get('message')
                or f'Got status code {status_code}')
            self.report_warning(f'Unable to login: {reason}')
82
83
class BrainPOPIE(BrainPOPBaseIE):
    """Extractor for movies on www.brainpop.com, backed by the JSON content API."""
    _ORIGIN = 'https://www.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com'
    _TESTS = [{
        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
        'md5': '3ead374233ae74c7f1b0029a01c972f0',
        'info_dict': {
            'id': '1f3259fa457292b4',
            'ext': 'mp4',
            'title': 'Martin Luther King, Jr.',
            'display_id': 'martinlutherkingjr',
            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
        },
    }, {
        'url': 'https://www.brainpop.com/science/space/bigbang/',
        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
        'info_dict': {
            'id': 'acae52cd48c99acf',
            'ext': 'mp4',
            'title': 'Big Bang',
            'display_id': 'bigbang',
            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
        },
        'skip': 'Requires login',
    }]

    def _real_extract(self, url):
        """Fetch movie and topic JSON for the URL and build the info dict.

        Formats come from the main feature and from each ``localization``
        entry; subtitles come from the feature's ``subtitles_<lang>`` keys.
        """
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        movie_data = self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
            'Downloading movie data JSON', 'Unable to download movie data')['data']
        # The topic request is optional; fall back to the topic embedded in the movie data
        topic_data = traverse_obj(self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
            ('data', 'topic'), expected_type=dict) or movie_data['topic']

        if not traverse_obj(movie_data, ('access', 'allow')):
            # `reason` may be absent; default to '' so the substring test cannot raise TypeError
            reason = traverse_obj(movie_data, ('access', 'reason')) or ''
            if 'logged' in reason:
                self.raise_login_required(reason, metadata_available=True)
            else:
                self.raise_no_formats(reason or 'This video is not available', video_id=display_id)
        movie_feature = movie_data['feature']
        movie_feature_data = movie_feature['data']

        formats, subtitles = [], {}
        # Main feature: preferred over the localized variants below
        formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
            'language': movie_feature.get('language') or 'en',
            'language_preference': 10,
        }))
        for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
            formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                'language': lang,
                'language_preference': -10,
            }))

        # TODO: Do localization fields also have subtitles?
        # Loop variable is not called `url` so it does not shadow the method parameter
        for name, sub_path in movie_feature_data.items():
            lang = self._search_regex(
                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
            if lang and sub_path:
                subtitles.setdefault(lang, []).append({
                    'url': urljoin(self._CDN_URL, sub_path),
                })

        return {
            'id': topic_data['topic_id'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
            'subtitles': subtitles,
        }
159
160
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
    """Base for the BrainPOP sites whose topic data is embedded in the page as a JS variable."""

    def _parse_js_topic_data(self, topic_data, display_id, token):
        """Turn the embedded topic object into an info dict.

        Formats are built from ``topic_data['movies']`` using the page token.
        """
        # TODO: Are there non-burned subtitles?
        movie_formats = self._extract_adaptive_formats(
            topic_data['movies'], token, display_id)

        info = {'id': topic_data['EntryID'], 'display_id': display_id}
        info.update({
            'title': topic_data.get('name'),
            'alt_title': topic_data.get('title'),
            'description': topic_data.get('synopsis'),
            'formats': movie_formats,
        })
        return info

    def _real_extract(self, url):
        """Download the page, pull out the ``content`` JSON and video token, and parse them."""
        display_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, display_id)

        # `var content = {...};` holds the topic nested under category -> unit -> topic
        content = self._search_json(
            r'var\s+content\s*=\s*', webpage, 'content data',
            display_id, end_pattern=';')
        topic = content['category']['unit']['topic']

        ec_token = self._search_regex(
            r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
        return self._parse_js_topic_data(topic, display_id, ec_token)
184
185
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
    """Extractor for jr.brainpop.com; only the hosts and test cases differ from the legacy base."""

    # Site origin and the media/CDN hosts used for this site
    _ORIGIN = 'https://jr.brainpop.com'
    _VIDEO_URL = 'https://svideos-jr.brainpop.com'
    _HLS_URL = 'https://hls-jr.brainpop.com'
    _CDN_URL = 'https://cdn-jr.brainpop.com'

    _TESTS = [
        {
            'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
            'md5': '04e0561bb21770f305a0ce6cf0d869ab',
            'info_dict': {
                'id': '347',
                'ext': 'mp4',
                'title': 'Emotions',
                'display_id': 'emotions',
            },
        },
        {
            # Skipped: needs an account
            'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
            'md5': 'b0ed063bbd1910df00220ee29340f5d6',
            'info_dict': {
                'id': '29',
                'ext': 'mp4',
                'title': 'Arctic Habitats',
                'display_id': 'arctichabitats',
            },
            'skip': 'Requires login',
        },
    ]
211
212
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
    """Extractor for ell.brainpop.com; only the hosts and test cases differ from the legacy base."""

    # Site origin is `ell.`, while the media/CDN hosts use the `-esl` suffix
    _ORIGIN = 'https://ell.brainpop.com'
    _VIDEO_URL = 'https://svideos-esl.brainpop.com'
    _HLS_URL = 'https://hls-esl.brainpop.com'
    _CDN_URL = 'https://cdn-esl.brainpop.com'

    _TESTS = [
        {
            'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
            'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
            'info_dict': {
                'id': '1',
                'ext': 'mp4',
                'title': 'Lesson 1',
                'display_id': 'lesson1',
                'alt_title': 'Personal Pronouns',
            },
        },
        {
            # Skipped: needs an account
            'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
            'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
            'info_dict': {
                'id': '101',
                'ext': 'mp4',
                'title': 'Lesson 5',
                'display_id': 'lesson5',
                'alt_title': 'Review: Unit 6',
            },
            'skip': 'Requires login',
        },
    ]
240
241
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
    """Extractor for esp.brainpop.com; only the hosts and test cases differ from the legacy base."""

    IE_DESC = 'BrainPOP Español'

    # Video/HLS hosts match the main site; the CDN uses the `/mx` path
    _ORIGIN = 'https://esp.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/mx'

    _TESTS = [
        {
            'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
            'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
            'info_dict': {
                'id': '3893',
                'ext': 'mp4',
                'title': 'Ecosistemas',
                'display_id': 'ecosistemas',
                'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
            },
        },
        {
            # Skipped: needs an account
            'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
            'md5': '98c1b9559e0e33777209c425cda7dac4',
            'info_dict': {
                'id': '7146',
                'ext': 'mp4',
                'title': 'Emily Dickinson',
                'display_id': 'emily_dickinson',
                'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
            },
            'skip': 'Requires login',
        },
    ]
270
271
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
    """Extractor for fr.brainpop.com; only the hosts and test cases differ from the legacy base."""

    IE_DESC = 'BrainPOP Français'

    # Video/HLS hosts match the main site; the CDN uses the `/fr` path
    _ORIGIN = 'https://fr.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/fr'

    _TESTS = [
        {
            'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
            'md5': '97e7f48af8af93f8a2be11709f239371',
            'info_dict': {
                'id': '1651',
                'ext': 'mp4',
                'title': 'Sources d\'énergie',
                'display_id': 'sourcesdenergie',
                'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
            },
        },
        {
            # Skipped: needs an account
            'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
            'md5': '0cf2b4f89804d0dd4a360a51310d445a',
            'info_dict': {
                'id': '5803',
                'ext': 'mp4',
                'title': 'Plagiat',
                'display_id': 'plagiat',
                'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
            },
            'skip': 'Requires login',
        },
    ]
300
301
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
    """Extractor for il.brainpop.com; only the hosts and test case differ from the legacy base."""

    IE_DESC = 'BrainPOP Hebrew'

    # Video/HLS hosts match the main site; the CDN uses the `/he` path
    _ORIGIN = 'https://il.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/he'

    _TESTS = [
        {
            'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
            'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
            'info_dict': {
                'id': '3782',
                'ext': 'mp4',
                # Title is compared by md5 hash rather than literally
                'title': 'md5:e993632fcda0545d9205602ec314ad67',
                'display_id': 'subjects_3782',
                'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
            },
        },
    ]