]>
Commit | Line | Data |
---|---|---|
3d3538e4 PH |
1 | from __future__ import unicode_literals |
2 | ||
30a074c2 | 3 | import json |
3d3538e4 PH |
4 | |
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
30a074c2 | 7 | int_or_none, |
8 | parse_iso8601, | |
9 | try_get, | |
3d3538e4 PH |
10 | ) |
11 | ||
12 | ||
class KhanAcademyBaseIE(InfoExtractor):
    """Common base for the Khan Academy extractors.

    Subclasses supply ``_VALID_URL`` (built from ``_VALID_URL_TEMPL``) and a
    ``_parse_component_props`` method that turns the page's component
    properties into the final extraction result.
    """

    # Two ``%s`` slots: the repetition count for the leading path segments,
    # and a literal prefix placed right before the final path segment.
    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'

    def _parse_video(self, video):
        """Build a ``url_transparent`` info dict from a video description.

        ``video`` must contain ``youtubeId`` (a ``KeyError`` is raised
        otherwise); every other field is optional and looked up with
        ``dict.get``.
        """
        youtube_id = video['youtubeId']
        # Prefer ``imageUrl`` and fall back to ``thumbnailUrl`` when it is
        # missing or empty.
        thumbnail = video.get('imageUrl') or video.get('thumbnailUrl')
        duration = int_or_none(video.get('duration'))
        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'id': video.get('slug'),
            'title': video.get('title'),
            'thumbnail': thumbnail,
            'duration': duration,
            'description': video.get('description'),
            'ie_key': 'Youtube',
        }

    def _real_extract(self, url):
        """Fetch the page's content JSON and hand it to the subclass parser."""
        display_id = self._match_id(url)

        # The ``variables`` query parameter is itself a JSON-encoded string.
        graphql_variables = json.dumps({
            'path': display_id,
            'queryParams': '',
        })
        response = self._download_json(
            'https://www.khanacademy.org/api/internal/graphql',
            display_id, query={
                # NOTE(review): presumably identifies the server-side query
                # to run -- confirm against the site.
                'hash': 1604303425,
                'variables': graphql_variables,
            })

        # ``contentJson`` is a JSON document embedded as a string, so it
        # needs a second parsing pass.
        content = self._parse_json(response['data']['contentJson'], display_id)
        return self._parse_component_props(content['componentProps'])
40 | ||
41 | ||
class KhanAcademyIE(KhanAcademyBaseIE):
    """Extractor for a single Khan Academy video page.

    Matches URLs with four leading path segments followed by ``v/<slug>``.
    """

    IE_NAME = 'khanacademy'
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
        'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
        'info_dict': {
            'id': 'FlIG3TvQCBQ',
            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'uploader_id': 'khanacademy',
            'upload_date': '20120411',
            'timestamp': 1334170113,
            'license': 'cc-by-nc-sa',
        },
        'add_ie': ['Youtube'],
    }

    def _parse_component_props(self, component_props):
        """Return the info dict for the video described by ``component_props``.

        The video data is read from ``tutorialPageData.contentModel``; the
        common fields come from ``_parse_video`` and are extended with
        uploader, timestamp and license.
        """
        content_model = component_props['tutorialPageData']['contentModel']
        result = self._parse_video(content_model)

        # Several authors are collapsed into one comma-separated string; a
        # missing or empty author list yields no uploader at all.
        authors = content_model.get('authorNames')
        if authors:
            uploader = ', '.join(authors)
        else:
            uploader = None

        result['uploader'] = uploader
        result['timestamp'] = parse_iso8601(content_model.get('dateAdded'))
        result['license'] = content_model.get('kaUserLicense')
        return result
73 | ||
74 | ||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
    """Extractor for a Khan Academy unit page, returned as a playlist.

    Matches URLs with two leading path segments plus one final segment,
    optionally followed by a slash, and then either the end of the URL or
    one of ``?``, ``#`` or ``&``.
    """

    IE_NAME = 'khanacademy:unit'
    _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
        'info_dict': {
            'id': 'cryptography',
            'title': 'Cryptography',
            'description': 'How have humans protected their secret messages through history? What has changed today?',
        },
        'playlist_mincount': 31,
    }

    def _parse_component_props(self, component_props):
        """Collect every video of the unit into a playlist result.

        Only the tutorials of the first module of the first tab are
        considered. Each tutorial becomes a chapter (numbered from 1), and
        only content items whose ``kind`` is ``'Video'`` produce entries.
        """
        curation = component_props['curation']

        # Any missing level (or a non-list value) results in an empty list.
        tutorials = try_get(
            curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []

        entries = []
        for chapter_number, tutorial in enumerate(tutorials, 1):
            chapter_fields = {
                'chapter': tutorial.get('title'),
                'chapter_number': chapter_number,
                'chapter_id': tutorial.get('id'),
            }
            items = tutorial.get('contentItems') or []
            for item in items:
                if item.get('kind') != 'Video':
                    continue
                entry = self._parse_video(item)
                entry.update(chapter_fields)
                entries.append(entry)

        playlist_id = curation.get('unit')
        playlist_title = curation.get('title')
        playlist_description = curation.get('description')
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)