]>
Commit | Line | Data |
---|---|---|
30a074c2 | 1 | import json |
3d3538e4 PH |
2 | |
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
30a074c2 | 5 | int_or_none, |
4093eb1f | 6 | make_archive_id, |
30a074c2 | 7 | parse_iso8601, |
4093eb1f | 8 | str_or_none, |
9 | traverse_obj, | |
10 | url_or_none, | |
11 | urljoin, | |
3d3538e4 PH |
12 | ) |
13 | ||
14 | ||
class KhanAcademyBaseIE(InfoExtractor):
    # Common plumbing for the Khan Academy extractors in this file.
    #
    # _VALID_URL_TEMPL takes two %-substitutions: the repetition count for the
    # leading '<segment>/' groups, and a literal prefix that must precede the
    # final path segment. Everything after the host is captured as 'id'.
    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'

    # Sent both as the 'pcv' query parameter and inside the GraphQL variables.
    _PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'

    def _parse_video(self, video):
        """Build a url_transparent result that hands *video* over to the Youtube extractor.

        'youtubeId' is mandatory (a missing key raises KeyError); the remaining
        fields are copied only when present and of the expected type.
        """
        youtube_id = video['youtubeId']
        # get_all=False: take the first matching value per field, e.g. the
        # first usable entry of 'thumbnailUrls'.
        optional_fields = traverse_obj(video, {
            'display_id': ('id', {str_or_none}),
            'title': ('translatedTitle', {str}),
            'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}),
            'duration': ('duration', {int_or_none}),
            'description': ('description', {str}),
        }, get_all=False)
        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'id': youtube_id,
            'ie_key': 'Youtube',
            **optional_fields,
        }

    def _real_extract(self, url):
        """Query the ContentForPath endpoint for the URL's path and delegate parsing.

        The 'listedPathData' object of the response is passed to
        self._parse_component_props, which this class does not define itself.
        """
        display_id = self._match_id(url)
        graphql_variables = json.dumps({
            'path': display_id,
            'countryCode': 'US',
            'kaLocale': 'en',
            'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION,
        })
        request_query = {
            'fastly_cacheable': 'persist_until_publish',
            'pcv': self._PUBLISHED_CONTENT_VERSION,
            # NOTE(review): presumably the persisted-query hash for ContentForPath - confirm
            'hash': '1242644265',
            'variables': graphql_variables,
            'lang': 'en',
        }
        response = self._download_json(
            'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id,
            query=request_query)
        listed_path_data = response['data']['contentRoute']['listedPathData']
        return self._parse_component_props(listed_path_data, display_id)
30a074c2 | 52 | |
53 | ||
class KhanAcademyIE(KhanAcademyBaseIE):
    # Single video pages: four leading path segments followed by 'v/<slug>'.
    IE_NAME = 'khanacademy'
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
        'md5': '1d5c2e70fa6aa29c38eca419f12515ce',
        'info_dict': {
            'id': 'FlIG3TvQCBQ',
            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'display_id': '716378217',
            'duration': 176,
            'uploader': 'Khan Academy',
            'uploader_id': '@khanacademy',
            'uploader_url': 'https://www.youtube.com/@khanacademy',
            'upload_date': '20120411',
            'timestamp': 1334170113,
            'license': 'cc-by-nc-sa',
            'live_status': 'not_live',
            'channel': 'Khan Academy',
            'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g',
            'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g',
            'channel_is_verified': True,
            'playable_in_embed': True,
            'categories': ['Education'],
            'creators': ['Brit Cruise'],
            'tags': [],
            'age_limit': 0,
            'availability': 'public',
            'comment_count': int,
            'channel_follower_count': int,
            'thumbnail': str,
            'view_count': int,
            'like_count': int,
            'heatmap': list,
        },
        'add_ie': ['Youtube'],
    }

    def _parse_component_props(self, component_props, display_id):
        """Return the info dict for the video stored under component_props['content'].

        Starts from the generic _parse_video result and layers the
        video-page-only fields (creators, timestamp, license) on top.
        display_id is accepted for signature parity and is not used here.
        """
        content = component_props['content']
        info = self._parse_video(content)
        info.update(traverse_obj(content, {
            'creators': ('authorNames', ..., {str}),
            'timestamp': ('dateAdded', {parse_iso8601}),
            'license': ('kaUserLicense', {str}),
        }))
        return info
30a074c2 | 104 | |
105 | ||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
    # Playlist pages: one or two leading path segments plus a final segment,
    # optionally followed by a trailing slash, then end of string or one of ?#&.
    IE_NAME = 'khanacademy:unit'
    _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
        'info_dict': {
            'id': 'x48c910b6',
            'title': 'Cryptography',
            'description': 'How have humans protected their secret messages through history? What has changed today?',
            'display_id': 'computing/computer-science/cryptography',
            '_old_archive_ids': ['khanacademyunit cryptography'],
        },
        'playlist_mincount': 31,
    }, {
        'url': 'https://www.khanacademy.org/computing/computer-science',
        'info_dict': {
            'id': 'x301707a0',
            'title': 'Computer science theory',
            'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba',
            'display_id': 'computing/computer-science',
            '_old_archive_ids': ['khanacademyunit computer-science'],
        },
        'playlist_mincount': 50,
    }]

    def _parse_component_props(self, component_props, display_id):
        """Return a playlist of the videos below the requested course or unit.

        The unit whose 'relativeUrl' equals '/<display_id>' is used when one
        exists among the course's 'unitChildren'; otherwise the course itself
        is used.
        """
        course = component_props['course']
        unit_path = f'/{display_id}'
        matching_unit = traverse_obj(course, (
            'unitChildren', lambda _, v: v['relativeUrl'] == unit_path, any))
        selected_unit = matching_unit or course

        def to_url_result(child):
            # Only called for children that passed the filter below, so
            # 'canonicalUrl' is present and truthy.
            page_url = urljoin('https://www.khanacademy.org', child['canonicalUrl'])
            return self.url_result(page_url, KhanAcademyIE, title=child.get('translatedTitle'))

        def to_old_archive_ids(slug):
            # An empty slug yields None rather than a list.
            return [make_archive_id(self, slug)] if slug else None

        # The first path element branches: every entry of 'unitChildren' as
        # well as the selected object itself (None) is searched for lessons.
        entries = traverse_obj(selected_unit, (
            (('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren',
            lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {to_url_result}))

        playlist_metadata = traverse_obj(selected_unit, {
            'id': ('id', {str}),
            'title': ('translatedTitle', {str}),
            'description': ('translatedDescription', {str}),
            '_old_archive_ids': ('slug', {str}, {to_old_archive_ids}),
        })
        return self.playlist_result(entries, display_id=display_id, **playlist_metadata)