]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/khanacademy.py
Update to ytdl-2021.01.16
[yt-dlp.git] / youtube_dlc / extractor / khanacademy.py
CommitLineData
3d3538e4
PH
1from __future__ import unicode_literals
2
30a074c2 3import json
3d3538e4
PH
4
5from .common import InfoExtractor
6from ..utils import (
30a074c2 7 int_or_none,
8 parse_iso8601,
9 try_get,
3d3538e4
PH
10)
11
12
class KhanAcademyBaseIE(InfoExtractor):
    """Common base for the Khan Academy extractors.

    Concrete subclasses build ``_VALID_URL`` from ``_VALID_URL_TEMPL`` and
    implement ``_parse_component_props``, which ``_real_extract`` calls with
    the ``componentProps`` object taken from the GraphQL response.
    """

    # First %s: repetition count for the leading "segment/" groups;
    # second %s: literal text placed right before the final path segment.
    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'

    def _parse_video(self, video):
        """Map a Khan Academy video object to a url_transparent result.

        ``video['youtubeId']`` is required (a missing key raises KeyError);
        all other fields are optional and read with ``.get``.
        """
        # Read the mandatory key first so a missing id fails before anything else.
        youtube_id = video['youtubeId']
        thumbnail = video.get('imageUrl') or video.get('thumbnailUrl')
        duration = int_or_none(video.get('duration'))
        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'id': video.get('slug'),
            'title': video.get('title'),
            'thumbnail': thumbnail,
            'duration': duration,
            'description': video.get('description'),
            'ie_key': 'Youtube',
        }

    def _real_extract(self, url):
        """Fetch the page data for *url* and delegate to ``_parse_component_props``."""
        display_id = self._match_id(url)

        # NOTE(review): the meaning of the numeric hash is not visible here;
        # presumably it identifies the server-side query -- confirm.
        graphql_query = {
            'hash': 1604303425,
            'variables': json.dumps({
                'path': display_id,
                'queryParams': '',
            }),
        }
        response = self._download_json(
            'https://www.khanacademy.org/api/internal/graphql',
            display_id, query=graphql_query)

        # 'contentJson' is itself a JSON document that needs a second parse.
        content = self._parse_json(response['data']['contentJson'], display_id)
        return self._parse_component_props(content['componentProps'])
40
41
class KhanAcademyIE(KhanAcademyBaseIE):
    """Extractor for single Khan Academy video pages.

    Matches URLs with four leading path segments followed by ``v/`` and the
    final segment.
    """

    IE_NAME = 'khanacademy'
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
        'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
        'info_dict': {
            'id': 'FlIG3TvQCBQ',
            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'uploader_id': 'khanacademy',
            'upload_date': '20120411',
            'timestamp': 1334170113,
            'license': 'cc-by-nc-sa',
        },
        'add_ie': ['Youtube'],
    }

    def _parse_component_props(self, component_props):
        """Build the video result from ``tutorialPageData.contentModel``.

        Extends the base ``_parse_video`` result with uploader (author names
        joined by ", "), timestamp and license.
        """
        content_model = component_props['tutorialPageData']['contentModel']
        result = self._parse_video(content_model)

        authors = content_model.get('authorNames')
        if authors:
            uploader = ', '.join(authors)
        else:
            # Missing or empty author list: leave the uploader unset.
            uploader = None

        result['uploader'] = uploader
        result['timestamp'] = parse_iso8601(content_model.get('dateAdded'))
        result['license'] = content_model.get('kaUserLicense')
        return result
73
74
class KhanAcademyUnitIE(KhanAcademyBaseIE):
    """Extractor for Khan Academy unit pages, returned as a playlist.

    Matches URLs with two leading path segments plus a final segment,
    optionally followed by a slash, and then ``?``, ``#``, ``&`` or the end
    of the string.
    """

    IE_NAME = 'khanacademy:unit'
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '') + '/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
        'info_dict': {
            'id': 'cryptography',
            'title': 'Cryptography',
            'description': 'How have humans protected their secret messages through history? What has changed today?',
        },
        'playlist_mincount': 31,
    }

    def _parse_component_props(self, component_props):
        """Collect the unit's videos into a playlist result.

        Only the tutorials of the first module of the first tab are read.
        Each tutorial becomes a chapter (numbered from 1), and only content
        items whose ``kind`` is ``'Video'`` produce entries.
        """
        curation = component_props['curation']

        # try_get yields None when the path is absent or is not a list.
        tutorials = try_get(
            curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []

        entries = []
        for number, tutorial in enumerate(tutorials, 1):
            chapter_fields = {
                'chapter': tutorial.get('title'),
                'chapter_number': number,
                'chapter_id': tutorial.get('id'),
            }
            for item in tutorial.get('contentItems') or []:
                if item.get('kind') != 'Video':
                    continue
                entry = self._parse_video(item)
                entry.update(chapter_fields)
                entries.append(entry)

        playlist_id = curation.get('unit')
        playlist_title = curation.get('title')
        playlist_description = curation.get('description')
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)