]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/khanacademy.py
3 from .common
import InfoExtractor
11 class KhanAcademyBaseIE(InfoExtractor
):
12 _VALID_URL_TEMPL
= r
'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
14 def _parse_video(self
, video
):
16 '_type': 'url_transparent',
17 'url': video
['youtubeId'],
18 'id': video
.get('slug'),
19 'title': video
.get('title'),
20 'thumbnail': video
.get('imageUrl') or video
.get('thumbnailUrl'),
21 'duration': int_or_none(video
.get('duration')),
22 'description': video
.get('description'),
26 def _real_extract(self
, url
):
27 display_id
= self
._match
_id
(url
)
28 content
= self
._download
_json
(
29 'https://www.khanacademy.org/api/internal/graphql/FetchContentData',
31 'fastly_cacheable': 'persist_until_publish',
34 'variables': json
.dumps({
36 'queryParams': 'lang=en',
38 'followRedirects': True,
41 })['data']['contentJson']
42 return self
._parse
_component
_props
(self
._parse
_json
(content
, display_id
)['componentProps'])
45 class KhanAcademyIE(KhanAcademyBaseIE
):
46 IE_NAME
= 'khanacademy'
47 _VALID_URL
= KhanAcademyBaseIE
._VALID
_URL
_TEMPL
% ('4', 'v/')
49 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
50 'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
54 'title': 'The one-time pad',
55 'description': 'The perfect cipher',
57 'uploader': 'Brit Cruise',
58 'uploader_id': 'khanacademy',
59 'upload_date': '20120411',
60 'timestamp': 1334170113,
61 'license': 'cc-by-nc-sa',
63 'add_ie': ['Youtube'],
66 def _parse_component_props(self
, component_props
):
67 video
= component_props
['tutorialPageData']['contentModel']
68 info
= self
._parse
_video
(video
)
69 author_names
= video
.get('authorNames')
71 'uploader': ', '.join(author_names
) if author_names
else None,
72 'timestamp': parse_iso8601(video
.get('dateAdded')),
73 'license': video
.get('kaUserLicense'),
78 class KhanAcademyUnitIE(KhanAcademyBaseIE
):
79 IE_NAME
= 'khanacademy:unit'
80 _VALID_URL
= (KhanAcademyBaseIE
._VALID
_URL
_TEMPL
% ('2', '')) + '/?(?:[?#&]|$)'
82 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
85 'title': 'Cryptography',
86 'description': 'How have humans protected their secret messages through history? What has changed today?',
88 'playlist_mincount': 31,
91 def _parse_component_props(self
, component_props
):
92 curation
= component_props
['curation']
95 tutorials
= try_get(curation
, lambda x
: x
['tabs'][0]['modules'][0]['tutorials'], list) or []
96 for tutorial_number
, tutorial
in enumerate(tutorials
, 1):
98 'chapter': tutorial
.get('title'),
99 'chapter_number': tutorial_number
,
100 'chapter_id': tutorial
.get('id'),
102 for content_item
in (tutorial
.get('contentItems') or []):
103 if content_item
.get('kind') == 'Video':
104 info
= self
._parse
_video
(content_item
)
105 info
.update(chapter_info
)
108 return self
.playlist_result(
109 entries
, curation
.get('unit'), curation
.get('title'),
110 curation
.get('description'))