]> jfr.im git - yt-dlp.git/blob - youtube_dlc/extractor/khanacademy.py
Update to ytdl-2021.01.16
[yt-dlp.git] / youtube_dlc / extractor / khanacademy.py
1 from __future__ import unicode_literals
2
3 import json
4
5 from .common import InfoExtractor
6 from ..utils import (
7 int_or_none,
8 parse_iso8601,
9 try_get,
10 )
11
12
class KhanAcademyBaseIE(InfoExtractor):
    """Common base for the Khan Academy extractors.

    Subclasses build their ``_VALID_URL`` by filling the two ``%s`` slots of
    ``_VALID_URL_TEMPL`` and must provide ``_parse_component_props``, which
    ``_real_extract`` calls with the decoded ``componentProps`` object.
    """

    # First slot: repeat count for the leading path segments;
    # second slot: literal text placed before the final path segment.
    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'

    def _parse_video(self, video):
        """Map a video object to a ``url_transparent`` result keyed for 'Youtube'.

        ``youtubeId`` is mandatory (a missing key raises ``KeyError``); every
        other field is read with ``.get`` and may come back as ``None``.
        """
        # Looked up first so a missing mandatory key fails before anything else.
        youtube_id = video['youtubeId']
        slug = video.get('slug')
        title = video.get('title')
        thumbnail = video.get('imageUrl') or video.get('thumbnailUrl')
        duration = int_or_none(video.get('duration'))
        description = video.get('description')
        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'id': slug,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'description': description,
            'ie_key': 'Youtube',
        }

    def _real_extract(self, url):
        """Query the internal GraphQL endpoint for *url* and parse the result.

        The response's ``data.contentJson`` value is itself JSON text; it is
        decoded and its ``componentProps`` member is handed to
        ``_parse_component_props``.
        """
        display_id = self._match_id(url)
        variables = json.dumps({
            'path': display_id,
            'queryParams': '',
        })
        response = self._download_json(
            'https://www.khanacademy.org/api/internal/graphql',
            display_id, query={
                'hash': 1604303425,
                'variables': variables,
            })
        content = self._parse_json(response['data']['contentJson'], display_id)
        return self._parse_component_props(content['componentProps'])
40
41
class KhanAcademyIE(KhanAcademyBaseIE):
    """Extractor for a single Khan Academy video page (``.../v/<slug>`` URLs)."""

    IE_NAME = 'khanacademy'
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
        'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
        'info_dict': {
            'id': 'FlIG3TvQCBQ',
            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'uploader_id': 'khanacademy',
            'upload_date': '20120411',
            'timestamp': 1334170113,
            'license': 'cc-by-nc-sa',
        },
        'add_ie': ['Youtube'],
    }

    def _parse_component_props(self, component_props):
        """Build the result for the video at ``tutorialPageData.contentModel``.

        Starts from ``_parse_video`` and adds ``uploader`` (author names
        joined with ``', '``, or ``None`` when there are none), ``timestamp``
        (parsed from ``dateAdded``) and ``license`` (``kaUserLicense``).
        """
        content_model = component_props['tutorialPageData']['contentModel']
        result = self._parse_video(content_model)

        authors = content_model.get('authorNames')
        if authors:
            uploader = ', '.join(authors)
        else:
            uploader = None
        timestamp = parse_iso8601(content_model.get('dateAdded'))
        video_license = content_model.get('kaUserLicense')

        result.update({
            'uploader': uploader,
            'timestamp': timestamp,
            'license': video_license,
        })
        return result
73
74
class KhanAcademyUnitIE(KhanAcademyBaseIE):
    """Extractor that turns a Khan Academy unit page into a playlist of its videos."""

    IE_NAME = 'khanacademy:unit'
    _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
        'info_dict': {
            'id': 'cryptography',
            'title': 'Cryptography',
            'description': 'How have humans protected their secret messages through history? What has changed today?',
        },
        'playlist_mincount': 31,
    }

    def _parse_component_props(self, component_props):
        """Collect the unit's videos into a playlist result.

        Only the tutorials of the first module of the first tab of
        ``curation`` are read. Each tutorial is treated as a chapter
        (numbered from 1), and only content items whose ``kind`` is
        ``'Video'`` become entries.
        """
        curation = component_props['curation']
        tutorials = try_get(
            curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []

        playlist_entries = []
        for chapter_number, tutorial in enumerate(tutorials, 1):
            # Chapter metadata shared by every video in this tutorial.
            chapter_fields = {
                'chapter': tutorial.get('title'),
                'chapter_number': chapter_number,
                'chapter_id': tutorial.get('id'),
            }
            items = tutorial.get('contentItems') or []
            for item in items:
                if item.get('kind') != 'Video':
                    continue
                entry = self._parse_video(item)
                entry.update(chapter_fields)
                playlist_entries.append(entry)

        playlist_id = curation.get('unit')
        playlist_title = curation.get('title')
        playlist_description = curation.get('description')
        return self.playlist_result(
            playlist_entries, playlist_id, playlist_title, playlist_description)