]>
Commit | Line | Data |
---|---|---|
74539995 S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..compat import compat_str | |
7 | from ..utils import ( | |
8 | clean_html, | |
9 | ExtractorError, | |
10 | remove_end, | |
11 | strip_or_none, | |
12 | unified_timestamp, | |
13 | urljoin, | |
14 | ) | |
15 | ||
16 | ||
class PacktPubBaseIE(InfoExtractor):
    """Common base for the Packt extractors; holds the shared URL roots."""

    # Root of the public web site.
    _PACKT_BASE = 'https://www.packtpub.com'
    # Root of the REST API, located directly under the site root.
    _MAPT_REST = _PACKT_BASE + '/mapt-rest'
20 | ||
21 | ||
class PacktPubIE(PacktPubBaseIE):
    """Extractor for a single video section of a Packt (mapt) course."""

    _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'

    _TEST = {
        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
        'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
        'info_dict': {
            'id': '20530',
            'ext': 'mp4',
            'title': 'Project Intro',
            'thumbnail': r're:(?i)^https?://.*\.jpg',
            'timestamp': 1490918400,
            'upload_date': '20170331',
        },
    }

    def _handle_error(self, response):
        """Raise ExtractorError if the API response is not a success.

        The response is considered failed when its 'status' field is
        anything other than 'success'; the response's 'message' field is
        then reported to the user as an expected error.
        """
        if response.get('status') != 'success':
            # The first placeholder must be a plain '%s'. The previous
            # '% said' was parsed as a space-flagged '%s' conversion, which
            # consumed IE_NAME and left a stray "aid:" in the message.
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, response['message']),
                expected=True)

    def _download_json(self, *args, **kwargs):
        """Download JSON via the parent implementation and validate it.

        Arguments are passed through unchanged. The decoded response is
        checked with _handle_error() before being returned.
        """
        response = super(PacktPubIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        """Build the info dict for one video section.

        Performs two REST requests: one for the section itself (which
        carries the media file URL) and one for its metadata (title,
        publication date, thumbnail path).

        Raises ExtractorError if the section has no 'content' entry.
        """
        mobj = re.match(self._VALID_URL, url)
        course_id, chapter_id, video_id = mobj.group(
            'course_id', 'chapter_id', 'id')

        video = self._download_json(
            '%s/users/me/products/%s/chapters/%s/sections/%s'
            % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
            'Downloading JSON video')['data']

        # A section without content is reported as locked.
        content = video.get('content')
        if not content:
            raise ExtractorError('This video is locked', expected=True)

        video_url = content['file']

        metadata = self._download_json(
            '%s/products/%s/chapters/%s/sections/%s/metadata'
            % (self._MAPT_REST, course_id, chapter_id, video_id),
            video_id)['data']

        title = metadata['pageTitle']
        course_title = metadata.get('title')
        if course_title:
            # Drop a trailing " - <course title>" from the page title.
            title = remove_end(title, ' - %s' % course_title)
        timestamp = unified_timestamp(metadata.get('publicationDate'))
        # 'filepath' is resolved against the site root to form the thumbnail.
        thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
        }
84 | ||
85 | ||
class PacktPubCourseIE(PacktPubBaseIE):
    """Extractor for a whole Packt (mapt) course, returned as a playlist."""

    _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
    _TEST = {
        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
        'info_dict': {
            'id': '9781787122215',
            'title': 'Learn Nodejs by building 12 projects [Video]',
        },
        'playlist_count': 90,
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that the single-video extractor accepts are always rejected
        here; everything else is decided by the parent implementation.
        """
        if PacktPubIE.suitable(url):
            return False
        return super(PacktPubCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect one url_transparent entry per section of the course.

        Only table-of-contents items of type 'chapter' whose 'children' is
        a list are visited, and within them only children of type
        'section' that carry a string 'seoUrl'.
        """
        match = re.match(self._VALID_URL, url)
        course_url = match.group('url')
        course_id = match.group('id')

        course = self._download_json(
            '%s/products/%s/metadata' % (self._MAPT_REST, course_id),
            course_id)['data']

        entries = []
        # Numbering counts every table-of-contents item, including the
        # ones skipped below.
        for position, toc_item in enumerate(course['tableOfContents'], 1):
            if toc_item.get('type') != 'chapter':
                continue
            sections = toc_item.get('children')
            if not isinstance(sections, list):
                continue

            # Chapter-level fields shared by every section of this chapter.
            chapter_title = toc_item.get('title')
            chapter_id = toc_item.get('id')

            for item in sections:
                if item.get('type') != 'section':
                    continue
                seo_url = item.get('seoUrl')
                if not isinstance(seo_url, compat_str):
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    # Section URLs are resolved relative to the course URL.
                    'url': urljoin(course_url + '/', seo_url),
                    'title': strip_or_none(item.get('title')),
                    'description': clean_html(item.get('summary')),
                    'ie_key': PacktPubIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': position,
                    'chapter_id': chapter_id,
                })

        return self.playlist_result(entries, course_id, course.get('title'))