]>
Commit | Line | Data |
---|---|---|
d90df974 PH |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
d90df974 PH |
4 | |
5 | ||
class AcademicEarthCourseIE(InfoExtractor):
    """Extractor for Academic Earth course playlist pages.

    Handles ``academicearth.org/playlists/<id>`` URLs and produces a
    playlist result whose entries point at the lecture links found on
    the course page.
    """

    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
    _TEST = {
        'url': 'http://academicearth.org/playlists/laws-of-nature/',
        'info_dict': {
            'id': 'laws-of-nature',
            'title': 'Laws of Nature',
            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Build the playlist description for the course page at *url*.

        Returns a dict with ``_type`` set to ``'playlist'`` plus the
        playlist id, title, description and the list of lecture entries.
        """
        course_id = self._match_id(url)
        page = self._download_webpage(url, course_id)

        course_title = self._html_search_regex(
            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', page, 'title')
        # The description lookup is passed fatal=False, so presumably a
        # page without an excerpt is tolerated rather than aborting.
        course_description = self._html_search_regex(
            r'<p class="excerpt"[^>]*?>(.*?)</p>',
            page, 'description', fatal=False)

        # Each lecture preview item carries the link to one lecture.
        lecture_link_re = (
            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">')
        lectures = []
        for lecture_url in re.findall(lecture_link_re, page):
            lectures.append(self.url_result(lecture_url))

        return {
            '_type': 'playlist',
            'id': course_id,
            'title': course_title,
            'description': course_description,
            'entries': lectures,
        }