]>
Commit | Line | Data |
---|---|---|
3798eadc | 1 | from __future__ import unicode_literals |
f8aace93 | 2 | |
d90df974 PH |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
d90df974 PH |
6 | |
7 | ||
class AcademicEarthCourseIE(InfoExtractor):
    """Extractor for Academic Earth course playlist pages.

    Downloads the playlist page, scrapes its title, description and the
    lecture links it lists, and returns them as a playlist result.
    """

    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
    _TEST = {
        'url': 'http://academicearth.org/playlists/laws-of-nature/',
        'info_dict': {
            'id': 'laws-of-nature',
            'title': 'Laws of Nature',
            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Build the playlist info dict for the course page at *url*.

        The playlist id is whatever ``_match_id`` extracts from *url*; each
        lecture link found on the page becomes one ``url_result`` entry.
        """
        course_id = self._match_id(url)
        page = self._download_webpage(url, course_id)

        course_title = self._html_search_regex(
            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', page, 'title')
        # The description lookup is the only one called with fatal=False.
        course_description = self._html_search_regex(
            r'<p class="excerpt"[^>]*?>(.*?)</p>',
            page, 'description', fatal=False)

        # Each lecture preview list item links to one lecture page.
        lecture_links = re.findall(
            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
            page)
        lecture_entries = []
        for lecture_link in lecture_links:
            lecture_entries.append(self.url_result(lecture_link))

        playlist = {
            '_type': 'playlist',
            'id': course_id,
            'title': course_title,
            'description': course_description,
            'entries': lecture_entries,
        }
        return playlist