]>
Commit | Line | Data |
---|---|---|
3798eadc | 1 | from __future__ import unicode_literals |
d90df974 PH |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
d90df974 PH |
5 | |
6 | ||
class AcademicEarthCourseIE(InfoExtractor):
    """Playlist extractor for pages under ``academicearth.org/playlists/``.

    The playlist id is the path segment that follows ``/playlists/`` in the
    URL; each lecture link found on the page becomes one playlist entry.
    """

    # The named group ``id`` captures everything after ``/playlists/`` up to
    # the next ``/``, ``?`` or ``#``.
    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
    _TEST = {
        'url': 'http://academicearth.org/playlists/laws-of-nature/',
        'info_dict': {
            'id': 'laws-of-nature',
            'title': 'Laws of Nature',
            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
        },
        'playlist_count': 4,
    }

    def _real_extract(self, url):
        """Build a playlist result dict for the course page at *url*.

        Downloads the page, reads the title and description out of the HTML
        and collects the lecture links listed on it.

        Returns a dict with ``_type`` set to ``'playlist'`` plus the ``id``,
        ``title``, ``description`` and ``entries`` keys.
        """
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        title = self._html_search_regex(
            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
        # NOTE(review): fatal=False presumably makes a missing description
        # non-fatal -- confirm against the InfoExtractor helper.
        description = self._html_search_regex(
            r'<p class="excerpt"[^>]*?>(.*?)</p>',
            webpage, u'description', fatal=False)

        # Each lecture preview item holds one link; its href is the entry URL.
        lecture_urls = re.findall(
            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
            webpage)
        entries = list(map(self.url_result, lecture_urls))

        result = {'_type': 'playlist'}
        result['id'] = playlist_id
        result['title'] = title
        result['description'] = description
        result['entries'] = entries
        return result