]>
Commit | Line | Data |
---|---|---|
7c360e3a S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import int_or_none | |
7 | ||
8 | ||
class SciVeeIE(InfoExtractor):
    """Extractor for ``scivee.tv/node/<id>`` pages.

    Metadata is scraped with regular expressions from the per-video
    annotations document; media and thumbnail URLs are built directly
    from the numeric node id.
    """

    # http or https, optional "www.", and a numeric node id captured as "id".
    _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.scivee.tv/node/62352',
        'md5': 'b16699b74c9e6a120f6772a44960304f',
        'info_dict': {
            'id': '62352',
            'ext': 'mp4',
            'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
            'description': 'md5:81f1710638e11a481358fab1b11059d7',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats`` (an audio entry followed by a
        video entry).
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The annotations XML is malformed, so it is fetched as plain text
        # and picked apart with regexes rather than an XML parser.
        annotations_url = 'http://www.scivee.tv/assets/annotations/%s' % video_id
        annotations = self._download_webpage(
            annotations_url, video_id, 'Downloading annotations')

        # Only the title lookup is left at the helper's default strictness;
        # the abstract and file size are requested with fatal=False.
        title = self._html_search_regex(
            r'<title>([^<]+)</title>', annotations, 'title')
        description = self._html_search_regex(
            r'<abstract>([^<]+)</abstract>', annotations, 'abstract',
            fatal=False)
        raw_filesize = self._html_search_regex(
            r'<filesize>([^<]+)</filesize>', annotations, 'filesize',
            fatal=False)
        filesize = int_or_none(raw_filesize)

        audio_format = {
            'url': 'http://www.scivee.tv/assets/audio/%s' % video_id,
            'ext': 'mp3',
            'format_id': 'audio',
        }
        # The scraped file size is attached to the video entry only.
        video_format = {
            'url': 'http://www.scivee.tv/assets/video/%s' % video_id,
            'ext': 'mp4',
            'format_id': 'video',
            'filesize': filesize,
        }

        thumbnail = 'http://www.scivee.tv/assets/videothumb/%s' % video_id

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': [audio_format, video_format],
        }