]>
Commit | Line | Data |
---|---|---|
41e8bca4 PH |
1 | import json |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | determine_ext, | |
7 | ) | |
8 | ||
9 | ||
class ViddlerIE(InfoExtractor):
    """Extractor for videos hosted on viddler.com.

    Accepts ``/v/<id>``, ``/embed/<id>`` and ``/player/<id>`` URLs, always
    fetches the corresponding ``/embed/<id>`` page and scrapes the player
    configuration embedded in it.
    """

    # The dot of the host name is escaped so that only a literal
    # "viddler.com" matches (an unescaped "." would accept any character).
    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
        u'info_dict': {
            u"title": u"Video Made Easy",
            u"uploader": u"viddler",
            u"duration": 100.89,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at *url*.

        The embed page is downloaded and the ``sources``, ``title``,
        ``authorName``, ``duration`` and ``thumbnail`` entries of the player
        configuration are scraped from it with regular expressions.  Only the
        sources and the title are looked up as mandatory fields; the other
        lookups are made with ``fatal=False`` and may yield ``None``.

        Returns a dict with the keys ``_type``, ``id``, ``title``,
        ``thumbnail``, ``uploader``, ``duration`` and ``formats``, merged
        with the entries (``url``, ``format``, ``ext``) of the last format.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Whatever URL flavour was given, the data is read from the embed page.
        embed_url = mobj.group('domain') + u'/embed/' + video_id
        webpage = self._download_webpage(embed_url, video_id)

        video_sources_code = self._search_regex(
            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
        # The object literal in the page uses single quotes; swap them for
        # double quotes so that the text can be parsed as JSON.
        video_sources = json.loads(video_sources_code.replace("'", '"'))

        # Each key of the sources object is used as the URL and each value
        # as the format name.
        formats = [{
            'url': video_url,
            'format': format_id,
        } for video_url, format_id in video_sources.items()]

        title = self._html_search_regex(
            r"title\s*:\s*'([^']*)'", webpage, u'title')
        uploader = self._html_search_regex(
            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
        duration_s = self._html_search_regex(
            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
        # The pattern also captures strings such as "." or "1.2.3" which
        # float() rejects.  The duration is optional, so an unparsable value
        # is treated as missing instead of aborting the whole extraction.
        try:
            duration = float(duration_s) if duration_s else None
        except ValueError:
            duration = None
        thumbnail = self._html_search_regex(
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

        info = {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
        }

        # TODO: Remove when #980 has been merged
        # NOTE: indexing with [-1] raises IndexError when the sources object
        # was empty and therefore no format could be built.
        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
        info.update(info['formats'][-1])

        return info