]>
Commit | Line | Data |
---|---|---|
466de688 PH |
1 | #coding: utf-8 |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | determine_ext, | |
8 | ) | |
9 | ||
class ThisAVIE(InfoExtractor):
    """Information extractor for video pages on thisav.com."""

    # Video page URL; the numeric path component is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'

    # Sample page together with the values expected from extracting it
    # (presumably consumed by the project's extractor test harness).
    _TEST = {
        u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
        u"file": u"47734.flv",
        u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
        u"info_dict": {
            u"title": u"高樹マリア - Just fit",
            u"uploader": u"dj7970",
            u"uploader_id": u"dj7970"
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dictionary.

        The title is read from the page's ``<h1>`` element and the media URL
        from the player's ``addVariable('file', ...)`` call.  The uploader
        name and id are both read from the uploader profile link; those two
        lookups are made with ``fatal=False``.

        Returns a dict with the keys ``_type``, ``id``, ``url``, ``uploader``,
        ``uploader_id``, ``title`` and ``ext``.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
        video_url = self._html_search_regex(
            r"addVariable\('file','([^']+)'\);", webpage, u'video url')

        # The profile link has the form .../user/<number>/<id>">name</a>.
        # Dots in the host name are escaped so they only match a literal '.',
        # and the scheme / "www." prefix are matched as leniently as in
        # _VALID_URL so pages served over https still yield uploader data.
        uploader = self._html_search_regex(
            r': <a href="https?://(?:www\.)?thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
            webpage, u'uploader name', fatal=False)
        uploader_id = self._html_search_regex(
            r': <a href="https?://(?:www\.)?thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
            webpage, u'uploader id', fatal=False)
        ext = determine_ext(video_url)

        return {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
            'ext': ext,
        }