]>
Commit | Line | Data |
---|---|---|
5f6a1245 | 1 | # coding: utf-8 |
fa7df757 | 2 | from __future__ import unicode_literals |
466de688 PH |
3 | |
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
fa7df757 S |
7 | from ..utils import determine_ext |
8 | ||
466de688 PH |
9 | |
10 | class ThisAVIE(InfoExtractor): | |
11 | _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' | |
12 | _TEST = { | |
fa7df757 S |
13 | 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', |
14 | 'md5': '0480f1ef3932d901f0e0e719f188f19b', | |
15 | 'info_dict': { | |
16 | 'id': '47734', | |
17 | 'ext': 'flv', | |
18 | 'title': '高樹マリア - Just fit', | |
19 | 'uploader': 'dj7970', | |
20 | 'uploader_id': 'dj7970' | |
466de688 PH |
21 | } |
22 | } | |
23 | ||
24 | def _real_extract(self, url): | |
25 | mobj = re.match(self._VALID_URL, url) | |
26 | ||
27 | video_id = mobj.group('id') | |
28 | webpage = self._download_webpage(url, video_id) | |
fa7df757 | 29 | title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') |
466de688 | 30 | video_url = self._html_search_regex( |
fa7df757 | 31 | r"addVariable\('file','([^']+)'\);", webpage, 'video url') |
466de688 PH |
32 | uploader = self._html_search_regex( |
33 | r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', | |
fa7df757 | 34 | webpage, 'uploader name', fatal=False) |
466de688 PH |
35 | uploader_id = self._html_search_regex( |
36 | r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', | |
fa7df757 | 37 | webpage, 'uploader id', fatal=False) |
466de688 | 38 | ext = determine_ext(video_url) |
5f6a1245 | 39 | |
466de688 | 40 | return { |
8bcc8756 JW |
41 | 'id': video_id, |
42 | 'url': video_url, | |
43 | 'uploader': uploader, | |
466de688 | 44 | 'uploader_id': uploader_id, |
8bcc8756 JW |
45 | 'title': title, |
46 | 'ext': ext, | |
466de688 | 47 | } |