]>
Commit | Line | Data |
---|---|---|
5f6a1245 | 1 | # coding: utf-8 |
fa7df757 | 2 | from __future__ import unicode_literals |
466de688 | 3 | |
466de688 | 4 | |
a4a554a7 | 5 | from .common import InfoExtractor |
d8dbf870 | 6 | from ..utils import remove_end |
fa7df757 | 7 | |
466de688 | 8 | |
class ThisAVIE(InfoExtractor):
    """Extractor for single video pages on thisav.com.

    Matches URLs of the form ``http(s)://[www.]thisav.com/video/<id>/...``
    and yields the numeric ``<id>`` as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
    _TESTS = [{
        # jwplayer
        'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
        'md5': '0480f1ef3932d901f0e0e719f188f19b',
        'info_dict': {
            'id': '47734',
            'ext': 'flv',
            'title': '高樹マリア - Just fit',
            'uploader': 'dj7970',
            'uploader_id': 'dj7970',
        },
    }, {
        # html5 media
        'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
        'md5': 'ba90c076bd0f80203679e5b60bf523ee',
        'info_dict': {
            'id': '242352',
            'ext': 'mp4',
            'title': 'Nerdy 18yo Big Ass Tattoos and Glasses',
            'uploader': 'cybersluts',
            'uploader_id': 'cybersluts',
        },
    }]

    # Link to the uploader's profile. The path segment after the numeric user
    # id is used as the uploader id and the anchor text as the display name.
    # Scheme and "www." are matched as loosely as in _VALID_URL so that the
    # metadata is still found if the page emits https or non-www links.
    _UPLOADER_LINK_RE = (
        r': <a href="https?://(?:www\.)?thisav\.com/user/[0-9]+/'
        r'(?P<uploader_id>[^"]+)">(?P<uploader>[^<]+)</a>')

    # Site-wide suffix appended to every page <title>; stripped from the
    # extracted video title.
    _TITLE_SUFFIX = ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站'

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        The media location is looked up with three strategies, in order:

        1. a flash ``addVariable('file', '<url>')`` call in the page,
        2. HTML5 media entries parsed from the page (first entry is used),
        3. jwplayer setup data embedded in the page.

        The result of whichever strategy succeeds is then completed with
        ``id``, ``title``, ``uploader`` and ``uploader_id``. The uploader
        fields are optional (``fatal=False``); the title is required.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = remove_end(
            self._html_search_regex(
                r'<title>([^<]+)</title>', webpage, 'title'),
            self._TITLE_SUFFIX)

        # default=None: a missing flash variable is expected on newer pages
        # and must fall through to the other strategies without a warning.
        video_url = self._html_search_regex(
            r"addVariable\('file','([^']+)'\);", webpage, 'video url',
            default=None)
        if video_url:
            info_dict = {
                'formats': [{
                    'url': video_url,
                }],
            }
        else:
            entries = self._parse_html5_media_entries(url, webpage, video_id)
            if entries:
                info_dict = entries[0]
            else:
                # The title is taken from the page <title> above, so the
                # jwplayer data is not required to carry one.
                info_dict = self._extract_jwplayer_data(
                    webpage, video_id, require_title=False)

        uploader = self._html_search_regex(
            self._UPLOADER_LINK_RE, webpage, 'uploader name',
            fatal=False, group='uploader')
        uploader_id = self._html_search_regex(
            self._UPLOADER_LINK_RE, webpage, 'uploader id',
            fatal=False, group='uploader_id')

        info_dict.update({
            'id': video_id,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
        })

        return info_dict