]>
Commit | Line | Data |
---|---|---|
47408645 C |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
1723edb1 C |
5 | import re |
6 | import datetime | |
47408645 C |
7 | |
class SexyKarmaIE(InfoExtractor):
    """Extractor for sexykarma.com "gonewild" video pages.

    The video id is the token that follows the last hyphen of the page
    slug, immediately before the ``.html`` suffix.  All metadata is
    scraped from the downloaded HTML page with regular expressions.
    """

    # The trailing ``\.html`` is escaped so that only a literal ".html"
    # suffix is accepted (an unescaped dot would match any character).
    _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/.+\-(?P<id>[a-zA-Z0-9\-]+)\.html'
    _TESTS = [{
        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
        'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
        'info_dict': {
            'id': 'yHI70cOyIHt',
            'ext': 'mp4',
            'title': 'Taking a quick pee.',
            'uploader_id': 'wildginger7',
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': int,
            'view_count': int,
            'upload_date': '20141007',
        }
    }, {
        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
        'md5': 'dd216c68d29b49b12842b9babe762a5d',
        'info_dict': {
            'id': '8Id6EZPbuHf',
            'ext': 'mp4',
            'title': 'pot_pixie tribute',
            'uploader_id': 'banffite',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': int,
            'view_count': int,
            'upload_date': '20141013',
        }
    }]

    def _real_extract(self, url):
        """Download the video page at ``url`` and scrape its metadata.

        Returns a dict with the keys ``id``, ``title``, ``uploader_id``,
        ``url`` (the media link found in the page), ``thumbnail``,
        ``duration`` (seconds), ``view_count``, ``upload_date``
        (``YYYYMMDD``) and ``categories``.

        Raises ``ValueError`` if the scraped duration, view count or
        upload date cannot be parsed.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="he2"><span>(.*?)</span>', webpage, 'title')
        uploader_id = self._html_search_regex(
            r'class="aupa">\n*(.*?)</a>', webpage, 'uploader')
        # Kept in its own name so the page URL parameter is not overwritten.
        video_url = self._html_search_regex(
            r'<p><a href="(.*?)" ?\n*target="_blank"><font color',
            webpage, 'url')
        thumbnail = self._html_search_regex(
            r'<div id="player" style="z-index:1;"> <span id="edge"></span> <span id="container"><img[\n ]*src="(.+?)"',
            webpage, 'thumbnail')

        str_duration = self._html_search_regex(
            r'<tr>[\n\s]*<td>Time: </td>[\n\s]*<td align="right"><span>(.+)\n*',
            webpage, 'duration')
        duration = self._to_seconds(str_duration)

        str_views = self._html_search_regex(
            r'<tr>[\n\s]*<td>Views: </td>[\n\s]*<td align="right"><span>(.+)</span>',
            webpage, 'view_count')
        # Drop digit-group separators and stray whitespace so that values
        # such as "1,234" do not make int() raise.
        view_count = int(re.sub(r'[,\.\s]', '', str_views))

        date = self._html_search_regex(
            r'class="aup">Added: <strong>(.*?)</strong>', webpage, 'date')
        # The page shows dates like "October 7, 2014"; %B matches the full
        # month name as spelled by the current locale.
        parsed_date = datetime.datetime.strptime(date, '%B %d, %Y')
        upload_date = parsed_date.strftime('%Y%m%d')

        categories = re.findall(
            r'http://www\.sexykarma\.com/gonewild/search/video/(?:.+?)"><span>(.*?)</span>',
            webpage)

        return {
            'id': video_id,
            'title': title,
            'uploader_id': uploader_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'upload_date': upload_date,
            'categories': categories,
        }

    def _to_seconds(self, timestr):
        """Convert a colon-separated time string to a number of seconds.

        Each colon-separated field is treated as a base-60 digit, most
        significant first, so ``'1:02:03'`` yields ``3723`` and ``'45'``
        yields ``45``.  Raises ``ValueError`` if a field is not an integer.
        """
        seconds = 0
        for part in timestr.split(':'):
            seconds = seconds * 60 + int(part)
        return seconds