]>
Commit | Line | Data |
---|---|---|
850837b6 S |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import compat_urlparse | |
5 | from ..utils import ( | |
6 | int_or_none, | |
7 | js_to_json, | |
8 | parse_filesize, | |
9 | str_to_int, | |
10 | ) | |
11 | ||
12 | ||
class PornComIE(InfoExtractor):
    """Extractor for video pages under porn.com/videos/."""

    _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339',
        'md5': '3f30ce76267533cd12ba999263156de7',
        'info_dict': {
            'id': '2603339',
            'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
            'ext': 'mp4',
            'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 551,
            'view_count': int,
            'age_limit': 18,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page is downloaded once.  When an embedded JS config object
        can be located and parsed, title, formats, thumbnail and duration
        are read from it; otherwise the title and the download links are
        scraped from the HTML and thumbnail/duration are left as None.
        View count, categories and tags are always scraped from the HTML
        with non-fatal searches.
        """
        match = self._match_valid_url(url)
        video_id = match.group('id')
        # The slug is optional in _VALID_URL; use the numeric id instead.
        display_id = match.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        # NOTE(review): 'v1ar' in the first pattern looks like a typo for
        # 'var'; it is kept verbatim because changing it alters which
        # pattern matches -- confirm against a live page before touching.
        config_patterns = (
            r'=\s*({.+?})\s*;\s*v1ar\b',
            r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
        )
        # An absent config falls back to the literal '{}' rather than
        # raising, and the JSON parse itself is non-fatal as well.
        raw_config = self._search_regex(
            config_patterns, webpage, 'config', default='{}')
        config = self._parse_json(
            raw_config, display_id, transform_source=js_to_json, fatal=False)

        thumbnail = None
        duration = None
        formats = []

        if config:
            title = config['title']
            for stream in config['streams']:
                # Streams without a URL cannot be downloaded; skip them.
                if not stream.get('url'):
                    continue
                stream_id = stream.get('id')
                # Height is taken from ids that start with e.g. "720p".
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]', stream_id or '', 'height', default=None))
                formats.append({
                    'url': stream['url'],
                    'format_id': stream_id,
                    'height': height,
                })
            thumb_base = config.get('thumbCDN')
            poster = config.get('poster')
            if thumb_base and poster:
                thumbnail = compat_urlparse.urljoin(thumb_base, poster)
            duration = int_or_none(config.get('length'))
        else:
            title = self._search_regex(
                (r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'),
                webpage, 'title')
            download_links = re.findall(
                r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
                webpage)
            for link, height, size_text in download_links:
                formats.append({
                    # Download hrefs are site-relative; resolve against the page URL.
                    'url': compat_urlparse.urljoin(url, link),
                    'format_id': '%sp' % height,
                    'height': int(height),
                    'filesize_approx': parse_filesize(size_text),
                })

        self._sort_formats(formats)

        raw_views = self._search_regex(
            (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
             r'class=["\']views["\'][^>]*><p>([\d,.]+)'),
            webpage, 'view count', fatal=False)
        view_count = str_to_int(raw_views)

        def extract_list(kind):
            """Return the link texts found in the page section labelled *kind*."""
            label = kind.capitalize()
            section = self._search_regex(
                (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % label,
                 r'(?s)<p[^>]*>%s:(.+?)</p>' % label),
                webpage, kind, fatal=False)
            # A failed (non-fatal) search yields nothing; scan '' instead.
            return re.findall(r'<a[^>]+>([^<]+)</a>', section or '')

        categories = extract_list('categories')
        tags = extract_list('tags')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'age_limit': 18,
            'categories': categories,
            'tags': tags,
        }