]>
Commit | Line | Data |
---|---|---|
850837b6 S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..compat import compat_urlparse | |
7 | from ..utils import ( | |
8 | int_or_none, | |
9 | js_to_json, | |
10 | parse_filesize, | |
11 | str_to_int, | |
12 | ) | |
13 | ||
14 | ||
class PornComIE(InfoExtractor):
    """Extractor for single video pages on porn.com.

    The URL must end in a numeric video id, optionally preceded by a
    human-readable slug which is used as the display id.
    """

    _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339',
        'md5': '3f30ce76267533cd12ba999263156de7',
        'info_dict': {
            'id': '2603339',
            'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
            'ext': 'mp4',
            'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 551,
            'view_count': int,
            'age_limit': 18,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page at ``url`` and build its info dict.

        The title, formats, thumbnail and duration are read from a
        JavaScript config object embedded in the page when that object
        is usable; otherwise the title and formats are scraped from the
        HTML (title tag / heading and the "/download/" links) and the
        thumbnail and duration are left as None.

        Returns a dict with the keys 'id', 'display_id', 'title',
        'thumbnail', 'duration', 'view_count', 'formats', 'age_limit',
        'categories' and 'tags'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # The slug is optional in _VALID_URL; fall back to the numeric id.
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        # Non-fatal on purpose: a missing or unparsable config simply
        # routes extraction through the HTML fallback below.
        config = self._parse_json(
            self._search_regex(
                r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
                webpage, 'config', default='{}'),
            display_id, transform_source=js_to_json, fatal=False)

        # The config pattern is loose and may capture an unrelated object
        # literal. Only trust the result when it carries the fields read
        # below; otherwise use the HTML fallback instead of failing with
        # a KeyError/TypeError on a partial or foreign object.
        config_usable = (
            isinstance(config, dict)
            and bool(config.get('title'))
            and isinstance(config.get('streams'), list))

        if config_usable:
            title = config['title']
            formats = [{
                'url': stream['url'],
                'format_id': stream.get('id'),
                # Stream ids that start with digits followed by "p"/"P"
                # (e.g. "240p") provide the height.
                'height': int_or_none(self._search_regex(
                    r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None))
            } for stream in config['streams']
                if isinstance(stream, dict) and stream.get('url')]
            # The poster is resolved against the thumbnail CDN base; both
            # parts are required to build the thumbnail URL.
            thumbnail = (compat_urlparse.urljoin(
                config['thumbCDN'], config['poster'])
                if config.get('thumbCDN') and config.get('poster') else None)
            duration = int_or_none(config.get('length'))
        else:
            title = self._search_regex(
                (r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'),
                webpage, 'title')
            # Each download link yields a relative URL, a height and an
            # approximate file size.
            formats = [{
                'url': compat_urlparse.urljoin(url, format_url),
                'format_id': '%sp' % height,
                'height': int(height),
                'filesize_approx': parse_filesize(filesize),
            } for format_url, height, filesize in re.findall(
                r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
                webpage)]
            thumbnail = None
            duration = None

        self._sort_formats(formats)

        # Two page layouts are tried in order; the view count is optional.
        view_count = str_to_int(self._search_regex(
            (r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
             r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
            'view count', fatal=False))

        def extract_list(kind):
            """Return the link texts found in the section labelled ``kind``.

            ``kind`` is capitalized to form the label (e.g. 'tags' ->
            'Tags:'). An empty list is returned when the section is not
            found.
            """
            s = self._search_regex(
                (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(),
                 r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize()),
                webpage, kind, fatal=False)
            return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'age_limit': 18,
            'categories': extract_list('categories'),
            'tags': extract_list('tags'),
        }