]>
Commit | Line | Data |
---|---|---|
9933b574 PH |
1 | from __future__ import unicode_literals |
2 | ||
125cfd78 | 3 | import os |
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
1cc79574 PH |
7 | from ..compat import ( |
8 | compat_urllib_parse, | |
125cfd78 | 9 | compat_urllib_parse_urlparse, |
10 | compat_urllib_request, | |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
50789175 | 13 | ExtractorError, |
0320ddc1 | 14 | str_to_int, |
125cfd78 | 15 | ) |
16 | from ..aes import ( | |
17 | aes_decrypt_text | |
18 | ) | |
19 | ||
9933b574 | 20 | |
class PornHubIE(InfoExtractor):
    """Extractor for single pornhub.com video pages and embed URLs."""

    # Accepts both the regular watch page and the /embed/ form; the id is
    # a run of lowercase hex characters.
    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': '882f488fa1f0026f023f33576004a2ed',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            'uploader': 'Babes',
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
            'age_limit': 18,
        },
    }

    @classmethod
    def _extract_url(cls, webpage):
        """Return the first pornhub embed URL found in an <iframe> of
        *webpage*, or None when there is none.

        The id part uses the same character class as _VALID_URL so that
        every embed this extractor can handle is also detected here.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[0-9a-f]+)\1',
            webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _extract_count(self, pattern, webpage, name):
        """Search *webpage* for *pattern* (non-fatally) and convert the
        captured text with str_to_int.

        *name* is only used to label the field as '<name> count'.
        """
        return str_to_int(self._search_regex(
            pattern, webpage, '%s count' % name, fatal=False))

    def _real_extract(self, url):
        """Download the watch page for *url* and build the info dict.

        Raises ExtractorError (expected=True) when the page contains a
        "userMessageSection" block, relaying its text.
        """
        video_id = self._match_id(url)

        # Always fetch the canonical watch page, even for /embed/ URLs.
        req = compat_urllib_request.Request(
            'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
        # NOTE(review): presumably skips the age-confirmation page - confirm.
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        error_msg = self._html_search_regex(
            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
            webpage, 'error message', default=None)
        if error_msg:
            # Collapse whitespace runs so the message fits on one line.
            error_msg = re.sub(r'\s+', ' ', error_msg)
            raise ExtractorError(
                'PornHub said: %s' % error_msg,
                expected=True, video_id=video_id)

        video_title = self._html_search_regex(
            r'<h1 [^>]+>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
            r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
            webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(
            r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = compat_urllib_parse.unquote(thumbnail)

        view_count = self._extract_count(
            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
        like_count = self._extract_count(
            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
        dislike_count = self._extract_count(
            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

        video_urls = [
            compat_urllib_parse.unquote(quoted_url)
            for quoted_url in re.findall(
                r'"quality_[0-9]{3}p":"([^"]+)', webpage)]
        if '"encrypted":true' in webpage:
            # The URL-decoded "video_title" value is used as the AES password.
            password = compat_urllib_parse.unquote_plus(
                self._search_regex(
                    r'"video_title":"([^"]+)', webpage, 'password'))
            video_urls = [
                aes_decrypt_text(encrypted_url, password, 32).decode('utf-8')
                for encrypted_url in video_urls]

        formats = []
        for video_url in video_urls:
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]

            # The format label is built from the first two '_'-separated
            # tokens of the sixth path segment (e.g. "480P_600K_..." ->
            # "480P-600K"). Guard against shorter paths instead of letting
            # an IndexError abort the whole extraction.
            # NOTE(review): a None format/format_id is assumed to be
            # tolerated downstream - confirm against the core.
            segments = path.split('/')
            if len(segments) > 5:
                format_id = '-'.join(segments[5].split('_')[:2])
            else:
                format_id = None

            height = None
            tbr = None
            if format_id:
                m = re.match(
                    r'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format_id)
                if m is not None:
                    height = int(m.group('height'))
                    tbr = int(m.group('tbr'))

            formats.append({
                'url': video_url,
                'ext': extension,
                'format': format_id,
                'format_id': format_id,
                'tbr': tbr,
                'height': height,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
e66e1a00 S |
123 | |
124 | ||
class PornHubPlaylistIE(InfoExtractor):
    """Extractor for pornhub.com playlist pages."""

    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/6201671',
        'info_dict': {
            'id': '6201671',
            'title': 'P0p4',
        },
        'playlist_mincount': 35,
    }]

    def _real_extract(self, url):
        """Download the playlist page and return a playlist result whose
        entries delegate each linked video to the 'PornHub' extractor.

        Title and description are read from the page's ``playlistObject``
        JSON assignment.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Raw string: the pattern contains regex escapes (\. \? ) that are
        # not valid string escapes. The viewkey uses the same hex character
        # class that the video extractor's URL pattern accepts.
        video_paths = re.findall(
            r'href="/?(view_video\.php\?viewkey=[0-9a-f]+[^"]*)"', webpage)

        # De-duplicate while keeping first-seen page order, so the entry
        # order is deterministic rather than dependent on set iteration.
        seen = set()
        entries = []
        for video_path in video_paths:
            if video_path in seen:
                continue
            seen.add(video_path)
            entries.append(self.url_result(
                'http://www.pornhub.com/%s' % video_path, 'PornHub'))

        playlist = self._parse_json(
            self._search_regex(
                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
            playlist_id)

        return self.playlist_result(
            entries, playlist_id,
            playlist.get('title'), playlist.get('description'))