]>
Commit | Line | Data |
---|---|---|
9933b574 PH |
1 | from __future__ import unicode_literals |
2 | ||
125cfd78 | 3 | import os |
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
1cc79574 PH |
7 | from ..compat import ( |
8 | compat_urllib_parse, | |
125cfd78 | 9 | compat_urllib_parse_urlparse, |
10 | compat_urllib_request, | |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
50789175 | 13 | ExtractorError, |
0320ddc1 | 14 | str_to_int, |
125cfd78 | 15 | ) |
16 | from ..aes import ( | |
17 | aes_decrypt_text | |
18 | ) | |
19 | ||
9933b574 | 20 | |
class PornHubIE(InfoExtractor):
    """Extractor for single PornHub video pages (``view_video.php?viewkey=...``)."""

    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': '882f488fa1f0026f023f33576004a2ed',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            "uploader": "Babes",
            "title": "Seductive Indian beauty strips down and fingers her pink pussy",
            "age_limit": 18
        }
    }

    def _extract_count(self, pattern, webpage, name):
        """Search ``webpage`` for ``pattern`` and return the match as a count.

        The search is non-fatal: when nothing is found, the falsy result of
        the search is returned unchanged.  A found value is passed through
        ``str_to_int`` (the patterns used here capture digits, commas and
        dots).  ``name`` is only used to label the field as '<name> count'.
        """
        count = self._html_search_regex(
            pattern, webpage, '%s count' % name, fatal=False)
        if count:
            count = str_to_int(count)
        return count

    @staticmethod
    def _build_format(video_url):
        """Build one format dict for ``video_url``.

        The format id is taken from the sixth '/'-separated piece of the URL
        path (its first two '_'-separated tokens joined with '-').  When it
        has the shape '<height>P-<tbr>K', height and bitrate are parsed out
        of it; otherwise they are left as None.
        """
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]

        format_id = None
        height = None
        tbr = None

        segments = path.split('/')
        # Guard against shorter paths: indexing [5] unconditionally raised
        # IndexError and aborted the whole extraction.
        # NOTE(review): a None format/format_id is presumably filled in by the
        # downloader core - confirm.
        if len(segments) > 5:
            format_id = '-'.join(segments[5].split('_')[:2])
            m = re.match(
                r'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format_id)
            if m is not None:
                height = int(m.group('height'))
                tbr = int(m.group('tbr'))

        return {
            'url': video_url,
            'ext': extension,
            'format': format_id,
            'format_id': format_id,
            'tbr': tbr,
            'height': height,
        }

    def _real_extract(self, url):
        """Download the video page at ``url`` and return its info dict.

        Raises ExtractorError (expected=True) when the page contains a
        'userMessageSection' block, using its text as the error message.
        """
        video_id = self._match_id(url)

        # The page is requested with the age-verification cookie already set.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        error_msg = self._html_search_regex(
            r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
            webpage, 'error message', default=None)
        if error_msg:
            # Collapse runs of whitespace so the message fits on one line.
            error_msg = re.sub(r'\s+', ' ', error_msg)
            raise ExtractorError(
                'PornHub said: %s' % error_msg,
                expected=True, video_id=video_id)

        video_title = self._html_search_regex(
            r'<h1 [^>]+>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
            r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
            webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(
            r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = compat_urllib_parse.unquote(thumbnail)

        view_count = self._extract_count(
            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
        like_count = self._extract_count(
            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
        dislike_count = self._extract_count(
            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
        comment_count = self._extract_count(
            r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>',
            webpage, 'comment')

        video_urls = [
            compat_urllib_parse.unquote(quoted_url)
            for quoted_url in re.findall(
                r'"quality_[0-9]{3}p":"([^"]+)', webpage)]
        if '"encrypted":true' in webpage:
            # When the page flags the URLs as encrypted, the url-unquoted
            # "video_title" value is used as the AES password.
            password = compat_urllib_parse.unquote_plus(
                self._html_search_regex(
                    r'"video_title":"([^"]+)', webpage, 'password'))
            video_urls = [
                aes_decrypt_text(encrypted_url, password, 32).decode('utf-8')
                for encrypted_url in video_urls]

        formats = [self._build_format(video_url) for video_url in video_urls]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
e66e1a00 S |
113 | |
114 | ||
class PornHubPlaylistIE(InfoExtractor):
    """Extractor for PornHub playlist pages (``/playlist/<numeric id>``)."""

    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/6201671',
        'info_dict': {
            'id': '6201671',
            'title': 'P0p4',
        },
        'playlist_mincount': 35,
    }]

    def _real_extract(self, url):
        """Download the playlist page and return a playlist result.

        Every distinct ``view_video.php?viewkey=...`` link on the page becomes
        one entry delegated to the 'PornHub' extractor.  Title and description
        are read from the ``playlistObject`` JSON embedded in the page.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # De-duplicate while keeping the order in which links appear on the
        # page; a plain set() would shuffle the playlist.  The viewkey
        # character class mirrors the one accepted by PornHubIE._VALID_URL.
        seen = set()
        entries = []
        for video_path in re.findall(
                r'href="/?(view_video\.php\?viewkey=[0-9a-f]+[^"]*)"', webpage):
            if video_path in seen:
                continue
            seen.add(video_path)
            entries.append(self.url_result(
                'http://www.pornhub.com/%s' % video_path, 'PornHub'))

        playlist = self._parse_json(
            self._search_regex(
                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
            playlist_id)

        return self.playlist_result(
            entries, playlist_id,
            playlist.get('title'), playlist.get('description'))