]>
Commit | Line | Data |
---|---|---|
35409e11 PH |
1 | from __future__ import unicode_literals |
2 | ||
cb10cded PH |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
065c4b27 | 7 | dict_get, |
44731e30 | 8 | float_or_none, |
ccb079ee | 9 | int_or_none, |
44731e30 | 10 | unified_strdate, |
cb10cded PH |
11 | ) |
12 | ||
13 | ||
class XHamsterIE(InfoExtractor):
    """Extractor for xhamster.com ``/movies/<id>/<seo>.html`` video pages."""

    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
    _TESTS = [{
        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
        'md5': '8281348b8d3c53d39fffb377d24eac4e',
        'info_dict': {
            'id': '1509445',
            'ext': 'mp4',
            'title': 'FemaleAgent Shy beauty takes the bait',
            'upload_date': '20121014',
            'uploader': 'Ruseful2011',
            'duration': 893.52,
            'age_limit': 18,
        },
    }, {
        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
        'info_dict': {
            'id': '2221348',
            'ext': 'mp4',
            'title': 'Britney Spears Sexy Booty',
            'upload_date': '20130914',
            'uploader': 'jojo747400',
            'duration': 200.48,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # empty seo
        'url': 'http://xhamster.com/movies/5667973/.html',
        'info_dict': {
            'id': '5667973',
            'ext': 'mp4',
            'title': '....',
            'upload_date': '20160208',
            'uploader': 'parejafree',
            'duration': 72.0,
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page behind *url* and return its info dict.

        The page is always fetched from the bare ``xhamster.com`` host using
        the protocol, id and seo slug captured by ``_VALID_URL``.  When that
        page does not carry the HD marker, the same page is requested once
        more with ``?hd`` appended to look for an additional HD format.

        The title and the primary video URL are mandatory (their lookups are
        fatal); every other metadata field is ``None`` when it cannot be
        found, except ``uploader`` which falls back to ``'anonymous'``.
        """
        def extract_video_url(webpage, name):
            # The media URL may be exposed in any of three page layouts;
            # the first pattern that matches wins.
            return self._search_regex(
                [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
                 r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
                 r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
                webpage, name, group='mp4')

        def is_hd(webpage):
            # The page is treated as HD when it contains the HD icon markup.
            return '<div class=\'icon iconHD\'' in webpage

        url_mobj = re.match(self._VALID_URL, url)

        video_id = url_mobj.group('id')
        seo = url_mobj.group('seo')
        proto = url_mobj.group('proto')
        mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
        webpage = self._download_webpage(mrss_url, video_id)

        title = self._html_search_regex(
            [r'<h1[^>]*>([^<]+)</h1>',
             r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
             r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
            webpage, 'title')

        # Only a few videos have a description
        description_mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
        description = description_mobj.group(1) if description_mobj else None

        upload_date = unified_strdate(self._search_regex(
            r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
            webpage, 'upload date', fatal=False))

        uploader = self._html_search_regex(
            r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P<uploader>.+?)</a>',
            webpage, 'uploader', default='anonymous')

        thumbnail = self._search_regex(
            [r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
             r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
            webpage, 'thumbnail', fatal=False, group='thumbnail')

        duration = float_or_none(self._search_regex(
            r'(["\'])duration\1\s*:\s*(["\'])(?P<duration>.+?)\2',
            webpage, 'duration', fatal=False, group='duration'))

        view_count = int_or_none(self._search_regex(
            r'content=["\']User(?:View|Play)s:(\d+)',
            webpage, 'view count', fatal=False))

        rating_mobj = re.search(
            r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
        if rating_mobj:
            like_count = rating_mobj.group('likecount')
            dislike_count = rating_mobj.group('dislikecount')
        else:
            like_count = dislike_count = None

        # A missing counter means "unknown", not "zero comments", so fall
        # back to None exactly like the like/dislike counters above.
        comments_mobj = re.search(
            r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
        comment_count = comments_mobj.group('commentcount') if comments_mobj else None

        age_limit = self._rta_search(webpage)

        hd = is_hd(webpage)
        format_id = 'hd' if hd else 'sd'

        formats = [{
            'url': extract_video_url(webpage, format_id),
            'format_id': format_id,
            'preference': 1,
        }]

        if not hd:
            # Probing for HD is best effort: a usable format has already been
            # found, so neither a missing canonical link nor a failed download
            # of the HD page should abort the extraction.
            canonical_url = self._search_regex(
                r'<link rel="canonical" href="([^"]+)',
                webpage, 'mrss_url', default=mrss_url)
            hd_webpage = self._download_webpage(
                canonical_url + '?hd', video_id,
                note='Downloading HD webpage', fatal=False)
            # With fatal=False a failed download yields a falsy value.
            if hd_webpage and is_hd(hd_webpage):
                formats.append({
                    'url': extract_video_url(hd_webpage, 'hd'),
                    'format_id': 'hd',
                    'preference': 2,
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'uploader': uploader,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'like_count': int_or_none(like_count),
            'dislike_count': int_or_none(dislike_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': age_limit,
            'formats': formats,
        }
0bbba43e S |
159 | |
160 | ||
class XHamsterEmbedIE(InfoExtractor):
    """Resolve ``xembed.php`` player URLs to the regular video page.

    No media is extracted here: the real page URL is located and the result
    is handed over to the ``XHamster`` extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
    _TEST = {
        'url': 'http://xhamster.com/xembed.php?video=3328539',
        'info_dict': {
            'id': '3328539',
            'ext': 'mp4',
            'title': 'Pen Masturbation',
            'upload_date': '20140728',
            'uploader_id': 'anonymous',
            'duration': 5,
            'age_limit': 18,
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every xHamster embed iframe in *webpage*."""
        embed_iframes = re.finditer(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
            webpage)
        return [iframe.group('url') for iframe in embed_iframes]

    def _real_extract(self, url):
        """Find the video page behind the embed *url* and delegate to it."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Preferred source: a direct link to the movie page in the markup.
        page_link = self._search_regex(
            r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
            webpage, 'xhamster url', default=None)
        if page_link:
            return self.url_result(page_link, 'XHamster')

        # Otherwise fall back to the JSON ``vars`` object embedded in the
        # page and take the first link-like entry it provides.
        player_vars = self._parse_json(
            self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
            video_id)
        fallback_link = dict_get(
            player_vars,
            ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
        return self.url_result(fallback_link, 'XHamster')