1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
13 class XHamsterIE(InfoExtractor
):
14 """Information Extractor for xHamster"""
15 _VALID_URL
= r
'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
17 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
18 'file': '1509445.mp4',
19 'md5': '8281348b8d3c53d39fffb377d24eac4e',
21 "upload_date": "20121014",
22 "uploader_id": "Ruseful2011",
23 "title": "FemaleAgent Shy beauty takes the bait",
28 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
29 'file': '2221348.flv',
30 'md5': 'e767b9475de189320f691f49c679c4c7',
32 "upload_date": "20130914",
33 "uploader_id": "jojo747400",
34 "title": "Britney Spears Sexy Booty",
39 def _real_extract(self
,url
):
40 def extract_video_url(webpage
):
41 mobj
= re
.search(r
'\'srv
\': \'(?P
<server
>[^
\']*)\',\s
*\'file\': \'(?P
<file>[^
\']+)\',', webpage)
43 raise ExtractorError('Unable to extract media URL
')
44 if len(mobj.group('server
')) == 0:
45 return compat_urllib_parse.unquote(mobj.group('file'))
47 return mobj.group('server
')+'/key
='+mobj.group('file')
49 def extract_mp4_video_url(webpage):
50 mp4 = re.search(r'<a href
=\"(.+?
)\" class=\"mp4Play
\"',webpage)
57 return '<div
class=\'icon iconHD
\'' in webpage
59 mobj = re.match(self._VALID_URL, url)
61 video_id = mobj.group('id')
62 seo = mobj.group('seo
')
63 mrss_url = 'http
://xhamster
.com
/movies
/%s/%s.html
' % (video_id, seo)
64 webpage = self._download_webpage(mrss_url, video_id)
66 video_title = self._html_search_regex(
67 r'<title
>(?P
<title
>.+?
) - xHamster\
.com
</title
>', webpage, 'title
')
69 # Only a few videos have a description
70 mobj = re.search(r'<span
>Description
: </span
>([^
<]+)', webpage)
71 video_description = mobj.group(1) if mobj else None
73 mobj = re.search(r'hint
=\'(?P
<upload_date_Y
>[0-9]{4}
)-(?P
<upload_date_m
>[0-9]{2}
)-(?P
<upload_date_d
>[0-9]{2}
) [0-9]{2}
:[0-9]{2}
:[0-9]{2}
[A
-Z
]{3,4}
\'', webpage)
75 video_upload_date = mobj.group('upload_date_Y
')+mobj.group('upload_date_m
')+mobj.group('upload_date_d
')
77 video_upload_date = None
78 self._downloader.report_warning('Unable to extract upload date
')
80 video_uploader_id = self._html_search_regex(
81 r'<a href
=\'/user
/[^
>]+>(?P
<uploader_id
>[^
<]+)',
82 webpage, 'uploader
id', default='anonymous
')
84 video_thumbnail = self._search_regex(
85 r'\'image
\':\'(?P
<thumbnail
>[^
\']+)\'',
86 webpage, 'thumbnail
', fatal=False)
88 age_limit = self._rta_search(webpage)
91 video_url = extract_video_url(webpage)
94 'format_id
': 'hd
' if hd else 'sd
',
98 video_mp4_url = extract_mp4_video_url(webpage)
99 if video_mp4_url is not None:
101 'url
': video_mp4_url,
103 'format_id
': 'mp4
-hd
' if hd else 'mp4
-sd
',
108 webpage = self._download_webpage(
109 mrss_url + '?hd
', video_id, note='Downloading HD webpage
')
111 video_url = extract_video_url(webpage)
118 self._sort_formats(formats)
122 'title
': video_title,
124 'description
': video_description,
125 'upload_date
': video_upload_date,
126 'uploader_id
': video_uploader_id,
127 'thumbnail
': video_thumbnail,
128 'age_limit
': age_limit,