]>
Commit | Line | Data |
---|---|---|
e299f6d2 PH |
1 | from __future__ import unicode_literals |
2 | ||
8e05c870 | 3 | import re |
4b9cced1 | 4 | import json |
8e05c870 MO |
5 | |
6 | from .common import InfoExtractor | |
842cca7d S |
7 | from ..utils import ( |
8 | int_or_none, | |
9 | js_to_json, | |
10 | qualities, | |
842cca7d | 11 | ) |
8e05c870 | 12 | |
6f5dcd4e | 13 | |
class PornHdIE(InfoExtractor):
    """Information extractor for video pages on pornhd.com.

    The video id (and optional display id) are taken from the URL; all other
    metadata and the media URLs are scraped from the downloaded web page.
    """

    # Accept both http and https; the optional two-to-four letter path
    # segment before ``videos/`` is skipped, and the trailing slug (if any)
    # is captured as ``display_id``.
    _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
    _TEST = {
        'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
        'md5': '956b8ca569f7f4d8ec563e2c41598441',
        'info_dict': {
            'id': '1962',
            'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
            'ext': 'mp4',
            'title': 'Sierra loves doing laundry',
            'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg',
            'view_count': int,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Scrape the page at ``url`` and return the video's info dict.

        The returned dict carries the keys ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``view_count``, ``formats`` and
        ``age_limit``.  Title and the sources object are searched with the
        default (non-``fatal=False``) mode; description, view count and
        thumbnail are searched with ``fatal=False``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        # display_id is None when the URL has no slug after the numeric id,
        # so fall back to the numeric id when naming the download.
        webpage = self._download_webpage(url, display_id or video_id)

        title = self._html_search_regex(
            r'<title>(.+) porn HD.+?</title>', webpage, 'title')
        description = self._html_search_regex(
            r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
        view_count = int_or_none(self._html_search_regex(
            r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
        thumbnail = self._search_regex(
            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)

        # NOTE(review): presumably ranks 'hd' above 'sd' -- confirm against
        # the ``qualities`` helper in ..utils.
        quality = qualities(['sd', 'hd'])
        # The page embeds the sources as a JavaScript object literal; it is
        # passed through js_to_json before being parsed with json.loads.
        sources = json.loads(js_to_json(self._search_regex(
            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
            webpage, 'sources')))
        # Entries with an empty/falsy URL are skipped.
        formats = [{
            'url': video_url,
            'format_id': qname,
            'quality': quality(qname),
        } for qname, video_url in sources.items() if video_url]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'formats': formats,
            'age_limit': 18,
        }