-# coding: utf-8
-from __future__ import unicode_literals
-
import functools
import itertools
import math
import operator
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
-)
from .openload import PhantomJSwrapper
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
+ format_field,
int_or_none,
merge_dicts,
- NO_DEFAULT,
orderedSet,
remove_quotes,
+ remove_start,
str_to_int,
update_url_query,
- urlencode_postdata,
url_or_none,
+ urlencode_postdata,
)
class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub'
- _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
+ _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
r'document\.location\.reload\(true\)')):
url_or_request = args[0]
url = (url_or_request.get_full_url()
- if isinstance(url_or_request, compat_urllib_request.Request)
+ if isinstance(url_or_request, urllib.request.Request)
else url_or_request)
phantom = PhantomJSwrapper(self, required_version='2.0')
phantom.get(url, html=webpage)
'skip_download': True,
},
'skip': 'This video has been disabled',
+ }, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
+ 'info_dict': {
+ 'id': 'ph601dc30bae19a',
+ 'uploader': 'Projekt Melody',
+ 'uploader_id': 'projekt-melody',
+ 'upload_date': '20210205',
+ 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
+ 'thumbnail': r're:https?://.+',
+ },
}, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}, {
- 'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
+ 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}]
webpage)
def _extract_count(self, pattern, webpage, name):
    """Look up a numeric counter in *webpage* and return it via ``str_to_int``.

    pattern -- regular expression handed to ``self._search_regex``
    webpage -- text that is searched
    name    -- short label; the field is reported as '<name> count'

    The search is made with ``default=None``, so the counter is treated as
    optional; whatever the search yields is passed straight to
    ``str_to_int``.
    """
    field_label = '%s count' % name
    raw_count = self._search_regex(pattern, webpage, field_label, default=None)
    return str_to_int(raw_count)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id')
default=None))
formats.append({
'url': format_url,
- 'format_id': '%dp' % height if height else None,
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
})
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
+ model_profile = self._search_json(
+ r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
video_uploader = self._html_search_regex(
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
- webpage, 'uploader', default=None)
+ webpage, 'uploader', default=None) or model_profile.get('username')
def extract_vote_count(kind, name):
return self._extract_count(
return merge_dicts({
'id': video_id,
'uploader': video_uploader,
+ 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True,
}, {
- 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
+ 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
user_id = mobj.group('id')
videos_url = '%s/videos' % mobj.group('url')
page = self._extract_page(url)
break
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host')
item_id = mobj.group('id')
'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
'only_matching': True,
}, {
- 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
+ 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
'only_matching': True,
}]
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True,
}, {
- 'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
+ 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
}]
yield e
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
host = mobj.group('host')
item_id = mobj.group('id')