import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
clean_html,
class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
- _VALID_URL = r'''(?x)
+ _VALID_URL = rf'''(?x)
https?://
- (?:[^/?#]+\.)?%s/
+ (?:[^/?#]+\.)?{_DOMAINS}/
(?:
movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
)
- ''' % _DOMAINS
+ '''
_TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '34e1ab926db5dc2750fed9e1f34304bb',
continue
format_urls.add(format_url)
formats.append({
- 'format_id': '%s-%s' % (format_id, quality),
+ 'format_id': f'{format_id}-{quality}',
'url': format_url,
'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality),
or str_or_none(standard_format.get('label'))
or '')
formats.append({
- 'format_id': '%s-%s' % (format_id, quality),
+ 'format_id': f'{format_id}-{quality}',
'url': standard_url,
'ext': ext,
'height': get_height(quality),
if not isinstance(c, dict):
continue
c_name = c.get('name')
- if isinstance(c_name, compat_str):
+ if isinstance(c_name, str):
categories.append(c_name)
else:
categories = None
'description': video.get('description'),
'timestamp': int_or_none(video.get('created')),
'uploader': try_get(
- video, lambda x: x['author']['name'], compat_str),
+ video, lambda x: x['author']['name'], str),
'uploader_url': uploader_url,
'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
'thumbnail': video.get('thumbURL'),
class XHamsterEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/?#]+\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+ _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/xembed\.php\?video=(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
_TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539',
'uploader': 'ManyakisArt',
'duration': 5,
'age_limit': 18,
- }
+ },
}
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
+ rf'href="(https?://xhamster\.com/(?:movies/{video_id}/[^"]*\.html|videos/[^/]*-{video_id})[^"]*)"',
webpage, 'xhamster url', default=None)
if not video_url:
- vars = self._parse_json(
+ player_vars = self._parse_json(
self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
video_id)
- video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
+ video_url = dict_get(player_vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
return self.url_result(video_url, 'XHamster')
next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1'
for pagenum in itertools.count(1):
page = self._download_webpage(
- next_page_url, user_id, 'Downloading page %s' % pagenum)
+ next_page_url, user_id, f'Downloading page {pagenum}')
for video_tag in re.findall(
r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
page):