]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/xhamster.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / xhamster.py
index 1c93989868e7e8fd03dacc5011930f4610c647df..688c6b952272954359e530ff2eecd2ce2e0a168b 100644 (file)
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import itertools
 import re
 
@@ -23,7 +21,7 @@
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
@@ -34,7 +32,7 @@ class XHamsterIE(InfoExtractor):
                     ''' % _DOMAINS
     _TESTS = [{
         'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
-        'md5': '98b4687efb1ffd331c4197854dc09e8f',
+        'md5': '34e1ab926db5dc2750fed9e1f34304bb',
         'info_dict': {
             'id': '1509445',
             'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -43,6 +41,7 @@ class XHamsterIE(InfoExtractor):
             'timestamp': 1350194821,
             'upload_date': '20121014',
             'uploader': 'Ruseful2011',
+            'uploader_id': 'ruseful2011',
             'duration': 893,
             'age_limit': 18,
         },
@@ -72,6 +71,7 @@ class XHamsterIE(InfoExtractor):
             'timestamp': 1454948101,
             'upload_date': '20160208',
             'uploader': 'parejafree',
+            'uploader_id': 'parejafree',
             'duration': 72,
             'age_limit': 18,
         },
@@ -117,10 +117,13 @@ class XHamsterIE(InfoExtractor):
     }, {
         'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
         'only_matching': True,
+    }, {
+        'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         video_id = mobj.group('id') or mobj.group('id_2')
         display_id = mobj.group('display_id') or mobj.group('display_id_2')
 
@@ -245,6 +248,7 @@ def get_height(s):
             else:
                 categories = None
 
+            uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
             return {
                 'id': video_id,
                 'display_id': display_id,
@@ -253,6 +257,8 @@ def get_height(s):
                 'timestamp': int_or_none(video.get('created')),
                 'uploader': try_get(
                     video, lambda x: x['author']['name'], compat_str),
+                'uploader_url': uploader_url,
+                'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
                 'thumbnail': video.get('thumbURL'),
                 'duration': int_or_none(video.get('duration')),
                 'view_count': int_or_none(video.get('views')),
@@ -261,7 +267,7 @@ def get_height(s):
                 'dislike_count': int_or_none(try_get(
                     video, lambda x: x['rating']['dislikes'], int)),
                 'comment_count': int_or_none(video.get('views')),
-                'age_limit': age_limit,
+                'age_limit': age_limit if age_limit is not None else 18,
                 'categories': categories,
                 'formats': formats,
             }
@@ -352,6 +358,7 @@ def get_height(s):
             'description': description,
             'upload_date': upload_date,
             'uploader': uploader,
+            'uploader_id': uploader.lower() if uploader else None,
             'thumbnail': thumbnail,
             'duration': duration,
             'view_count': view_count,
@@ -366,6 +373,7 @@ def get_height(s):
 
 class XHamsterEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
     _TEST = {
         'url': 'http://xhamster.com/xembed.php?video=3328539',
         'info_dict': {
@@ -380,12 +388,6 @@ class XHamsterEmbedIE(InfoExtractor):
         }
     }
 
-    @staticmethod
-    def _extract_urls(webpage):
-        return [url for _, url in re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
-            webpage)]
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -420,6 +422,9 @@ class XHamsterUserIE(InfoExtractor):
             'id': 'firatkaan',
         },
         'playlist_mincount': 1,
+    }, {
+        'url': 'https://xhday.com/users/mobhunter',
+        'only_matching': True,
     }]
 
     def _entries(self, user_id):