[extractors] Use new framework for existing embeds (#4307)

[yt-dlp.git] / yt_dlp / extractor / xhamster.py
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py

index 1c93989868e7e8fd03dacc5011930f4610c647df..688c6b952272954359e530ff2eecd2ce2e0a168b 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
  import itertools
  import re
  
@@ -23,7 +21,7 @@
  
  
  class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
      _VALID_URL = r'''(?x)
                      https?://
                          (?:.+?\.)?%s/
@@ -34,7 +32,7 @@ class XHamsterIE(InfoExtractor):
                      ''' % _DOMAINS
      _TESTS = [{
          'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
-        'md5': '98b4687efb1ffd331c4197854dc09e8f',
+        'md5': '34e1ab926db5dc2750fed9e1f34304bb',
          'info_dict': {
              'id': '1509445',
              'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -43,6 +41,7 @@ class XHamsterIE(InfoExtractor):
              'timestamp': 1350194821,
              'upload_date': '20121014',
              'uploader': 'Ruseful2011',
+            'uploader_id': 'ruseful2011',
              'duration': 893,
              'age_limit': 18,
          },
@@ -72,6 +71,7 @@ class XHamsterIE(InfoExtractor):
              'timestamp': 1454948101,
              'upload_date': '20160208',
              'uploader': 'parejafree',
+            'uploader_id': 'parejafree',
              'duration': 72,
              'age_limit': 18,
          },
@@ -117,10 +117,13 @@ class XHamsterIE(InfoExtractor):
      }, {
          'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
          'only_matching': True,
+    }, {
+        'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+        'only_matching': True,
      }]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
          video_id = mobj.group('id') or mobj.group('id_2')
          display_id = mobj.group('display_id') or mobj.group('display_id_2')
  
@@ -245,6 +248,7 @@ def get_height(s):
              else:
                  categories = None
  
+            uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
              return {
                  'id': video_id,
                  'display_id': display_id,
@@ -253,6 +257,8 @@ def get_height(s):
                  'timestamp': int_or_none(video.get('created')),
                  'uploader': try_get(
                      video, lambda x: x['author']['name'], compat_str),
+                'uploader_url': uploader_url,
+                'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
                  'thumbnail': video.get('thumbURL'),
                  'duration': int_or_none(video.get('duration')),
                  'view_count': int_or_none(video.get('views')),
@@ -261,7 +267,7 @@ def get_height(s):
                  'dislike_count': int_or_none(try_get(
                      video, lambda x: x['rating']['dislikes'], int)),
                  'comment_count': int_or_none(video.get('views')),
-                'age_limit': age_limit,
+                'age_limit': age_limit if age_limit is not None else 18,
                  'categories': categories,
                  'formats': formats,
              }
@@ -352,6 +358,7 @@ def get_height(s):
              'description': description,
              'upload_date': upload_date,
              'uploader': uploader,
+            'uploader_id': uploader.lower() if uploader else None,
              'thumbnail': thumbnail,
              'duration': duration,
              'view_count': view_count,
@@ -366,6 +373,7 @@ def get_height(s):
  
  class XHamsterEmbedIE(InfoExtractor):
      _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1']
      _TEST = {
          'url': 'http://xhamster.com/xembed.php?video=3328539',
          'info_dict': {
@@ -380,12 +388,6 @@ class XHamsterEmbedIE(InfoExtractor):
          }
      }
  
-    @staticmethod
-    def _extract_urls(webpage):
-        return [url for _, url in re.findall(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
-            webpage)]
-
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
@@ -420,6 +422,9 @@ class XHamsterUserIE(InfoExtractor):
              'id': 'firatkaan',
          },
          'playlist_mincount': 1,
+    }, {
+        'url': 'https://xhday.com/users/mobhunter',
+        'only_matching': True,
      }]
  
      def _entries(self, user_id):