[ie/mlbtv] Fix extraction (#10296)

[yt-dlp.git] / yt_dlp / extractor / motherless.py
diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py

index 769b52ce6de4be8c489bfdac187154fa9c3836a9..86551950b7adf1663bb243e75f3d47928c84b9c0 100644 (file)
--- a/yt_dlp/extractor/motherless.py
+++ b/yt_dlp/extractor/motherless.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
  import re
  import urllib.parse
  
@@ -115,10 +115,10 @@ def _real_extract(self, url):
          if any(p in webpage for p in (
                  '<title>404 - MOTHERLESS.COM<',
                  ">The page you're looking for cannot be found.<")):
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError(f'Video {video_id} does not exist', expected=True)
  
          if '>The content you are trying to view is for friends only.' in webpage:
-            raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+            raise ExtractorError(f'Video {video_id} is for friends only', expected=True)
  
          title = self._html_search_regex(
              (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
@@ -127,7 +127,7 @@ def _real_extract(self, url):
              (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
               r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
              webpage, 'video URL', default=None, group='url')
-            or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+            or f'http://cdn4.videos.motherlessmedia.com/videos/{video_id}.mp4?fs=opencloud')
          age_limit = self._rta_search(webpage)
          view_count = str_to_int(self._html_search_regex(
              (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
@@ -151,7 +151,7 @@ def _real_extract(self, url):
                      'd': 'days',
                  }
                  kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
-                upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
+                upload_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(**kwargs)).strftime('%Y%m%d')
  
          comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
          uploader_id = self._html_search_regex(
@@ -177,6 +177,7 @@ def _real_extract(self, url):
  
  
  class MotherlessPaginatedIE(InfoExtractor):
+    _EXTRA_QUERY = {}
      _PAGE_SIZE = 60
  
      def _correct_path(self, url, item_id):
@@ -199,7 +200,7 @@ def _real_extract(self, url):
          def get_page(idx):
              page = idx + 1
              current_page = webpage if not idx else self._download_webpage(
-                real_url, item_id, note=f'Downloading page {page}', query={'page': page})
+                real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
              yield from self._extract_entries(current_page, real_url)
  
          return self.playlist_result(
@@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
          'url': 'http://motherless.com/gv/movie_scenes',
          'info_dict': {
              'id': 'movie_scenes',
-            'title': 'Movie Scenes',
+            'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
          },
          'playlist_mincount': 540,
      }, {
@@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
              'id': '338999F',
              'title': 'Random',
          },
-        'playlist_mincount': 190,
+        'playlist_mincount': 171,
      }, {
          'url': 'https://motherless.com/GVABD6213',
          'info_dict': {
@@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
  
      def _correct_path(self, url, item_id):
          return urllib.parse.urljoin(url, f'/GV{item_id}')
+
+
+class MotherlessUploaderIE(MotherlessPaginatedIE):  # paginated playlist extractor for /u/<username> pages
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'  # id = the name after /u/
+    _TESTS = [{
+        'url': 'https://motherless.com/u/Mrgo4hrs2023',  # bare uploader URL, no query string
+        'info_dict': {
+            'id': 'Mrgo4hrs2023',
+            'title': "Mrgo4hrs2023's Uploads - Videos",
+        },
+        'playlist_mincount': 32,
+    }, {
+        'url': 'https://motherless.com/u/Happy_couple?t=v',  # URL that already carries the t=v query
+        'info_dict': {
+            'id': 'Happy_couple',
+            'title': "Happy_couple's Uploads - Videos",
+        },
+        'playlist_mincount': 8,
+    }]
+
+    _EXTRA_QUERY = {'t': 'v'}  # merged into the query of follow-up page requests; presumably the Videos tab - TODO confirm
+
+    def _correct_path(self, url, item_id):  # resolve the listing URL for item_id against url, always with ?t=v
+        return urllib.parse.urljoin(url, f'/u/{item_id}?t=v')