jfr.im git - yt-dlp.git/commitdiff
[eroprofile] Add album downloader (#658)
author jhwgh1968 <redacted>
Tue, 10 Aug 2021 13:51:12 +0000 (13:51 +0000)
committer GitHub <redacted>
Tue, 10 Aug 2021 13:51:12 +0000 (19:21 +0530)
Authored by: jhwgh1968

yt_dlp/extractor/eroprofile.py
yt_dlp/extractor/extractors.py

index c460dc7f9826815cb965d61686ee880502575144..54ed9a49e20b67fb86e919ee5d1c6a12809f3f9a 100644 (file)
@@ -90,3 +90,40 @@ def _real_extract(self, url):
             'title': title,
             'age_limit': 18,
         })
+
+
+class EroProfileAlbumIE(InfoExtractor):
+    """Extract the videos linked from an EroProfile album page as a playlist.
+
+    The album page given by the user is downloaded first; the pagination
+    links (``?pnum=N``) found on that page are then followed, and every
+    ``/m/videos/view/...`` link seen is delegated to ``EroProfileIE``.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
+    IE_NAME = 'EroProfile:album'
+
+    _TESTS = [{
+        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
+        'info_dict': {
+            'id': 'BBW-2-893',
+            'title': 'BBW 2',
+        },
+        'playlist_mincount': 486,
+    }]
+
+    def _extract_from_page(self, page):
+        """Yield one ``url_result`` per video link found in the HTML *page*."""
+        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
+            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())
+
+    def _entries(self, playlist_id, first_page):
+        """Yield the entries of *first_page*, then of each further album page.
+
+        *playlist_id* is the album id matched from the URL; *first_page* is
+        the already-downloaded HTML of the album page.
+        """
+        yield from self._extract_from_page(first_page)
+
+        # The album id comes verbatim from the URL, so it must be escaped
+        # before being embedded in a pattern: an id containing regex
+        # metacharacters would otherwise alter the pattern or raise re.error.
+        page_urls = re.findall(
+            rf'href=".*?(/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+))"', first_page)
+
+        # The first pagination match is skipped.
+        # NOTE(review): presumably it points at the page already processed
+        # above -- confirm against the site's markup.
+        for url, n in page_urls[1:]:
+            yield from self._extract_from_page(self._download_webpage(
+                f'https://www.eroprofile.com{url}',
+                playlist_id, note=f'Downloading playlist page {int(n) - 1}'))
+
+    def _real_extract(self, url):
+        """Download the album page and return a playlist of its videos."""
+        playlist_id = self._match_id(url)
+        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
+        playlist_title = self._search_regex(
+            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
+
+        # Entries are produced lazily by the _entries generator.
+        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)
index 975fb032882bc0788f54fe924756ed35ee00002f..5b15bb8e74bb5f0ae9ccb194e21cf5570aa32e9c 100644 (file)
 from .embedly import EmbedlyIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
+from .eroprofile import (
+    EroProfileIE,
+    EroProfileAlbumIE,
+)
 from .escapist import EscapistIE
 from .espn import (
     ESPNIE,