]>
Commit | Line | Data |
---|---|---|
af140002 | 1 | import re |
add96eb9 | 2 | import urllib.parse |
af140002 | 3 | |
461b00f3 | 4 | from .common import InfoExtractor |
32fffff2 S |
5 | from ..utils import ( |
6 | ExtractorError, | |
b73612a2 | 7 | merge_dicts, |
32fffff2 | 8 | ) |
461b00f3 | 9 | |
f9b9e886 | 10 | |
class EroProfileIE(InfoExtractor):
    """Extractor for single video pages under eroprofile.com/m/videos/view/."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
    _NETRC_MACHINE = 'eroprofile'
    _TESTS = [{
        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
        'info_dict': {
            'id': '3733775',
            'display_id': 'sexy-babe-softcore',
            'ext': 'm4v',
            'title': 'sexy babe softcore',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Video not found',
    }, {
        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
        'md5': '1baa9602ede46ce904c431f5418d8916',
        'info_dict': {
            'id': '1133519',
            'ext': 'm4v',
            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Requires login',
    }]

    def _perform_login(self, username, password):
        """Log in with the given credentials.

        The credentials are sent as query parameters appended to
        ``_LOGIN_URL``. Raises ExtractorError (expected) when the response
        contains the site's "incorrect username or password" message.
        """
        credentials = {
            'username': username,
            'password': password,
            'url': 'http://www.eroprofile.com/',
        }
        auth_url = self._LOGIN_URL + urllib.parse.urlencode(credentials)
        response = self._download_webpage(auth_url, None, False)

        if re.search(r'Your username or password was incorrect\.', response) is not None:
            raise ExtractorError(
                'Wrong username and/or password.', expected=True)

        self.report_login()
        # The login response references a script URL; it is fetched as the
        # final step of the login (presumably to complete the session setup).
        self._download_webpage(
            self._search_regex(
                r'<script[^>]+?src="([^"]+)"', response, 'login redirect url'),
            None, False)

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        if re.search(r'You must be logged in to view this video\.', webpage) is not None:
            self.raise_login_required('This video requires login')

        # The numeric id is optional (default=None); it is scraped from the
        # page because the URL only carries the textual slug.
        video_id = self._search_regex(
            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
            webpage, 'video id', default=None)

        title = self._html_search_regex(
            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
            webpage, 'title')

        # Only the first HTML5 media entry found on the page is used.
        media_entries = self._parse_html5_media_entries(url, webpage, video_id)
        first_entry = media_entries[0]

        page_metadata = {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'age_limit': 18,
        }
        return merge_dicts(first_entry, page_metadata)
c196640f | 84 | |
85 | ||
class EroProfileAlbumIE(InfoExtractor):
    """Extractor for video albums under eroprofile.com/m/videos/album/."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2',
        },
        'playlist_mincount': 486,
    },
    ]

    def _extract_from_page(self, page):
        """Yield a url_result for every video-view link found in *page*."""
        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield entries from *first_page*, then from each following page.

        The number of pages is taken from the highest ``?pnum=N`` link that
        appears on the first page.
        """
        yield from self._extract_from_page(first_page)

        # Escape the id: it comes straight from the URL and may contain
        # regex metacharacters that would otherwise corrupt the pattern.
        page_numbers = re.findall(
            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"',
            first_page)
        # A first page without any pagination links gives an empty list;
        # default=1 treats that as a single-page album instead of letting
        # max() raise ValueError.
        max_page = max(map(int, page_numbers), default=1)

        for n in range(2, max_page + 1):
            url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
            yield from self._extract_from_page(
                self._download_webpage(url, playlist_id,
                                       note=f'Downloading playlist page {n - 1}'))

    def _real_extract(self, url):
        """Return a playlist result for the album at *url*."""
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)