]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | import urllib.parse | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | ExtractorError, | |
7 | merge_dicts, | |
8 | ) | |
9 | ||
10 | ||
class EroProfileIE(InfoExtractor):
    """Extractor for single EroProfile video pages (``/m/videos/view/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
    # NOTE(review): credentials are appended to this plain-HTTP URL as query
    # parameters; confirm whether the endpoint also accepts HTTPS and/or POST.
    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
    _NETRC_MACHINE = 'eroprofile'
    _TESTS = [{
        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
        'info_dict': {
            'id': '3733775',
            'display_id': 'sexy-babe-softcore',
            'ext': 'm4v',
            'title': 'sexy babe softcore',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Video not found',
    }, {
        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
        'md5': '1baa9602ede46ce904c431f5418d8916',
        'info_dict': {
            'id': '1133519',
            'ext': 'm4v',
            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Requires login',
    }]

    def _perform_login(self, username, password):
        """Log in by requesting the auth endpoint with the given credentials.

        The credentials and a return URL are URL-encoded and appended to
        ``_LOGIN_URL``. After a successful response, the first ``<script src>``
        URL found in the login page is downloaded as well.

        Raises:
            ExtractorError: if the login page reports that the username or
                password was incorrect.
        """
        query = urllib.parse.urlencode({
            'username': username,
            'password': password,
            'url': 'http://www.eroprofile.com/',
        })
        login_page = self._download_webpage(self._LOGIN_URL + query, None, False)

        # The failure marker is a fixed string, so a substring test is enough.
        if 'Your username or password was incorrect.' in login_page:
            raise ExtractorError(
                'Wrong username and/or password.', expected=True)

        self.report_login()
        redirect_url = self._search_regex(
            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
        self._download_webpage(redirect_url, None, False)

    def _real_extract(self, url):
        """Extract the info dict for the video page at *url*.

        Returns the first HTML5 media entry parsed from the page, merged with
        the numeric id, the display id taken from the URL, the title and a
        fixed age limit of 18.

        Raises:
            ExtractorError: if the page contains no HTML5 media entry.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        if 'You must be logged in to view this video.' in webpage:
            self.raise_login_required('This video requires login')

        # Searched with default=None, so video_id is presumably None when
        # neither pattern matches.
        video_id = self._search_regex(
            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
            webpage, 'video id', default=None)

        title = self._html_search_regex(
            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
            webpage, 'title')

        media_entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not media_entries:
            # Indexing an empty result would surface as a bare IndexError;
            # report it as an extraction failure instead.
            raise ExtractorError('No HTML5 media entries found on the page')

        return merge_dicts(media_entries[0], {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'age_limit': 18,
        })
84 | ||
85 | ||
class EroProfileAlbumIE(InfoExtractor):
    """Extractor for EroProfile video albums (``/m/videos/album/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2',
        },
        'playlist_mincount': 486,
    }]

    def _extract_from_page(self, page):
        """Yield a URL result for every video-view link found in *page*."""
        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield the album's video entries, downloading further pages as needed.

        Entries from *first_page* are yielded first. The highest ``pnum`` value
        linked from the first page is taken as the last page number, and pages
        2 through that number are downloaded and scanned in order.
        """
        yield from self._extract_from_page(first_page)

        # playlist_id comes from the URL and may contain regex metacharacters,
        # so it is escaped before being embedded in the pattern.
        page_numbers = re.findall(
            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"',
            first_page)
        # With no pagination links, default to a single page rather than
        # letting max() raise ValueError on an empty sequence.
        max_page = max(map(int, page_numbers), default=1)

        for n in range(2, max_page + 1):
            url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
            yield from self._extract_from_page(
                self._download_webpage(url, playlist_id,
                                       note=f'Downloading playlist page {n - 1}'))

    def _real_extract(self, url):
        """Return a playlist result for the album page at *url*.

        The playlist id is taken from the URL and the title from the page's
        ``<title>`` element.
        """
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)