]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/eroprofile.py
[extractor] Add `_perform_login` function (#2943)
[yt-dlp.git] / yt_dlp / extractor / eroprofile.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..compat import compat_urllib_parse_urlencode
7 from ..utils import (
8 ExtractorError,
9 merge_dicts,
10 )
11
12
class EroProfileIE(InfoExtractor):
    """Extractor for single video pages under eroprofile.com/m/videos/view/."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
    _NETRC_MACHINE = 'eroprofile'
    _TESTS = [
        {
            'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
            'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
            'info_dict': {
                'id': '3733775',
                'display_id': 'sexy-babe-softcore',
                'ext': 'm4v',
                'title': 'sexy babe softcore',
                'thumbnail': r're:https?://.*\.jpg',
                'age_limit': 18,
            },
            'skip': 'Video not found',
        },
        {
            'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
            'md5': '1baa9602ede46ce904c431f5418d8916',
            'info_dict': {
                'id': '1133519',
                'ext': 'm4v',
                'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
                'thumbnail': r're:https?://.*\.jpg',
                'age_limit': 18,
            },
            'skip': 'Requires login',
        },
    ]

    def _perform_login(self, username, password):
        """Submit the credentials to ``_LOGIN_URL`` and follow up on the response.

        The credentials are sent as URL query parameters. If the response
        contains the site's "incorrect username or password" message, an
        expected ExtractorError is raised. Otherwise the first
        ``<script src="...">`` URL found in the response is downloaded.
        """
        credentials = {
            'username': username,
            'password': password,
            'url': 'http://www.eroprofile.com/',
        }
        login_page = self._download_webpage(
            self._LOGIN_URL + compat_urllib_parse_urlencode(credentials), None, False)

        rejected = re.search(r'Your username or password was incorrect\.', login_page)
        if rejected is not None:
            raise ExtractorError('Wrong username and/or password.', expected=True)

        self.report_login()
        # The login response points at a script URL that is requested next.
        follow_up_url = self._search_regex(
            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
        self._download_webpage(follow_up_url, None, False)

    def _real_extract(self, url):
        """Build the info dict for one video page.

        The numeric video id is looked up in the page and may be None when
        neither pattern matches; the slug from the URL is always reported as
        ``display_id``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        login_wall = re.search(r'You must be logged in to view this video\.', webpage)
        if login_wall is not None:
            self.raise_login_required('This video requires login')

        video_id = self._search_regex(
            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
            webpage, 'video id', default=None)

        title = self._html_search_regex(
            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
            webpage, 'title')

        # Only the first HTML5 media entry parsed from the page is used.
        media_entries = self._parse_html5_media_entries(url, webpage, video_id)
        media_info = media_entries[0]

        page_metadata = {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'age_limit': 18,
        }
        return merge_dicts(media_info, page_metadata)
86
87
class EroProfileAlbumIE(InfoExtractor):
    """Extractor for video albums under eroprofile.com/m/videos/album/.

    An album is returned as a playlist whose entries are the video links
    found on each album page.
    """

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2',
        },
        'playlist_mincount': 486,
    }]

    def _extract_from_page(self, page):
        """Yield a url_result for every ``/m/videos/view/...`` link in *page*."""
        for video_path in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(
                f'https://www.eroprofile.com{video_path}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield the entries of every page of the album, starting with *first_page*.

        The highest ``pnum`` value linked from the first page is taken as the
        page count. When the first page has no pagination links, only the
        first page is yielded.
        """
        yield from self._extract_from_page(first_page)

        # The id is taken verbatim from the URL, so it is escaped before being
        # embedded in the pattern; otherwise characters such as '.', '+' or '('
        # would be interpreted as regex syntax.
        page_numbers = re.findall(
            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"',
            first_page)
        # default=1 covers albums without pagination links, where a bare
        # max() on the empty sequence would raise ValueError.
        max_page = max(map(int, page_numbers), default=1)

        for page_num in range(2, max_page + 1):
            page_url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={page_num}'
            yield from self._extract_from_page(
                self._download_webpage(
                    page_url, playlist_id,
                    note=f'Downloading playlist page {page_num - 1}'))

    def _real_extract(self, url):
        """Download the album's first page and return the playlist result."""
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(
            self._entries(playlist_id, first_page), playlist_id, playlist_title)
122 r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')
123
124 return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)