]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/eroprofile.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / eroprofile.py
CommitLineData
af140002 1import re
add96eb9 2import urllib.parse
af140002 3
461b00f3 4from .common import InfoExtractor
32fffff2
S
5from ..utils import (
6 ExtractorError,
b73612a2 7 merge_dicts,
32fffff2 8)
461b00f3 9
f9b9e886 10
class EroProfileIE(InfoExtractor):
    """Extractor for single EroProfile video pages (``/m/videos/view/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
    _NETRC_MACHINE = 'eroprofile'
    _TESTS = [{
        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
        'info_dict': {
            'id': '3733775',
            'display_id': 'sexy-babe-softcore',
            'ext': 'm4v',
            'title': 'sexy babe softcore',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Video not found',
    }, {
        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
        'md5': '1baa9602ede46ce904c431f5418d8916',
        'info_dict': {
            'id': '1133519',
            'ext': 'm4v',
            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Requires login',
    }]

    def _perform_login(self, username, password):
        """Log in by requesting the auth endpoint with the credentials as query parameters.

        Raises ExtractorError (expected) when the response page contains the
        site's "incorrect username or password" message. Otherwise the script
        URL found in the response is fetched as well, to complete the login.
        """
        query = urllib.parse.urlencode({
            'username': username,
            'password': password,
            'url': 'http://www.eroprofile.com/',
        })
        login_url = self._LOGIN_URL + query
        login_page = self._download_webpage(login_url, None, False)

        if re.search(r'Your username or password was incorrect\.', login_page):
            raise ExtractorError(
                'Wrong username and/or password.', expected=True)

        self.report_login()
        # The login response references a <script src="..."> that has to be
        # requested as a follow-up step.
        redirect_url = self._search_regex(
            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
        self._download_webpage(redirect_url, None, False)

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The URL slug is used as ``display_id``; the numeric ``id`` is scraped
        from the page and may be None when neither pattern matches.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        if re.search(r'You must be logged in to view this video\.', webpage):
            self.raise_login_required('This video requires login')

        video_id = self._search_regex(
            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
            webpage, 'video id', default=None)

        title = self._html_search_regex(
            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
            webpage, 'title')

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # Fail with a descriptive extractor error rather than a bare
            # IndexError when the page carries no HTML5 media.
            raise ExtractorError('Unable to find any HTML5 media entries on the page')
        info = entries[0]

        return merge_dicts(info, {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'age_limit': 18,
        })
c196640f 84
85
class EroProfileAlbumIE(InfoExtractor):
    """Extractor for EroProfile video albums (``/m/videos/album/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2',
        },
        'playlist_mincount': 486,
    }]

    def _extract_from_page(self, page):
        """Yield a url_result for every video-view link found in *page*."""
        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield the entries of the first page, then of every further album page.

        The highest page number is taken from the ``?pnum=N`` pagination links
        on the first page; pages 2..N are then downloaded in order.
        """
        yield from self._extract_from_page(first_page)

        # Escape the id so regex metacharacters in the URL slug are matched
        # literally instead of altering the pattern.
        page_numbers = re.findall(
            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"', first_page)
        # An album with a single page has no pagination links; default to 1
        # so max() does not raise ValueError on an empty sequence.
        max_page = max((int(n) for n in page_numbers), default=1)

        for n in range(2, max_page + 1):
            url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
            yield from self._extract_from_page(
                self._download_webpage(url, playlist_id,
                                       note=f'Downloading playlist page {n - 1}'))

    def _real_extract(self, url):
        """Return a playlist result holding every video of the album at *url*."""
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)