]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | determine_ext, | |
6 | ExtractorError, | |
7 | merge_dicts, | |
8 | parse_duration, | |
9 | parse_resolution, | |
10 | str_to_int, | |
11 | url_or_none, | |
12 | urlencode_postdata, | |
13 | urljoin, | |
14 | ) | |
15 | ||
16 | ||
class SpankBangIE(InfoExtractor):
    """Extractor for a single SpankBang video.

    Matches ``/video``, ``/play`` and ``/embed`` URLs (video ID in the
    ``id`` group) and per-video playlist URLs of the form
    ``<playlist>-<video>/playlist/<name>`` (video ID in the ``id_2`` group).
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:[^/]+\.)?spankbang\.com/
                        (?:
                            (?P<id>[\da-z]+)/(?:video|play|embed)\b|
                            [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
                        )
                    '''
    _TESTS = [{
        'url': 'https://spankbang.com/56b3d/video/the+slut+maker+hmv',
        'md5': '2D13903DE4ECC7895B5D55930741650A',
        'info_dict': {
            'id': '56b3d',
            'ext': 'mp4',
            'title': 'The Slut Maker HMV',
            'description': 'Girls getting converted into cock slaves.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Mindself',
            'uploader_id': 'mindself',
            'timestamp': 1617109572,
            'upload_date': '20210330',
            'age_limit': 18,
        }
    }, {
        # 480p only
        'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
        'only_matching': True,
    }, {
        # no uploader
        'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
        'only_matching': True,
    }, {
        # mobile page
        'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
        'only_matching': True,
    }, {
        # 4k
        'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
        'only_matching': True,
    }, {
        'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
        'only_matching': True,
    }, {
        'url': 'https://m.spankbang.com/3vvn/play',
        'only_matching': True,
    }, {
        'url': 'https://spankbang.com/2y3td/embed/',
        'only_matching': True,
    }, {
        'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page for *url* and build its info dict.

        Formats are taken from inline ``stream_url_<id> = "<url>"``
        assignments in the page when present; otherwise they are requested
        from the ``/api/videos/stream`` endpoint using the page's
        ``data-streamkey`` value.

        Raises ExtractorError (expected) when the page carries a
        ``video_removed`` marker.
        """
        mobj = self._match_valid_url(url)
        # Exactly one of the two alternatives in _VALID_URL matched.
        video_id = mobj.group('id') or mobj.group('id_2')
        # Embed URLs are rewritten to the regular video page before download.
        webpage = self._download_webpage(
            url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
            video_id, headers={'Cookie': 'country=US'})

        if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
            raise ExtractorError(
                'Video %s is not available' % video_id, expected=True)

        formats = []

        def extract_format(format_id, format_url):
            """Append the format(s) described by one id/URL pair to `formats`.

            Entries whose URL does not pass url_or_none() are skipped.
            HLS and DASH manifests are expanded through the corresponding
            manifest helpers; anything else is kept as a direct format only
            if it is an mp4 or its id yielded a width/height.
            """
            f_url = url_or_none(format_url)
            if not f_url:
                return
            f = parse_resolution(format_id)
            ext = determine_ext(f_url)
            if format_id.startswith('m3u8') or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif format_id.startswith('mpd') or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    f_url, video_id, mpd_id='dash', fatal=False))
            elif ext == 'mp4' or f.get('width') or f.get('height'):
                f.update({
                    'url': f_url,
                    'format_id': format_id,
                })
                formats.append(f)

        STREAM_URL_PREFIX = 'stream_url_'

        for stream_mobj in re.finditer(
                r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
                % STREAM_URL_PREFIX, webpage):
            # group('id', 'url') returns a 2-tuple; it must be unpacked to
            # fill both positional parameters of extract_format().
            extract_format(*stream_mobj.group('id', 'url'))

        if not formats:
            # Nothing inline: ask the stream API, keyed by data-streamkey.
            stream_key = self._search_regex(
                r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                webpage, 'stream key', group='value')

            stream = self._download_json(
                'https://spankbang.com/api/videos/stream', video_id,
                'Downloading stream JSON', data=urlencode_postdata({
                    'id': stream_key,
                    'data': 0,
                }), headers={
                    'Referer': url,
                    'X-Requested-With': 'XMLHttpRequest',
                })

            for format_id, format_url in stream.items():
                # Non-empty list values are reduced to their first element.
                if format_url and isinstance(format_url, list):
                    format_url = format_url[0]
                extract_format(format_id, format_url)

        info = self._search_json_ld(webpage, video_id, default={})

        # Every page-scraped field below is optional (default=None).
        title = self._html_search_regex(
            r'(?s)<h1[^>]+\btitle=["\']([^"]+)["\']>', webpage, 'title', default=None)
        description = self._search_regex(
            r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
            webpage, 'description', default=None)
        thumbnail = self._og_search_thumbnail(webpage, default=None)
        uploader = self._html_search_regex(
            r'<svg[^>]+\bclass="(?:[^"]*?user[^"]*?)">.*?</svg>([^<]+)', webpage, 'uploader', default=None)
        uploader_id = self._html_search_regex(
            r'<a[^>]+href="/profile/([^"]+)"', webpage, 'uploader_id', default=None)
        duration = parse_duration(self._search_regex(
            r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
            webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            r'([\d,.]+)\s+plays', webpage, 'view count', default=None))

        age_limit = self._rta_search(webpage)

        # Page-scraped values are combined with the JSON-LD data.
        return merge_dicts({
            'id': video_id,
            'title': title or video_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'age_limit': age_limit,
        }, info)
164 | ||
165 | ||
class SpankBangPlaylistIE(InfoExtractor):
    """Extractor for SpankBang playlist pages (``/<id>/playlist/<name>``)."""

    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
        'info_dict': {
            'id': 'ug0k',
            'title': 'Big Ass Titties',
        },
        'playlist_mincount': 40,
    }

    def _real_extract(self, url):
        """Collect the playlist's video links and return a playlist result.

        Each ``<a href>`` of the form ``<playlist>-<video>/playlist/...``
        found in the page becomes a url_result delegated to SpankBangIE.
        The title lookup is non-fatal.
        """
        url_match = self._match_valid_url(url)
        playlist_id = url_match.group('id')

        webpage = self._download_webpage(
            url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})

        # Quoted href whose path is "<playlist>-<video>/playlist/<rest>".
        link_re = r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1'

        entries = []
        for link in re.finditer(link_re, webpage):
            entry_url = urljoin(url, link.group('path'))
            entries.append(self.url_result(
                entry_url, ie=SpankBangIE.ie_key(),
                video_id=link.group('id')))

        title = self._html_search_regex(
            r'<em>([^<]+)</em>\s+playlist\s*<', webpage, 'playlist title',
            fatal=False)

        return self.playlist_result(entries, playlist_id, title)