]>
Commit | Line | Data |
---|---|---|
64102296 | 1 | from __future__ import unicode_literals |
2 | ||
64102296 | 3 | import re |
4 | ||
d97aae75 | 5 | from .common import InfoExtractor |
7773a928 | 6 | from ..utils import ( |
2b845c40 | 7 | determine_ext, |
7773a928 | 8 | ExtractorError, |
f9eeeda3 | 9 | merge_dicts, |
7773a928 S |
10 | parse_duration, |
11 | parse_resolution, | |
12 | str_to_int, | |
7465e0ae S |
13 | url_or_none, |
14 | urlencode_postdata, | |
29f7c58a | 15 | urljoin, |
7773a928 | 16 | ) |
d97aae75 S |
17 | |
18 | ||
class SpankBangIE(InfoExtractor):
    """Extractor for single SpankBang videos.

    Matches video, play and embed URLs as well as URLs of a video opened
    from within a playlist (in which case the video id is the part after
    the dash, captured as ``id_2``).
    """
    _VALID_URL = r'''(?x)
                    https?://
                        (?:[^/]+\.)?spankbang\.com/
                        (?:
                            (?P<id>[\da-z]+)/(?:video|play|embed)\b|
                            [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
                        )
                    '''
    _TESTS = [{
        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
        'md5': '1cc433e1d6aa14bc376535b8679302f7',
        'info_dict': {
            'id': '3vvn',
            'ext': 'mp4',
            'title': 'fantasy solo',
            'description': 'dillion harper masturbates on a bed',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'silly2587',
            'timestamp': 1422571989,
            'upload_date': '20150129',
            'age_limit': 18,
        }
    }, {
        # 480p only
        'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
        'only_matching': True,
    }, {
        # no uploader
        'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
        'only_matching': True,
    }, {
        # mobile page
        'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
        'only_matching': True,
    }, {
        # 4k
        'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
        'only_matching': True,
    }, {
        'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
        'only_matching': True,
    }, {
        'url': 'https://m.spankbang.com/3vvn/play',
        'only_matching': True,
    }, {
        'url': 'https://spankbang.com/2y3td/embed/',
        'only_matching': True,
    }, {
        'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict for ``url``.

        Formats are taken from ``stream_url_*`` assignments embedded in the
        page; when none are found, the stream API is queried with the page's
        ``data-streamkey`` value instead.

        Raises ExtractorError (expected) when the page carries a
        ``video_removed`` marker.
        """
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two alternatives of _VALID_URL captures the id.
        video_id = mobj.group('id') or mobj.group('id_2')
        # Embed URLs are rewritten to the regular video page before download.
        webpage = self._download_webpage(
            url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
            video_id, headers={'Cookie': 'country=US'})

        if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
            raise ExtractorError(
                'Video %s is not available' % video_id, expected=True)

        formats = []

        def extract_format(format_id, format_url):
            """Append the format(s) behind ``format_url`` to ``formats``.

            HLS and DASH manifests are expanded via the corresponding
            helpers; anything else is added as a single format when it is
            an mp4 or when ``format_id`` yields a width or height.
            """
            f_url = url_or_none(format_url)
            if not f_url:
                return
            f = parse_resolution(format_id)
            ext = determine_ext(f_url)
            if format_id.startswith('m3u8') or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif format_id.startswith('mpd') or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    f_url, video_id, mpd_id='dash', fatal=False))
            elif ext == 'mp4' or f.get('width') or f.get('height'):
                f.update({
                    'url': f_url,
                    'format_id': format_id,
                })
                formats.append(f)

        STREAM_URL_PREFIX = 'stream_url_'

        for stream_mobj in re.finditer(
                r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
                % STREAM_URL_PREFIX, webpage):
            # group() with several names returns a tuple; it must be unpacked
            # because extract_format takes two positional arguments.
            extract_format(*stream_mobj.group('id', 'url'))

        if not formats:
            stream_key = self._search_regex(
                r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                webpage, 'stream key', group='value')

            stream = self._download_json(
                'https://spankbang.com/api/videos/stream', video_id,
                'Downloading stream JSON', data=urlencode_postdata({
                    'id': stream_key,
                    'data': 0,
                }), headers={
                    'Referer': url,
                    'X-Requested-With': 'XMLHttpRequest',
                })

            for format_id, format_url in stream.items():
                # A non-empty list value is reduced to its first element.
                if format_url and isinstance(format_url, list):
                    format_url = format_url[0]
                extract_format(format_id, format_url)

        self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))

        info = self._search_json_ld(webpage, video_id, default={})

        # Every field below is optional: a failed match yields None.
        title = self._html_search_regex(
            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
        description = self._search_regex(
            r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
            webpage, 'description', default=None)
        thumbnail = self._og_search_thumbnail(webpage, default=None)
        uploader = self._html_search_regex(
            (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
             r'class="user"[^>]*><img[^>]+>([^<]+)'),
            webpage, 'uploader', default=None)
        duration = parse_duration(self._search_regex(
            r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
            webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
            r'([\d,.]+)\s+plays', webpage, 'view count', default=None))

        age_limit = self._rta_search(webpage)

        # Page-scraped fields are combined with the JSON-LD metadata.
        return merge_dicts({
            'id': video_id,
            'title': title or video_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'age_limit': age_limit,
        }, info
        )
49bd993f S |
166 | |
167 | ||
class SpankBangPlaylistIE(InfoExtractor):
    """Extractor for SpankBang playlist pages.

    Each link on the page that points to a video inside the same playlist
    becomes one entry delegated to SpankBangIE.
    """
    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
        'info_dict': {
            'id': 'ug0k',
            'title': 'Big Ass Titties',
        },
        'playlist_mincount': 40,
    }

    def _real_extract(self, url):
        """Return a playlist result holding every video linked from ``url``."""
        url_match = re.match(self._VALID_URL, url)
        playlist_id, display_id = url_match.group('id', 'display_id')

        webpage = self._download_webpage(
            url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})

        # Anchors whose href has the form <something>-<video id>/playlist/<display id>...
        entry_link_re = (
            r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
            % re.escape(display_id))

        entries = []
        for link in re.finditer(entry_link_re, webpage):
            entry_url = urljoin(url, link.group('path'))
            entries.append(self.url_result(
                entry_url, ie=SpankBangIE.ie_key(),
                video_id=link.group('id')))

        # The title is optional; a failed match is not fatal.
        title = self._html_search_regex(
            r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
            fatal=False)

        return self.playlist_result(entries, playlist_id, title)