]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/pornhub.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / extractor / pornhub.py
CommitLineData
6c376029 1# coding: utf-8
9933b574
PH
2from __future__ import unicode_literals
3
21fbf0f9 4import functools
34541395 5import itertools
21fbf0f9 6import operator
125cfd78 7import re
8
9from .common import InfoExtractor
1cc79574 10from ..compat import (
34541395 11 compat_HTTPError,
79367a98 12 compat_str,
278d061a 13 compat_urllib_request,
1cc79574 14)
278d061a 15from .openload import PhantomJSwrapper
1cc79574 16from ..utils import (
b8526c78 17 determine_ext,
50789175 18 ExtractorError,
ed8648a3 19 int_or_none,
cd85a1bb 20 merge_dicts,
0164cd5d 21 NO_DEFAULT,
8f9a477e 22 orderedSet,
e1e35d1a 23 remove_quotes,
0320ddc1 24 str_to_int,
2181983a 25 update_url_query,
26 urlencode_postdata,
4938c8d5 27 url_or_none,
125cfd78 28)
125cfd78 29
9933b574 30
class PornHubBaseIE(InfoExtractor):
    """Common base of the PornHub extractors.

    Adds two things on top of InfoExtractor: page downloads that detect the
    JavaScript challenge page and retry after running it through PhantomJS,
    and a per-site login that is performed at most once per extractor
    instance.
    """
    _NETRC_MACHINE = 'pornhub'

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page, retrying once if a JavaScript challenge is served.

        Takes the same arguments as the parent implementation (the URL or
        request object is expected as the first positional argument) and
        returns whatever the parent returns: a ``(webpage, urlh)`` pair, or
        a falsy value when the download failed non-fatally.
        """
        def dl(*args, **kwargs):
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)

        ret = dl(*args, **kwargs)

        if not ret:
            return ret

        webpage, urlh = ret

        # Markers of the challenge page served in place of the real content
        if any(re.search(p, webpage) for p in (
                r'<body\b[^>]+\bonload=["\']go\(\)',
                r'document\.cookie\s*=\s*["\']RNKEY=',
                r'document\.location\.reload\(true\)')):
            url_or_request = args[0]
            url = (url_or_request.get_full_url()
                   if isinstance(url_or_request, compat_urllib_request.Request)
                   else url_or_request)
            # Presumably PhantomJS executes the challenge script so that the
            # required cookies are set before the retry below
            phantom = PhantomJSwrapper(self, required_version='2.0')
            phantom.get(url, html=webpage)
            # The retry can fail non-fatally just like the first attempt;
            # hand its result back untouched instead of unpacking it, which
            # would raise TypeError on a falsy value
            return dl(*args, **kwargs)

        return webpage, urlh

    def _real_initialize(self):
        self._logged_in = False

    def _login(self, host):
        """Log in to ``host`` if credentials are available for it.

        Does nothing when already logged in or when no username is
        configured. Raises ExtractorError when the site rejects the login.
        """
        if self._logged_in:
            return

        site = host.split('.')[0]

        # Both sites pornhub and pornhubpremium have separate accounts
        # so there should be an option to provide credentials for both.
        # At the same time some videos are available under the same video id
        # on both sites so that we have to identify them as the same video.
        # For that purpose we have to keep both in the same extractor
        # but under different netrc machines.
        username, password = self._get_login_info(netrc_machine=site)
        if username is None:
            return

        login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
        login_page = self._download_webpage(
            login_url, None, 'Downloading %s login page' % site)

        def is_logged(webpage):
            # A "sign out" control is only present for an authenticated session
            return any(re.search(p, webpage) for p in (
                r'class=["\']signOut',
                r'>Sign\s+[Oo]ut\s*<'))

        if is_logged(login_page):
            self._logged_in = True
            return

        # Start from the hidden inputs of the login form and add credentials
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'username': username,
            'password': password,
        })

        response = self._download_json(
            'https://www.%s/front/authenticate' % host, None,
            'Logging in to %s' % site,
            data=urlencode_postdata(login_form),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': login_url,
                'X-Requested-With': 'XMLHttpRequest',
            })

        if response.get('success') == '1':
            self._logged_in = True
            return

        message = response.get('message')
        if message is not None:
            raise ExtractorError(
                'Unable to login: %s' % message, expected=True)

        raise ExtractorError('Unable to log in')
118
71a1f617
S
119
class PornHubIE(PornHubBaseIE):
    """Extractor for single videos on PornHub, PornHub Premium and Thumbzilla."""
    IE_DESC = 'PornHub and Thumbzilla'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                            (?:www\.)?thumbzilla\.com/video/
                        )
                        (?P<id>[\da-z]+)
                    '''
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
            'uploader': 'Babes',
            'upload_date': '20130628',
            'timestamp': 1372447216,
            'duration': 361,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
        },
    }, {
        # non-ASCII title
        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
        'info_dict': {
            'id': '1331683002',
            'ext': 'mp4',
            'title': '重庆婷婷女王足交',
            'upload_date': '20150213',
            'timestamp': 1423804862,
            'duration': 1753,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # subtitles
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
        'info_dict': {
            'id': 'ph5af5fef7c2aa7',
            'ext': 'mp4',
            'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
            'uploader': 'BFFs',
            'duration': 622,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
            'subtitles': {
                'en': [{
                    "ext": 'srt'
                }]
            },
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video has been disabled',
    }, {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
        'only_matching': True,
    }, {
        # removed at the request of cam4.com
        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
        'only_matching': True,
    }, {
        # removed at the request of the copyright owner
        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
        'only_matching': True,
    }, {
        # removed by uploader
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
        'only_matching': True,
    }, {
        # private video
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
        'only_matching': True,
    }, {
        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
        'only_matching': True,
    }, {
        # Some videos are available with the same id on both premium
        # and non-premium sites (e.g. this and the following test)
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the embed URLs of all PornHub iframes found in ``webpage``."""
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
            webpage)

    def _extract_count(self, pattern, webpage, name):
        """Search ``webpage`` for ``pattern`` and convert the match with str_to_int.

        The search is non-fatal, so a missing counter is not an error.
        """
        return str_to_int(self._search_regex(
            pattern, webpage, '%s count' % name, fatal=False))

    def _real_extract(self, url):
        """Extract metadata and formats for the video at ``url``.

        Format URLs are gathered in order from the ``flashvars`` JSON, from
        obfuscated JavaScript variables on the desktop page, from the "tv"
        variant of the page and finally from download buttons. Raises
        ExtractorError when the site shows an error message or the video is
        locked.
        """
        mobj = re.match(self._VALID_URL, url)
        # Thumbzilla URLs carry no host group; they are served from pornhub.com
        host = mobj.group('host') or 'pornhub.com'
        video_id = mobj.group('id')

        self._login(host)

        self._set_cookie(host, 'age_verified', '1')

        def dl_webpage(platform):
            # The "platform" cookie selects which variant of the page is served
            self._set_cookie(host, 'platform', platform)
            return self._download_webpage(
                'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
                video_id, 'Downloading %s webpage' % platform)

        webpage = dl_webpage('pc')

        error_msg = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
            webpage, 'error message', default=None, group='error')
        if error_msg:
            error_msg = re.sub(r'\s+', ' ', error_msg)
            raise ExtractorError(
                'PornHub said: %s' % error_msg,
                expected=True, video_id=video_id)

        # video_title from flashvars contains whitespace instead of non-ASCII (see
        # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
        # on that anymore.
        title = self._html_search_meta(
            'twitter:title', webpage, default=None) or self._html_search_regex(
            (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
             r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
            webpage, 'title', group='title')

        # (url, height) pairs in discovery order; the set is for deduplication
        video_urls = []
        video_urls_set = set()
        subtitles = {}

        flashvars = self._parse_json(
            self._search_regex(
                r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
            video_id)
        if flashvars:
            subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
            if subtitle_url:
                subtitles.setdefault('en', []).append({
                    'url': subtitle_url,
                    'ext': 'srt',
                })
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            media_definitions = flashvars.get('mediaDefinitions')
            if isinstance(media_definitions, list):
                for definition in media_definitions:
                    if not isinstance(definition, dict):
                        continue
                    video_url = definition.get('videoUrl')
                    if not video_url or not isinstance(video_url, compat_str):
                        continue
                    if video_url in video_urls_set:
                        continue
                    video_urls_set.add(video_url)
                    video_urls.append(
                        (video_url, int_or_none(definition.get('quality'))))
        else:
            thumbnail, duration = [None] * 2

        def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
            """Evaluate the simple ``var a = 'x' + b;`` statements matched by ``pattern``.

            Returns a dict mapping variable names to their concatenated
            string values, or an empty dict when nothing matched.
            """
            assignments = self._search_regex(
                pattern, webpage, 'encoded url', default=default)
            if not assignments:
                return {}

            assignments = assignments.split(';')

            js_vars = {}

            def parse_js_value(inp):
                # Drop /* ... */ comments, then resolve "+" concatenation of
                # quoted literals and previously assigned variables
                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
                if '+' in inp:
                    inps = inp.split('+')
                    return functools.reduce(
                        operator.concat, map(parse_js_value, inps))
                inp = inp.strip()
                if inp in js_vars:
                    return js_vars[inp]
                return remove_quotes(inp)

            for assn in assignments:
                assn = assn.strip()
                if not assn:
                    continue
                assn = re.sub(r'var\s+', '', assn)
                vname, sep, value = assn.partition('=')
                if not sep:
                    # Not an assignment (e.g. a bare call); nothing to record
                    continue
                # Names are looked up stripped in parse_js_value, so they
                # must be stored stripped as well
                js_vars[vname.strip()] = parse_js_value(value)
            return js_vars

        def add_video_url(video_url):
            v_url = url_or_none(video_url)
            if not v_url:
                return
            if v_url in video_urls_set:
                return
            video_urls.append((v_url, None))
            video_urls_set.add(v_url)

        def parse_quality_items(quality_items):
            # qualityItems holds a JSON list of dicts, each with a "url" key
            q_items = self._parse_json(quality_items, video_id, fatal=False)
            if not isinstance(q_items, list):
                return
            for item in q_items:
                if isinstance(item, dict):
                    add_video_url(item.get('url'))

        if not video_urls:
            FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
            js_vars = extract_js_vars(
                webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
                default=None)
            if js_vars:
                for key, format_url in js_vars.items():
                    # "qualityItems" must be tested first because it also
                    # starts with "quality"
                    if key.startswith(FORMAT_PREFIXES[-1]):
                        parse_quality_items(format_url)
                    elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                        add_video_url(format_url)
            if not video_urls and re.search(
                    r'<[^>]+\bid=["\']lockedPlayer', webpage):
                raise ExtractorError(
                    'Video %s is locked' % video_id, expected=True)

        if not video_urls:
            js_vars = extract_js_vars(
                dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
            # A missing variable is tolerated here; with no URL collected the
            # extraction fails later through the usual "no formats" path
            add_video_url(js_vars.get('mediastring'))

        for mobj in re.finditer(
                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage):
            video_url = mobj.group('url')
            if video_url not in video_urls_set:
                video_urls.append((video_url, None))
                video_urls_set.add(video_url)

        upload_date = None
        formats = []
        for video_url, height in video_urls:
            if not upload_date:
                # The date is taken from a /YYYYMM/DD/ path segment of the URL
                upload_date = self._search_regex(
                    r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
                if upload_date:
                    upload_date = upload_date.replace('/', '')
            ext = determine_ext(video_url)
            if ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, mpd_id='dash', fatal=False))
                continue
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            tbr = None
            # Progressive URLs embed "<height>P_<bitrate>K"
            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
            if mobj:
                if not height:
                    height = int(mobj.group('height'))
                tbr = int(mobj.group('tbr'))
            formats.append({
                'url': video_url,
                'format_id': '%dp' % height if height else None,
                'height': height,
                'tbr': tbr,
            })
        self._sort_formats(formats)

        video_uploader = self._html_search_regex(
            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
            webpage, 'uploader', default=None)

        def extract_vote_count(kind, name):
            return self._extract_count(
                (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
                 r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
                webpage, name)

        view_count = self._extract_count(
            r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
        like_count = extract_vote_count('Up', 'like')
        dislike_count = extract_vote_count('Down', 'dislike')
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

        def extract_list(meta_key):
            # Returns the link texts inside the "<meta_key>Wrapper" div, or
            # None when the div is absent
            div = self._search_regex(
                r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
                % meta_key, webpage, meta_key, default=None)
            if div:
                return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)

        info = self._search_json_ld(webpage, video_id, default={})
        # description provided in JSON-LD is irrelevant
        info['description'] = None

        return merge_dicts({
            'id': video_id,
            'uploader': video_uploader,
            'upload_date': upload_date,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
            'tags': extract_list('tags'),
            'categories': extract_list('categories'),
            'subtitles': subtitles,
        }, info)
e66e1a00
S
472
473
class PornHubPlaylistBaseIE(PornHubBaseIE):
    """Helpers shared by the extractors that list several videos."""

    def _extract_page(self, url):
        """Return the ``page=N`` number found in ``url`` as an int, or None."""
        page = self._search_regex(
            r'\bpage=(\d+)', url, 'page', default=None)
        return int_or_none(page)

    def _extract_entries(self, webpage, host):
        """Build url_result entries for every video link found in ``webpage``."""
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
        # https://github.com/ytdl-org/youtube-dl/issues/11594).
        container = self._search_regex(
            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
            'container', default=webpage)

        # Deduplicate (path, title) pairs while keeping page order
        links = orderedSet(re.findall(
            r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
            container))

        entries = []
        for path, video_title in links:
            entries.append(self.url_result(
                'http://www.%s/%s' % (host, path),
                PornHubIE.ie_key(), video_title=video_title))
        return entries
e66e1a00 495
40e146aa 496
class PornHubUserIE(PornHubPlaylistBaseIE):
    """Redirects a user, channel, model or pornstar page to its ``/videos`` listing."""
    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph',
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious',
        'info_dict': {
            'id': 'liz-vicious',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/users/russianveet69',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/channels/povd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
    }, {
        # Unavailable via /videos page, but available with direct pagination
        # on pornstar page (see [1]), requires premium
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
        'only_matching': True,
    }, {
        # Same as before, multi page
        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to PornHubPagedVideoListIE, keeping any requested page number."""
        match = re.match(self._VALID_URL, url)
        # The "url" group is the profile URL without query string or fragment
        target = '%s/videos' % match.group('url')
        requested_page = self._extract_page(url)
        if requested_page:
            target = update_url_query(target, {'page': requested_page})
        return self.url_result(
            target, ie=PornHubPagedVideoListIE.ie_key(),
            video_id=match.group('id'))
21b08463
S
538
539
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
    """Base for listings that are spread over several numbered pages."""

    @staticmethod
    def _has_more(webpage):
        """Tell whether ``webpage`` advertises a following page."""
        next_marker = re.search(
            r'''(?x)
                <li[^>]+\bclass=["\']page_next|
                <link[^>]+\brel=["\']next|
                <button[^>]+\bid=["\']moreDataBtn
            ''', webpage)
        return next_marker is not None

    def _entries(self, url, host, item_id):
        """Yield the entries of every page of the listing at ``url``.

        When ``url`` names a page explicitly only that page is read;
        otherwise pages are walked from 1 until one is missing, is empty or
        has no "next" marker.
        """
        page = self._extract_page(url)

        VIDEOS = '/videos'

        def download_page(base_url, num, fallback=False):
            suffix = ' (switch to fallback)' if fallback else ''
            note = 'Downloading page %d%s' % (num, suffix)
            return self._download_webpage(
                base_url, item_id, note, query={'page': num})

        def is_404(e):
            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404

        base_url = url
        if page is None:
            first_page = 1
            page_numbers = itertools.count(first_page)
        else:
            first_page = page
            page_numbers = (first_page, )

        for page_num in page_numbers:
            try:
                try:
                    webpage = download_page(base_url, page_num)
                except ExtractorError as err:
                    # Some sources may not be available via /videos page,
                    # trying to fallback to main page pagination (see [1])
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
                    can_fall_back = (
                        is_404(err) and page_num == first_page
                        and VIDEOS in base_url)
                    if not can_fall_back:
                        raise
                    base_url = base_url.replace(VIDEOS, '')
                    webpage = download_page(base_url, page_num, fallback=True)
            except ExtractorError as err:
                # A 404 past the first page simply marks the end of the listing
                if is_404(err) and page_num != first_page:
                    break
                raise
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
            for entry in page_entries:
                yield entry
            if not self._has_more(webpage):
                break

    def _real_extract(self, url):
        """Log in if possible and return the listing at ``url`` as a playlist."""
        match = re.match(self._VALID_URL, url)
        host, item_id = match.group('host'), match.group('id')

        self._login(host)

        entries = self._entries(url, host, item_id)
        return self.playlist_result(entries, item_id)
21b08463
S
599
600
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
    """Catch-all extractor for paged video listings not claimed by a more specific extractor."""
    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph/videos',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/rushandlia/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
        'info_dict': {
            'id': 'pornstar/jenny-blighe/videos',
        },
        'playlist_mincount': 149,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
        'info_dict': {
            'id': 'pornstar/jenny-blighe/videos',
        },
        'playlist_mincount': 40,
    }, {
        # default sorting as Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos',
        'info_dict': {
            'id': 'channels/povd/videos',
        },
        'playlist_mincount': 293,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
        'only_matching': True,
    }, {
        # Most Recent Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
        'only_matching': True,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
        'only_matching': True,
    }, {
        # Longest Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
        'only_matching': True,
    }, {
        # Newest Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video/search?search=123',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/categories/teen',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/categories/teen?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/hd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/hd?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/described-video',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/described-video?page=2',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/playlist/44121572',
        'info_dict': {
            'id': 'playlist/44121572',
        },
        'playlist_mincount': 132,
    }, {
        'url': 'https://www.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only when none of the more specific extractors does.

        _VALID_URL here matches almost any path on the site, so the single
        video, user and upload-list extractors get precedence.
        """
        more_specific = (PornHubIE, PornHubUserIE, PornHubUserVideosUploadIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(PornHubPagedVideoListIE, cls).suitable(url)
21b08463 714
34541395 715
21b08463 716class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
29f7c58a 717 _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
21b08463
S
718 _TESTS = [{
719 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
720 'info_dict': {
721 'id': 'jenny-blighe',
722 },
723 'playlist_mincount': 129,
724 }, {
725 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
726 'only_matching': True,
727 }]