]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/pornhub.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / pornhub.py
1 import functools
2 import itertools
3 import math
4 import operator
5 import re
6
7 from .common import InfoExtractor
8 from .openload import PhantomJSwrapper
9 from ..networking import Request
10 from ..networking.exceptions import HTTPError
11 from ..utils import (
12 NO_DEFAULT,
13 ExtractorError,
14 clean_html,
15 determine_ext,
16 format_field,
17 int_or_none,
18 merge_dicts,
19 orderedSet,
20 remove_quotes,
21 remove_start,
22 str_to_int,
23 update_url_query,
24 url_or_none,
25 urlencode_postdata,
26 )
27
28
class PornHubBaseIE(InfoExtractor):
    """Common base for the PornHub extractors.

    Provides the host-matching regex fragment, a page downloader that retries
    through PhantomJS when the response matches one of a few JavaScript
    patterns, age-gate cookies and optional account login.
    """

    _NETRC_MACHINE = 'pornhub'
    # The named group ``host`` is only set for the clearnet domains; for the
    # .onion alternative it is None, so callers need a fallback.
    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a page, retrying once via PhantomJS if it matches a JS-redirect pattern.

        Returns whatever the parent implementation returns: a
        ``(webpage, urlh)`` tuple, or a falsy value when the download failed
        non-fatally.
        """
        def dl(*args, **kwargs):
            # Zero-argument super() is not usable inside a nested function
            # that takes no positional parameter, hence the explicit form.
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)

        ret = dl(*args, **kwargs)

        if not ret:
            return ret

        webpage, urlh = ret

        # NOTE(review): these patterns presumably identify an anti-bot
        # interstitial that sets a cookie via JavaScript and reloads - confirm.
        if any(re.search(p, webpage) for p in (
                r'<body\b[^>]+\bonload=["\']go\(\)',
                r'document\.cookie\s*=\s*["\']RNKEY=',
                r'document\.location\.reload\(true\)')):
            url_or_request = args[0]
            url = (url_or_request.url
                   if isinstance(url_or_request, Request)
                   else url_or_request)
            phantom = PhantomJSwrapper(self, required_version='2.0')
            phantom.get(url, html=webpage)
            # Return the retry result untouched: with fatal=False it may be
            # falsy, and unpacking it here would raise TypeError.
            return dl(*args, **kwargs)

        return webpage, urlh

    def _real_initialize(self):
        self._logged_in = False

    def _set_age_cookies(self, host):
        """Set the age-verification/disclaimer cookies for ``host``."""
        self._set_cookie(host, 'age_verified', '1')
        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
        self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
        self._set_cookie(host, 'accessPH', '1')

    def _login(self, host):
        """Log in to ``host`` if credentials are configured; no-op otherwise.

        Raises ExtractorError when the site rejects the login attempt.
        """
        if self._logged_in:
            return

        site = host.split('.')[0]

        # Both sites pornhub and pornhubpremium have separate accounts
        # so there should be an option to provide credentials for both.
        # At the same time some videos are available under the same video id
        # on both sites so that we have to identify them as the same video.
        # For that purpose we have to keep both in the same extractor
        # but under different netrc machines.
        username, password = self._get_login_info(netrc_machine=site)
        if username is None:
            return

        login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '')
        login_page = self._download_webpage(
            login_url, None, f'Downloading {site} login page')

        def is_logged(webpage):
            return any(re.search(p, webpage) for p in (
                r'id="profileMenuDropdown"',
                r'class="ph-icon-logout"'))

        # Existing cookies may already carry a valid session
        if is_logged(login_page):
            self._logged_in = True
            return

        # Hidden inputs of the login page are submitted along with the credentials
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'email': username,
            'password': password,
        })

        response = self._download_json(
            f'https://www.{host}/front/authenticate', None,
            f'Logging in to {site}',
            data=urlencode_postdata(login_form),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': login_url,
                'X-Requested-With': 'XMLHttpRequest',
            })

        if response.get('success') == '1':
            self._logged_in = True
            return

        message = response.get('message')
        if message is not None:
            raise ExtractorError(
                f'Unable to login: {message}', expected=True)

        raise ExtractorError('Unable to log in')
123
124
class PornHubIE(PornHubBaseIE):
    """Extractor for single video pages (view_video.php, video/show, embed, thumbzilla)."""

    IE_DESC = 'PornHub and Thumbzilla'
    _VALID_URL = rf'''(?x)
                    https?://
                        (?:
                            (?:[^/]+\.)?
                            {PornHubBaseIE._PORNHUB_HOST_RE}
                            /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                            (?:www\.)?thumbzilla\.com/video/
                        )
                        (?P<id>[\da-z]+)
                    '''
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
            'uploader': 'Babes',
            'upload_date': '20130628',
            'timestamp': 1372447216,
            'duration': 361,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
            'cast': list,
        },
    }, {
        # non-ASCII title
        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
        'info_dict': {
            'id': '1331683002',
            'ext': 'mp4',
            'title': '重庆婷婷女王足交',
            'upload_date': '20150213',
            'timestamp': 1423804862,
            'duration': 1753,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
    }, {
        # subtitles
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
        'info_dict': {
            'id': 'ph5af5fef7c2aa7',
            'ext': 'mp4',
            'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
            'uploader': 'BFFs',
            'duration': 622,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
            'subtitles': {
                'en': [{
                    'ext': 'srt',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video has been disabled',
    }, {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
        'info_dict': {
            'id': 'ph601dc30bae19a',
            'uploader': 'Projekt Melody',
            'uploader_id': 'projekt-melody',
            'upload_date': '20210205',
            'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
            'thumbnail': r're:https?://.+',
        },
    }, {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
        'only_matching': True,
    }, {
        # removed at the request of cam4.com
        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
        'only_matching': True,
    }, {
        # removed at the request of the copyright owner
        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
        'only_matching': True,
    }, {
        # removed by uploader
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
        'only_matching': True,
    }, {
        # private video
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
        'only_matching': True,
    }, {
        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
        'only_matching': True,
    }, {
        # Some videos are available with the same id on both premium
        # and non-premium sites (e.g. this and the following test)
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
        'only_matching': True,
    }, {
        # geo restricted
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
        'only_matching': True,
    }, {
        'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
        'only_matching': True,
    }]

    def _extract_count(self, pattern, webpage, name):
        """Return the first match of ``pattern`` in ``webpage`` converted with str_to_int."""
        return str_to_int(self._search_regex(pattern, webpage, f'{name} count', default=None))

    def _real_extract(self, url):
        """Build the info dict for a single video page.

        Media URLs are collected, in order of preference, from the
        ``flashvars_*`` JSON, from obfuscated ``media_*``/``quality_*``/
        ``qualityItems_*`` JS variables, from the 'tv' platform page, and
        finally from any download-button links.

        Raises ExtractorError when the page shows an error message or a
        locked player, and reports geo restriction when the page says so.
        """
        mobj = self._match_valid_url(url)
        # The host group is unset for .onion and thumbzilla URLs
        host = mobj.group('host') or 'pornhub.com'
        video_id = mobj.group('id')

        self._login(host)
        self._set_age_cookies(host)

        def dl_webpage(platform):
            self._set_cookie(host, 'platform', platform)
            return self._download_webpage(
                f'https://www.{host}/view_video.php?viewkey={video_id}',
                video_id, f'Downloading {platform} webpage')

        webpage = dl_webpage('pc')

        error_msg = self._html_search_regex(
            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
            webpage, 'error message', default=None, group='error')
        if error_msg:
            error_msg = re.sub(r'\s+', ' ', error_msg)
            raise ExtractorError(
                f'PornHub said: {error_msg}',
                expected=True, video_id=video_id)

        if any(re.search(p, webpage) for p in (
                r'class=["\']geoBlocked["\']',
                r'>\s*This content is unavailable in your country')):
            self.raise_geo_restricted()

        # video_title from flashvars contains whitespace instead of non-ASCII (see
        # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
        # on that anymore.
        title = self._html_search_meta(
            'twitter:title', webpage, default=None) or self._html_search_regex(
            (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
             r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
            webpage, 'title', group='title')

        # (url, height-or-None) pairs in discovery order; the set dedupes URLs
        video_urls = []
        video_urls_set = set()
        subtitles = {}

        flashvars = self._parse_json(
            self._search_regex(
                r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
            video_id)
        if flashvars:
            subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
            if subtitle_url:
                subtitles.setdefault('en', []).append({
                    'url': subtitle_url,
                    'ext': 'srt',
                })
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            media_definitions = flashvars.get('mediaDefinitions')
            if isinstance(media_definitions, list):
                for definition in media_definitions:
                    if not isinstance(definition, dict):
                        continue
                    video_url = definition.get('videoUrl')
                    if not video_url or not isinstance(video_url, str):
                        continue
                    if video_url in video_urls_set:
                        continue
                    video_urls_set.add(video_url)
                    video_urls.append(
                        (video_url, int_or_none(definition.get('quality'))))
        else:
            thumbnail, duration = [None] * 2

        def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
            """Evaluate a run of ``var a="x"; var b=a+"y";`` assignments into a dict.

            Only string literals, ``+`` concatenation, block comments and
            references to earlier variables are understood.
            """
            assignments = self._search_regex(
                pattern, webpage, 'encoded url', default=default)
            if not assignments:
                return {}

            assignments = assignments.split(';')

            js_vars = {}

            def parse_js_value(inp):
                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
                if '+' in inp:
                    inps = inp.split('+')
                    return functools.reduce(
                        operator.concat, map(parse_js_value, inps))
                inp = inp.strip()
                if inp in js_vars:
                    return js_vars[inp]
                return remove_quotes(inp)

            for assn in assignments:
                assn = assn.strip()
                if not assn:
                    continue
                assn = re.sub(r'var\s+', '', assn)
                vname, sep, value = assn.partition('=')
                if not sep:
                    # Not an assignment (e.g. a bare declaration); nothing to record
                    continue
                # Strip the name so ``var x = ...`` is stored under 'x', which
                # is how parse_js_value() and the callers look it up
                js_vars[vname.strip()] = parse_js_value(value)
            return js_vars

        def add_video_url(video_url):
            v_url = url_or_none(video_url)
            if not v_url:
                return
            if v_url in video_urls_set:
                return
            video_urls.append((v_url, None))
            video_urls_set.add(v_url)

        def parse_quality_items(quality_items):
            q_items = self._parse_json(quality_items, video_id, fatal=False)
            if not isinstance(q_items, list):
                return
            for item in q_items:
                if isinstance(item, dict):
                    add_video_url(item.get('url'))

        if not video_urls:
            FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
            js_vars = extract_js_vars(
                webpage, r'(var\s+(?:{})_.+)'.format('|'.join(FORMAT_PREFIXES)),
                default=None)
            if js_vars:
                for key, format_url in js_vars.items():
                    # 'qualityItems' must be tested first since it also starts with 'quality'
                    if key.startswith(FORMAT_PREFIXES[-1]):
                        parse_quality_items(format_url)
                    elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                        add_video_url(format_url)
            if not video_urls and re.search(
                    r'<[^>]+\bid=["\']lockedPlayer', webpage):
                raise ExtractorError(
                    f'Video {video_id} is locked', expected=True)

        if not video_urls:
            js_vars = extract_js_vars(
                dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
            # .get(): a missing variable simply contributes no URL
            # (add_video_url ignores None) instead of raising KeyError
            add_video_url(js_vars.get('mediastring'))

        for mobj in re.finditer(
                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage):
            video_url = mobj.group('url')
            if video_url not in video_urls_set:
                video_urls.append((video_url, None))
                video_urls_set.add(video_url)

        upload_date = None
        formats = []

        def add_format(format_url, height=None):
            ext = determine_ext(format_url)
            if ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id='dash', fatal=False))
                return
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                return
            if not height:
                # Fall back to a '<height>P_<bitrate>K' fragment in the URL
                height = int_or_none(self._search_regex(
                    r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
                    default=None))
            formats.append({
                'url': format_url,
                'format_id': format_field(height, None, '%dp'),
                'height': height,
            })

        for video_url, height in video_urls:
            if not upload_date:
                # A '/YYYYMM/DD/' path segment in the media URL is used as the upload date
                upload_date = self._search_regex(
                    r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
                if upload_date:
                    upload_date = upload_date.replace('/', '')
            if '/video/get_media' in video_url:
                # This endpoint returns a JSON list of further media definitions
                medias = self._download_json(video_url, video_id, fatal=False)
                if isinstance(medias, list):
                    for media in medias:
                        if not isinstance(media, dict):
                            continue
                        video_url = url_or_none(media.get('videoUrl'))
                        if not video_url:
                            continue
                        height = int_or_none(media.get('quality'))
                        add_format(video_url, height)
                continue
            add_format(video_url)

        model_profile = self._search_json(
            r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
        video_uploader = self._html_search_regex(
            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
            webpage, 'uploader', default=None) or model_profile.get('username')

        def extract_vote_count(kind, name):
            return self._extract_count(
                (rf'<span[^>]+\bclass="votes{kind}"[^>]*>([\d,\.]+)</span>',
                 rf'<span[^>]+\bclass=["\']votes{kind}["\'][^>]*\bdata-rating=["\'](\d+)'),
                webpage, name)

        view_count = self._extract_count(
            r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
        like_count = extract_vote_count('Up', 'like')
        dislike_count = extract_vote_count('Down', 'dislike')
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

        def extract_list(meta_key):
            # Returns None (not an empty list) when the wrapper div is absent
            div = self._search_regex(
                rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>',
                webpage, meta_key, default=None)
            if div:
                return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]

        info = self._search_json_ld(webpage, video_id, default={})
        # description provided in JSON-LD is irrelevant
        info['description'] = None

        return merge_dicts({
            'id': video_id,
            'uploader': video_uploader,
            'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
            'upload_date': upload_date,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
            'tags': extract_list('tags'),
            'categories': extract_list('categories'),
            'cast': extract_list('pornstars'),
            'subtitles': subtitles,
        }, info)
513
514
class PornHubPlaylistBaseIE(PornHubBaseIE):
    """Helpers shared by the listing extractors: page-number and entry parsing."""

    def _extract_page(self, url):
        """Return the ``page=N`` value found in ``url`` as an int, or None."""
        page = self._search_regex(r'\bpage=(\d+)', url, 'page', default=None)
        return int_or_none(page)

    def _extract_entries(self, webpage, host):
        """Return url_result entries for every distinct video link in ``webpage``."""
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
        # https://github.com/ytdl-org/youtube-dl/issues/11594).
        container = self._search_regex(
            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
            'container', default=webpage)

        # (relative link, title) pairs, de-duplicated while keeping page order
        links = orderedSet(re.findall(
            r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
            container))

        entries = []
        for path, video_title in links:
            entries.append(self.url_result(
                f'http://www.{host}/{path}',
                PornHubIE.ie_key(), video_title=video_title))
        return entries
536
537
class PornHubUserIE(PornHubPlaylistBaseIE):
    """Extractor for user/channel/model/pornstar profile URLs.

    Does no downloading itself: it delegates to PornHubPagedVideoListIE on the
    profile's ``/videos`` page.
    """

    _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph',
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious',
        'info_dict': {
            'id': 'liz-vicious',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/users/russianveet69',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/channels/povd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
    }, {
        # Unavailable via /videos page, but available with direct pagination
        # on pornstar page (see [1]), requires premium
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
        'only_matching': True,
    }, {
        # Same as before, multi page
        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
        'only_matching': True,
    }, {
        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the profile's ``/videos`` listing."""
        mobj = self._match_valid_url(url)
        user_id = mobj.group('id')
        videos_url = '{}/videos'.format(mobj.group('url'))
        # The host group is unset for .onion URLs; fall back to the main
        # domain (as PornHubIE does) rather than passing None as the cookie domain
        self._set_age_cookies(mobj.group('host') or 'pornhub.com')
        page = self._extract_page(url)
        if page:
            # Carry an explicit ?page=N over to the delegated URL
            videos_url = update_url_query(videos_url, {'page': page})
        return self.url_result(
            videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
583
584
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
    """Base for listings paginated through a ``page`` query parameter."""

    @staticmethod
    def _has_more(webpage):
        """Return True if ``webpage`` contains a next-page link or a load-more button."""
        return re.search(
            r'''(?x)
                <li[^>]+\bclass=["\']page_next|
                <link[^>]+\brel=["\']next|
                <button[^>]+\bid=["\']moreDataBtn
            ''', webpage) is not None

    def _entries(self, url, host, item_id):
        """Yield video entries page by page.

        If ``url`` carries an explicit ``page=N`` only that page is fetched;
        otherwise pages are walked from 1 until one is empty, returns 404, or
        shows no further-page marker.
        """
        page = self._extract_page(url)

        VIDEOS = '/videos'

        def download_page(base_url, num, fallback=False):
            note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '')
            return self._download_webpage(
                base_url, item_id, note, query={'page': num})

        def is_404(e):
            return isinstance(e.cause, HTTPError) and e.cause.status == 404

        base_url = url
        has_page = page is not None
        first_page = page if has_page else 1
        for page_num in (first_page, ) if has_page else itertools.count(first_page):
            try:
                try:
                    webpage = download_page(base_url, page_num)
                except ExtractorError as e:
                    # Some sources may not be available via /videos page,
                    # trying to fallback to main page pagination (see [1])
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
                    if is_404(e) and page_num == first_page and VIDEOS in base_url:
                        base_url = base_url.replace(VIDEOS, '')
                        webpage = download_page(base_url, page_num, fallback=True)
                    else:
                        raise
            except ExtractorError as e:
                # A 404 past the first page is treated as the end of the listing
                if is_404(e) and page_num != first_page:
                    break
                raise
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
            yield from page_entries
            if not self._has_more(webpage):
                break

    def _real_extract(self, url):
        """Log in if configured, set age cookies and return the playlist for ``url``."""
        mobj = self._match_valid_url(url)
        # The host group is unset for .onion URLs; without a fallback
        # _login() would fail on None and entry URLs would embed 'None'
        host = mobj.group('host') or 'pornhub.com'
        item_id = mobj.group('id')

        self._login(host)
        self._set_age_cookies(host)

        return self.playlist_result(self._entries(url, host, item_id), item_id)
645
646
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
    """Catch-all extractor for paginated video listings on the PornHub hosts.

    The URL pattern is deliberately broad (anything except ``playlist/``), so
    ``suitable`` steps aside for the more specific extractors.
    """

    _VALID_URL = rf'https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph/videos',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/rushandlia/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
        'info_dict': {
            'id': 'pornstar/jenny-blighe/videos',
        },
        'playlist_mincount': 149,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
        'info_dict': {
            'id': 'pornstar/jenny-blighe/videos',
        },
        'playlist_mincount': 40,
    }, {
        # default sorting as Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos',
        'info_dict': {
            'id': 'channels/povd/videos',
        },
        'playlist_mincount': 293,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
        'only_matching': True,
    }, {
        # Most Recent Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
        'only_matching': True,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
        'only_matching': True,
    }, {
        # Longest Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
        'only_matching': True,
    }, {
        # Newest Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video/search?search=123',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/categories/teen',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/categories/teen?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/hd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/hd?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/described-video',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/described-video?page=2',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
        'only_matching': True,
    }, {
        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match ``url`` only when none of the more specific extractors claims it."""
        more_specific = (PornHubIE, PornHubUserIE, PornHubUserVideosUploadIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super().suitable(url)
751
752
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
    """Extractor for a profile's ``/videos/upload`` listing.

    All behaviour is inherited; this class only contributes the URL pattern,
    whose ``id`` group is the profile name.
    """

    _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
    _TESTS = [
        {
            'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
            'info_dict': {
                'id': 'jenny-blighe',
            },
            'playlist_mincount': 129,
        },
        {
            'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
            'only_matching': True,
        },
        {
            'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
            'only_matching': True,
        },
    ]
768
769
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
    """Extractor for ``/playlist/<id>`` pages, paginated via ``playlist/viewChunked``."""

    _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/playlist/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.pornhub.com/playlist/44121572',
        'info_dict': {
            'id': '44121572',
        },
        'playlist_count': 77,
    }, {
        'url': 'https://www.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351?page=2',
        'only_matching': True,
    }]

    def _entries(self, url, host, item_id):
        """Yield the playlist's video entries.

        Page 1 comes from the playlist page itself; later pages are fetched
        from the ``viewChunked`` endpoint using the playlist id and token
        embedded in page 1.
        """
        webpage = self._download_webpage(url, item_id, 'Downloading page 1')
        playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
        video_count = int_or_none(
            self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
        token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')
        # NOTE(review): the constants suggest 36 items on the first page and
        # 40 per subsequent chunk - confirm against the site
        page_count = math.ceil((video_count - 36) / 40.) + 1
        page_entries = self._extract_entries(webpage, host)

        def download_page(page_num):
            note = f'Downloading page {page_num}'
            page_url = f'https://www.{host}/playlist/viewChunked'
            return self._download_webpage(page_url, item_id, note, query={
                'id': playlist_id,
                'page': page_num,
                'token': token,
            })

        for page_num in range(1, page_count + 1):
            # Page 1 was already downloaded and parsed above
            if page_num > 1:
                webpage = download_page(page_num)
                page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
            yield from page_entries

    def _real_extract(self, url):
        """Log in if configured, set age cookies and return the playlist."""
        mobj = self._match_valid_url(url)
        # The host group is unset for .onion URLs; without a fallback
        # _login() would fail on None and the chunk URL would embed 'None'
        host = mobj.group('host') or 'pornhub.com'
        item_id = mobj.group('id')

        self._login(host)
        self._set_age_cookies(host)

        # Use the 'url' group so any query string (e.g. ?page=2) is dropped
        return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)