]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/pornhub.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / pornhub.py
index 5d8d7c100a4d40109f4f682c401ddec35a865619..d94f28ceb176701305f00f159a1c3734ddc8df9f 100644 (file)
@@ -3,11 +3,12 @@
 import math
 import operator
 import re
-import urllib.request
 
 from .common import InfoExtractor
 from .openload import PhantomJSwrapper
-from ..compat import compat_HTTPError, compat_str
+from ..compat import compat_str
+from ..networking import Request
+from ..networking.exceptions import HTTPError
 from ..utils import (
     NO_DEFAULT,
     ExtractorError,
@@ -46,8 +47,8 @@ def dl(*args, **kwargs):
                 r'document\.cookie\s*=\s*["\']RNKEY=',
                 r'document\.location\.reload\(true\)')):
             url_or_request = args[0]
-            url = (url_or_request.get_full_url()
-                   if isinstance(url_or_request, urllib.request.Request)
+            url = (url_or_request.url
+                   if isinstance(url_or_request, Request)
                    else url_or_request)
             phantom = PhantomJSwrapper(self, required_version='2.0')
             phantom.get(url, html=webpage)
@@ -58,6 +59,12 @@ def dl(*args, **kwargs):
     def _real_initialize(self):
         self._logged_in = False
 
+    def _set_age_cookies(self, host):
+        self._set_cookie(host, 'age_verified', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
+        self._set_cookie(host, 'accessPH', '1')
+
     def _login(self, host):
         if self._logged_in:
             return
@@ -80,8 +87,8 @@ def _login(self, host):
 
         def is_logged(webpage):
             return any(re.search(p, webpage) for p in (
-                r'class=["\']signOut',
-                r'>Sign\s+[Oo]ut\s*<'))
+                r'id="profileMenuDropdown"',
+                r'class="ph-icon-logout"'))
 
         if is_logged(login_page):
             self._logged_in = True
@@ -90,7 +97,7 @@ def is_logged(webpage):
         login_form = self._hidden_inputs(login_page)
 
         login_form.update({
-            'username': username,
+            'email': username,
             'password': password,
         })
 
@@ -267,8 +274,7 @@ def _real_extract(self, url):
         video_id = mobj.group('id')
 
         self._login(host)
-
-        self._set_cookie(host, 'age_verified', '1')
+        self._set_age_cookies(host)
 
         def dl_webpage(platform):
             self._set_cookie(host, 'platform', platform)
@@ -569,6 +575,7 @@ def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         user_id = mobj.group('id')
         videos_url = '%s/videos' % mobj.group('url')
+        self._set_age_cookies(mobj.group('host'))
         page = self._extract_page(url)
         if page:
             videos_url = update_url_query(videos_url, {'page': page})
@@ -597,7 +604,7 @@ def download_page(base_url, num, fallback=False):
                 base_url, item_id, note, query={'page': num})
 
         def is_404(e):
-            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+            return isinstance(e.cause, HTTPError) and e.cause.status == 404
 
         base_url = url
         has_page = page is not None
@@ -633,6 +640,7 @@ def _real_extract(self, url):
         item_id = mobj.group('id')
 
         self._login(host)
+        self._set_age_cookies(host)
 
         return self.playlist_result(self._entries(url, host, item_id), item_id)
 
@@ -812,5 +820,6 @@ def _real_extract(self, url):
         item_id = mobj.group('id')
 
         self._login(host)
+        self._set_age_cookies(host)
 
         return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)