[ie/crunchyroll] Fix stream extraction (#10005)

[yt-dlp.git] / yt_dlp / extractor / pornhub.py
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py

index 023b5f3b9701738e7790cf681c242056ed67cf63..d94f28ceb176701305f00f159a1c3734ddc8df9f 100644 (file)
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -5,27 +5,25 @@
  import re
  
  from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_str,
-    compat_urllib_request,
-)
  from .openload import PhantomJSwrapper
+from ..compat import compat_str
+from ..networking import Request
+from ..networking.exceptions import HTTPError
  from ..utils import (
+    NO_DEFAULT,
+    ExtractorError,
      clean_html,
      determine_ext,
-    ExtractorError,
      format_field,
      int_or_none,
      merge_dicts,
-    NO_DEFAULT,
      orderedSet,
      remove_quotes,
      remove_start,
      str_to_int,
      update_url_query,
-    urlencode_postdata,
      url_or_none,
+    urlencode_postdata,
  )
  
  
@@ -49,8 +47,8 @@ def dl(*args, **kwargs):
                  r'document\.cookie\s*=\s*["\']RNKEY=',
                  r'document\.location\.reload\(true\)')):
              url_or_request = args[0]
-            url = (url_or_request.get_full_url()
-                   if isinstance(url_or_request, compat_urllib_request.Request)
+            url = (url_or_request.url
+                   if isinstance(url_or_request, Request)
                     else url_or_request)
              phantom = PhantomJSwrapper(self, required_version='2.0')
              phantom.get(url, html=webpage)
@@ -61,6 +59,12 @@ def dl(*args, **kwargs):
      def _real_initialize(self):
          self._logged_in = False
  
+    def _set_age_cookies(self, host):
+        self._set_cookie(host, 'age_verified', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
+        self._set_cookie(host, 'accessPH', '1')
+
      def _login(self, host):
          if self._logged_in:
              return
@@ -83,8 +87,8 @@ def _login(self, host):
  
          def is_logged(webpage):
              return any(re.search(p, webpage) for p in (
-                r'class=["\']signOut',
-                r'>Sign\s+[Oo]ut\s*<'))
+                r'id="profileMenuDropdown"',
+                r'class="ph-icon-logout"'))
  
          if is_logged(login_page):
              self._logged_in = True
@@ -93,7 +97,7 @@ def is_logged(webpage):
          login_form = self._hidden_inputs(login_page)
  
          login_form.update({
-            'username': username,
+            'email': username,
              'password': password,
          })
  
@@ -131,6 +135,7 @@ class PornHubIE(PornHubBaseIE):
                          )
                          (?P<id>[\da-z]+)
                      ''' % PornHubBaseIE._PORNHUB_HOST_RE
+    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
      _TESTS = [{
          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
          'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@@ -260,12 +265,6 @@ class PornHubIE(PornHubBaseIE):
          'only_matching': True,
      }]
  
-    @staticmethod
-    def _extract_urls(webpage):
-        return re.findall(
-            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
-            webpage)
-
      def _extract_count(self, pattern, webpage, name):
          return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
  
@@ -275,8 +274,7 @@ def _real_extract(self, url):
          video_id = mobj.group('id')
  
          self._login(host)
-
-        self._set_cookie(host, 'age_verified', '1')
+        self._set_age_cookies(host)
  
          def dl_webpage(platform):
              self._set_cookie(host, 'platform', platform)
@@ -464,10 +462,6 @@ def add_format(format_url, height=None):
                  continue
              add_format(video_url)
  
-        # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
-        self._sort_formats(
-            formats, field_preference=('height', 'width', 'fps', 'format_id'))
-
          model_profile = self._search_json(
              r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
          video_uploader = self._html_search_regex(
@@ -581,6 +575,7 @@ def _real_extract(self, url):
          mobj = self._match_valid_url(url)
          user_id = mobj.group('id')
          videos_url = '%s/videos' % mobj.group('url')
+        self._set_age_cookies(mobj.group('host'))
          page = self._extract_page(url)
          if page:
              videos_url = update_url_query(videos_url, {'page': page})
@@ -609,7 +604,7 @@ def download_page(base_url, num, fallback=False):
                  base_url, item_id, note, query={'page': num})
  
          def is_404(e):
-            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
+            return isinstance(e.cause, HTTPError) and e.cause.status == 404
  
          base_url = url
          has_page = page is not None
@@ -645,6 +640,7 @@ def _real_extract(self, url):
          item_id = mobj.group('id')
  
          self._login(host)
+        self._set_age_cookies(host)
  
          return self.playlist_result(self._entries(url, host, item_id), item_id)
  
@@ -824,5 +820,6 @@ def _real_extract(self, url):
          item_id = mobj.group('id')
  
          self._login(host)
+        self._set_age_cookies(host)
  
          return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)