[ie/crunchyroll] Fix stream extraction (#10005)

[yt-dlp.git] / yt_dlp / extractor / youporn.py
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py

index 6ee0abcae0d350a538a531df4f0238fdfb102520..6d4e31bf34b6f1a8d03bab03f4eb30c241d48e0f 100644 (file)
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -72,15 +72,15 @@ class YouPornIE(InfoExtractor):
              'id': '16290308',
              'age_limit': 18,
              'categories': [],
-            'description': 'md5:00ea70f642f431c379763c17c2f396bc',
+            'description': str,  # TODO: detect/remove SEO spam description in ytdl backport
              'display_id': 'tinderspecial-trailer1',
              'duration': 298.0,
              'ext': 'mp4',
              'upload_date': '20201123',
              'uploader': 'Ersties',
              'tags': [],
-            'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
-            'timestamp': 1606089600,
+            'thumbnail': r're:https://.+\.jpg',
+            'timestamp': 1606147564,
              'title': 'Tinder In Real Life',
              'view_count': int,
          }
@@ -88,11 +88,17 @@ class YouPornIE(InfoExtractor):
  
      def _real_extract(self, url):
          video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
-        definitions = self._download_json(
-            f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+        self._set_cookie('.youporn.com', 'age_verified', '1')
+        webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
+        definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions']
  
-        def get_format_data(data, f):
-            return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+        def get_format_data(data, stream_type):
+            info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
+            if not info_url:
+                return []
+            return traverse_obj(
+                self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
+                lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
  
          formats = []
          # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@@ -123,10 +129,6 @@ def get_format_data(data, f):
              f['height'] = height
              formats.append(f)
  
-        webpage = self._download_webpage(
-            'http://www.youporn.com/watch/%s' % video_id, display_id,
-            headers={'Cookie': 'age_verified=1'})
-
          title = self._html_search_regex(
              r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
              webpage, 'title', default=None) or self._og_search_title(