]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/youporn.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / youporn.py
index 5feb568e72ea1a87f3b891d702a2f9dc4489821a..2f3f2133240d5fae935fc0560a257acd1d4f729b 100644 (file)
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
@@ -14,6 +12,7 @@
 
 class YouPornIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
     _TESTS = [{
         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@@ -67,12 +66,6 @@ class YouPornIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def _extract_urls(webpage):
-        return re.findall(
-            r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
-            webpage)
-
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         video_id = mobj.group('id')
@@ -110,7 +103,6 @@ def _real_extract(self, url):
                 })
             f['height'] = height
             formats.append(f)
-        self._sort_formats(formats)
 
         webpage = self._download_webpage(
             'http://www.youporn.com/watch/%s' % video_id, display_id,
@@ -137,9 +129,10 @@ def _real_extract(self, url):
             r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
             webpage, 'uploader', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
-            [r'UPLOADED:\s*<span>([^<]+)',
+            (r'UPLOADED:\s*<span>([^<]+)',
              r'Date\s+[Aa]dded:\s*<span>([^<]+)',
-             r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+             r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
+             r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
             webpage, 'upload date', fatal=False))
 
         age_limit = self._rta_search(webpage)