Update to ytdl-2021.01.16

[yt-dlp.git] / youtube_dlc / extractor / youporn.py
diff --git a/youtube_dlc/extractor/youporn.py b/youtube_dlc/extractor/youporn.py

index 7b9feafeb2a6beb75a8f7e9424fd74af0b227c3f..534270bac3d461bfca5bf0035e6435829413811d 100644 (file)
--- a/youtube_dlc/extractor/youporn.py
+++ b/youtube_dlc/extractor/youporn.py
@@ -60,6 +60,9 @@ class YouPornIE(InfoExtractor):
      }, {
          'url': 'http://www.youporn.com/watch/505835',
          'only_matching': True,
+    }, {
+        'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
+        'only_matching': True,
      }]
  
      @staticmethod
@@ -88,7 +91,7 @@ def _real_extract(self, url):
          # Main source
          definitions = self._parse_json(
              self._search_regex(
-                r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
+                r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
                  'media definitions', default='[]'),
              video_id, fatal=False)
          if definitions:
@@ -100,7 +103,7 @@ def _real_extract(self, url):
                      links.append(video_url)
  
          # Fallback #1, this also contains extra low quality 180p format
-        for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
+        for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
              links.append(link)
  
          # Fallback #2 (unavailable as at 22.06.2017)
@@ -128,8 +131,9 @@ def _real_extract(self, url):
              # Video URL's path looks like this:
              #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
              #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /videos/201703/11/109285532/1080P_4000K_109285532.mp4
              # We will benefit from it by extracting some metadata
-            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
              if mobj:
                  height = int(mobj.group('height'))
                  bitrate = int(mobj.group('bitrate'))