]> jfr.im git - yt-dlp.git/commitdiff
[ie/rule34video] Extract tags (#7117)
author soundchaser128 <redacted>
Sun, 17 Sep 2023 15:09:42 +0000 (17:09 +0200)
committer GitHub <redacted>
Sun, 17 Sep 2023 15:09:42 +0000 (15:09 +0000)
Authored by: soundchaser128

yt_dlp/extractor/rule34video.py

index 9d15f4d214d29f96fe8188a7b123c61a381f8f3e..f3250b557a2561baeeb3a274438ba48f800715e7 100644 (file)
@@ -1,6 +1,6 @@
 import re
 
-from ..utils import parse_duration
+from ..utils import parse_duration, unescapeHTML
 from .common import InfoExtractor
 
 
@@ -16,7 +16,8 @@ class Rule34VideoIE(InfoExtractor):
                 'title': 'Shot It-(mmd hmv)',
                 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
                 'duration': 347.0,
-                'age_limit': 18
+                'age_limit': 18,
+                'tags': 'count:14'
             }
         },
         {
@@ -28,7 +29,8 @@ class Rule34VideoIE(InfoExtractor):
                 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
                 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
                 'duration': 938.0,
-                'age_limit': 18
+                'age_limit': 18,
+                'tags': 'count:50'
             }
         },
     ]
@@ -57,5 +59,7 @@ def _real_extract(self, url):
             'title': title,
             'thumbnail': thumbnail,
             'duration': parse_duration(duration),
-            'age_limit': 18
+            'age_limit': 18,
+            'tags': list(map(unescapeHTML, re.findall(
+                r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
         }