]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/rule34video.py
3 from ..utils
import parse_duration
4 from .common
import InfoExtractor
7 class Rule34VideoIE(InfoExtractor
):
8 _VALID_URL
= r
'https?://(?:www\.)?rule34video\.com/videos/(?P<id>\d+)'
11 'url': 'https://rule34video.com/videos/3065157/shot-it-mmd-hmv/',
12 'md5': 'ffccac2c23799dabbd192621ae4d04f3',
16 'title': 'Shot It-(mmd hmv)',
17 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
23 'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/',
24 'md5': '6bb5169f9f6b38cd70882bf2e64f6b86',
28 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
29 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
36 def _real_extract(self
, url
):
37 video_id
= self
._match
_id
(url
)
38 webpage
= self
._download
_webpage
(url
, video_id
)
42 for mobj
in re
.finditer(r
'<a[^>]+href="(?P<video_url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p</a>', webpage
):
43 url
, ext
, quality
= mobj
.groups()
50 title
= self
._html
_extract
_title
(webpage
)
51 thumbnail
= self
._html
_search
_regex
(r
'preview_url:\s+\'([^
\']+)\'', webpage, 'thumbnail
', default=None)
52 duration = self._html_search_regex(r'"icon-clock"></i
>\s
+<span
>((?
:\d
+:?
)+)', webpage, 'duration
', default=None)
54 self._sort_formats(formats)
60 'thumbnail
': thumbnail,
61 'duration
': parse_duration(duration),