]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/xxxymovies.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / xxxymovies.py
1 from .common import InfoExtractor
2 from ..utils import (
3 int_or_none,
4 parse_duration,
5 )
6
7
class XXXYMoviesIE(InfoExtractor):
    """Extractor for xxxymovies.com video pages.

    ``_VALID_URL`` captures the numeric video id and the URL slug
    (``display_id``); all other metadata is scraped from the page HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
        'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
        'info_dict': {
            'id': '138669',
            'display_id': 'ecstatic-orgasm-sofcore',
            'ext': 'mp4',
            'title': 'Ecstatic Orgasm Sofcore',
            'duration': 931,
            'categories': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        The video URL and title are looked up without ``fatal=False``;
        thumbnail, duration and the counters are looked up with
        ``fatal=False`` (presumably so a miss there does not abort the
        extraction -- see the InfoExtractor helpers to confirm).
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')

        title = self._html_search_regex(
            [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
             r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
            webpage, 'title')

        thumbnail = self._search_regex(
            r"preview_url\s*:\s*'([^']+)'",
            webpage, 'thumbnail', fatal=False)

        # A plain ''.split(',') would produce [''] when the keywords meta tag
        # is missing or empty, and would keep the whitespace around commas.
        # Strip every entry and drop the empty ones instead.
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = [
            category.strip()
            for category in keywords.split(',')
            if category.strip()
        ]

        # Accept an optional leading hours field: with only (\d+:\d+) a value
        # such as "1:02:03" would be captured as "1:02" and under-reported.
        duration = parse_duration(self._search_regex(
            r'<span>Duration:</span>\s*((?:\d+:)?\d+:\d+)',
            webpage, 'duration', fatal=False))

        view_count = int_or_none(self._html_search_regex(
            r'<div class="video_views">\s*(\d+)',
            webpage, 'view count', fatal=False))
        like_count = int_or_none(self._search_regex(
            r'>\s*Likes? <b>\((\d+)\)',
            webpage, 'like count', fatal=False))
        dislike_count = int_or_none(self._search_regex(
            r'>\s*Dislike <b>\((\d+)\)</b>',
            webpage, 'dislike count', fatal=False))

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'age_limit': age_limit,
        }