]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/xxxymovies.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / xxxymovies.py
CommitLineData
0cc4f8e3 1from .common import InfoExtractor
2from ..utils import (
3 parse_duration,
4 int_or_none,
5)
6
7
class XXXYMoviesIE(InfoExtractor):
    """Extractor for xxxymovies.com video pages.

    Handles URLs of the form
    ``http(s)://[www.]xxxymovies.com/videos/<numeric id>/<display id>``.
    """

    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
        'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
        'info_dict': {
            'id': '138669',
            'display_id': 'ecstatic-orgasm-sofcore',
            'ext': 'mp4',
            'title': 'Ecstatic Orgasm Sofcore',
            'duration': 931,
            'categories': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Download the video page at *url* and scrape its metadata.

        The video URL and the title are looked up without ``fatal=False``
        or a default, i.e. they are treated as required. Thumbnail,
        duration and the view/like/dislike counts are looked up with
        ``fatal=False`` and are therefore optional.

        Returns a dict with the keys ``id``, ``display_id``, ``url``,
        ``title``, ``thumbnail``, ``categories``, ``duration``,
        ``view_count``, ``like_count``, ``dislike_count`` and ``age_limit``.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # The media URL is embedded in the page as a `video_url: '...'` entry.
        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')

        # Prefer the on-page <h1> header; fall back to the <title> tag with
        # the site-name suffix removed.
        title = self._html_search_regex(
            [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
             r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
            webpage, 'title')

        thumbnail = self._search_regex(
            r"preview_url\s*:\s*'([^']+)'",
            webpage, 'thumbnail', fatal=False)

        # Categories come from the comma-separated "keywords" meta tag.
        # Strip each entry and drop blanks so that a missing or empty tag
        # yields [] instead of [''], and "a, b" yields ['a', 'b'].
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='') or ''
        categories = [
            category.strip()
            for category in keywords.split(',')
            if category.strip()
        ]

        # Capture every colon-separated component (M:SS as well as H:MM:SS);
        # capturing only the first two would truncate "1:02:03" to "1:02".
        duration = parse_duration(self._search_regex(
            r'<span>Duration:</span>\s*(\d+(?::\d+)+)',
            webpage, 'duration', fatal=False))

        view_count = int_or_none(self._html_search_regex(
            r'<div class="video_views">\s*(\d+)',
            webpage, 'view count', fatal=False))
        like_count = int_or_none(self._search_regex(
            r'>\s*Likes? <b>\((\d+)\)',
            webpage, 'like count', fatal=False))
        # Accept both "Dislike" and "Dislikes", mirroring the like pattern.
        dislike_count = int_or_none(self._search_regex(
            r'>\s*Dislikes? <b>\((\d+)\)</b>',
            webpage, 'dislike count', fatal=False))

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'age_limit': age_limit,
        }