]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/alphaporno.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / alphaporno.py
CommitLineData
4cda41ac 1from .common import InfoExtractor
e82def52 2from ..utils import (
e897bd82 3 int_or_none,
e82def52
S
4 parse_duration,
5 parse_filesize,
e897bd82 6 parse_iso8601,
e82def52
S
7)
8
4cda41ac 9
class AlphaPornoIE(InfoExtractor):
    # Extractor for single video pages under alphaporno.com/videos/<slug>.
    # The URL slug is used as the display id; the site's own video id is read
    # from the page's inline player configuration.
    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
        'info_dict': {
            'id': '258807',
            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
            'ext': 'mp4',
            'title': 'Sensual striptease porn with Samantha Alexandra',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1418694611,
            'upload_date': '20141216',
            'duration': 387,
            'filesize_approx': 54120000,
            'tbr': 1145,
            'categories': list,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Download the video page at *url* and build its info dict.

        The video URL and title are looked up without a default; every other
        field is taken from the page's meta tags and passed through the
        matching parse helper.

        Returns a dict with the keys ``id``, ``display_id``, ``url``, ``ext``,
        ``title``, ``thumbnail``, ``timestamp``, ``duration``,
        ``filesize_approx``, ``tbr``, ``categories`` and ``age_limit``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # Fall back to the URL slug so the result never carries an `id` of
        # None when the inline `video_id: '...'` marker is absent.
        video_id = self._search_regex(
            r"video_id\s*:\s*'([^']+)'", webpage, 'video id',
            default=display_id)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')

        # The default is written with a leading dot ('.mp4'). Strip dots
        # instead of slicing off the first character so that a value without
        # the dot is not truncated.
        ext = self._html_search_meta(
            'encodingFormat', webpage, 'ext', default='.mp4').lstrip('.')

        title = self._search_regex(
            [r'<meta content="([^"]+)" itemprop="description">',
             r'class="title" itemprop="name">([^<]+)<'],
            webpage, 'title')
        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        filesize_approx = parse_filesize(self._html_search_meta(
            'contentSize', webpage, 'file size'))
        bitrate = int_or_none(self._html_search_meta(
            'bitrate', webpage, 'bitrate'))

        # Keywords are a comma-separated list. Trim each entry and drop empty
        # ones so a missing or empty meta tag yields [] rather than [''].
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = [
            keyword.strip() for keyword in keywords.split(',')
            if keyword.strip()]

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'filesize_approx': filesize_approx,
            'tbr': bitrate,
            'categories': categories,
            'age_limit': age_limit,
        }