[yt-dlp.git] / yt_dlp / extractor / alphaporno.py

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    parse_filesize,
    parse_iso8601,
)


class AlphaPornoIE(InfoExtractor):
    """Information extractor for single video pages on alphaporno.com.

    The video page is downloaded once; the media URL and numeric video id are
    read from inline ``video_url: '...'`` / ``video_id: '...'`` assignments,
    and the remaining metadata comes from ``<meta>`` tags in the page.
    """

    # The slug stops at '/', '?' or '#' so that a query string or fragment is
    # never captured as part of the display id.
    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
        'info_dict': {
            'id': '258807',
            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
            'ext': 'mp4',
            'title': 'Sensual striptease porn with Samantha Alexandra',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1418694611,
            'upload_date': '20141216',
            'duration': 387,
            'filesize_approx': 54120000,
            'tbr': 1145,
            'categories': list,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Download the video page for *url* and return its info dict.

        The ``video_url`` and ``title`` lookups are made without a default;
        every other field is looked up with a default or passed through a
        parsing helper (``parse_iso8601``, ``parse_duration``,
        ``parse_filesize``, ``int_or_none``).
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # 'id' must never be None in the result: when the page does not
        # expose a numeric id, fall back to the URL slug.
        video_id = self._search_regex(
            r"video_id\s*:\s*'([^']+)'", webpage, 'video id',
            default=display_id)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
        # The default is written as '.mp4'; strip the leading dot(s) rather
        # than slicing blindly, so a value without a dot stays intact.
        ext = self._html_search_meta(
            'encodingFormat', webpage, 'ext', default='.mp4').lstrip('.')

        title = self._search_regex(
            [r'<meta content="([^"]+)" itemprop="description">',
             r'class="title" itemprop="name">([^<]+)<'],
            webpage, 'title')
        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        filesize_approx = parse_filesize(self._html_search_meta(
            'contentSize', webpage, 'file size'))
        bitrate = int_or_none(self._html_search_meta(
            'bitrate', webpage, 'bitrate'))

        # Keywords are comma separated; drop surrounding whitespace and empty
        # entries so that a missing meta tag yields [] instead of [''].
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = [
            category.strip() for category in keywords.split(',')
            if category.strip()]

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'filesize_approx': filesize_approx,
            'tbr': bitrate,
            'categories': categories,
            'age_limit': age_limit,
        }
Commit	Line	Data
4cda41ac	1	from .common import InfoExtractor
e82def52	2	from ..utils import (
e897bd82	3	int_or_none,
e82def52 S	4	parse_duration,
e82def52 S	5	parse_filesize,
e897bd82	6	parse_iso8601,
e82def52 S	7	)
e82def52 S	8
4cda41ac	9
4cda41ac	10	class AlphaPornoIE(InfoExtractor):
e82def52	11	_VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
4cda41ac	12	_TEST = {
	13	'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
	14	'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
	15	'info_dict': {
	16	'id': '258807',
e82def52	17	'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
4cda41ac	18	'ext': 'mp4',
e82def52	19	'title': 'Sensual striptease porn with Samantha Alexandra',
ec85ded8	20	'thumbnail': r're:https?://.*\.jpg$',
e82def52 S	21	'timestamp': 1418694611,
	22	'upload_date': '20141216',
	23	'duration': 387,
	24	'filesize_approx': 54120000,
	25	'tbr': 1145,
	26	'categories': list,
4cda41ac	27	'age_limit': 18,
add96eb9	28	},
4cda41ac	29	}
	30
	31	def _real_extract(self, url):
e82def52	32	display_id = self._match_id(url)
4cda41ac	33
e82def52	34	webpage = self._download_webpage(url, display_id)
4cda41ac	35
e82def52 S	36	video_id = self._search_regex(
e82def52 S	37	r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
4cda41ac	38
e82def52 S	39	video_url = self._search_regex(
	40	r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
	41	ext = self._html_search_meta(
	42	'encodingFormat', webpage, 'ext', default='.mp4')[1:]
4cda41ac	43
e82def52 S	44	title = self._search_regex(
	45	[r'<meta content="([^"]+)" itemprop="description">',
	46	r'class="title" itemprop="name">([^<]+)<'],
	47	webpage, 'title')
	48	thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
	49	timestamp = parse_iso8601(self._html_search_meta(
	50	'uploadDate', webpage, 'upload date'))
	51	duration = parse_duration(self._html_search_meta(
	52	'duration', webpage, 'duration'))
	53	filesize_approx = parse_filesize(self._html_search_meta(
	54	'contentSize', webpage, 'file size'))
	55	bitrate = int_or_none(self._html_search_meta(
	56	'bitrate', webpage, 'bitrate'))
	57	categories = self._html_search_meta(
	58	'keywords', webpage, 'categories', default='').split(',')
4cda41ac	59
e82def52	60	age_limit = self._rta_search(webpage)
4cda41ac	61
	62	return {
	63	'id': video_id,
e82def52	64	'display_id': display_id,
4cda41ac	65	'url': video_url,
4cda41ac	66	'ext': ext,
e82def52	67	'title': title,
4cda41ac	68	'thumbnail': thumbnail,
e82def52 S	69	'timestamp': timestamp,
	70	'duration': duration,
	71	'filesize_approx': filesize_approx,
	72	'tbr': bitrate,
4cda41ac	73	'categories': categories,
e82def52	74	'age_limit': age_limit,
4cda41ac	75	}