[yt-dlp.git] / yt_dlp / extractor / sunporno.py

import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    parse_duration,
    qualities,
)


class SunPornoIE(InfoExtractor):
    """Information extractor for sunporno.com video and embed URLs.

    Both URL shapes accepted by ``_VALID_URL`` carry a numeric video id.
    Whatever shape was given, metadata and media links are scraped from the
    ``www.sunporno.com/videos/<id>`` page.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.sunporno.com/videos/807778/',
        'md5': '507887e29033502f29dba69affeebfc9',
        'info_dict': {
            'id': '807778',
            'ext': 'mp4',
            'title': 'md5:0a400058e8105d39e35c35e7c5184164',
            'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 302,
            'age_limit': 18,
        },
    }, {
        'url': 'http://embeds.sunporno.com/embed/807778',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the video page behind ``url`` and build its info dict.

        Args:
            url: A URL matching ``_VALID_URL``; only its numeric id is used.

        Returns:
            A dict with the keys ``id``, ``title``, ``description``,
            ``thumbnail``, ``duration``, ``view_count``, ``comment_count``,
            ``formats`` and ``age_limit`` (always 18).
        """
        video_id = self._match_id(url)

        # Embed URLs are resolved through the regular video page as well.
        page_url = 'http://www.sunporno.com/videos/%s' % video_id
        page = self._download_webpage(page_url, video_id)

        title = self._html_extract_title(page)
        description = self._html_search_meta('description', page, 'description')

        poster_re = r'poster="([^"]+)"'
        thumbnail = self._html_search_regex(
            poster_re, page, 'thumbnail', fatal=False)

        # Two alternative page layouts for the "MM:SS" duration text.
        duration_res = (
            r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
            r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<',
        )
        raw_duration = self._search_regex(
            duration_res, page, 'duration', fatal=False)
        duration = parse_duration(raw_duration)

        raw_views = self._html_search_regex(
            r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
            page, 'view count', fatal=False)
        view_count = int_or_none(raw_views)

        raw_comments = self._html_search_regex(
            r'(\d+)</b> Comments?',
            page, 'comment count', fatal=False, default=None)
        comment_count = int_or_none(raw_comments)

        # Every src attribute of a <source>/<video> tag becomes one format,
        # identified and ranked by its file extension.
        rank = qualities(['mp4', 'flv'])
        media_urls = re.findall(r'<(?:source|video) src="([^"]+)"', page)
        formats = [{
            'url': media_url,
            'format_id': ext,
            'quality': rank(ext),
        } for media_url, ext in ((u, determine_ext(u)) for u in media_urls)]

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
        }
        return info
Commit	Line	Data
7eb21356	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
e897bd82	5	determine_ext,
7eb21356	6	int_or_none,
e897bd82	7	parse_duration,
ae7246e7	8	qualities,
7eb21356	9	)
	10
	11
	12	class SunPornoIE(InfoExtractor):
b69b2ff7 S	13	_VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
b69b2ff7 S	14	_TESTS = [{
7eb21356	15	'url': 'http://www.sunporno.com/videos/807778/',
794e5dcd	16	'md5': '507887e29033502f29dba69affeebfc9',
7eb21356	17	'info_dict': {
7eb21356	18	'id': '807778',
794e5dcd	19	'ext': 'mp4',
7eb21356	20	'title': 'md5:0a400058e8105d39e35c35e7c5184164',
7eb21356	21	'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
ec85ded8	22	'thumbnail': r're:^https?://.*\.jpg$',
7eb21356	23	'duration': 302,
9b330db7	24	'age_limit': 18,
7eb21356	25	}
b69b2ff7 S	26	}, {
	27	'url': 'http://embeds.sunporno.com/embed/807778',
	28	'only_matching': True,
	29	}]
7eb21356	30
7eb21356	31	def _real_extract(self, url):
31424c12	32	video_id = self._match_id(url)
7eb21356	33
b69b2ff7 S	34	webpage = self._download_webpage(
b69b2ff7 S	35	'http://www.sunporno.com/videos/%s' % video_id, video_id)
7eb21356	36
04f3fd2c	37	title = self._html_extract_title(webpage)
31424c12 S	38	description = self._html_search_meta(
31424c12 S	39	'description', webpage, 'description')
7eb21356	40	thumbnail = self._html_search_regex(
ae7246e7	41	r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
7eb21356	42
7eb21356	43	duration = parse_duration(self._search_regex(
794e5dcd S	44	(r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
794e5dcd S	45	r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
31424c12	46	webpage, 'duration', fatal=False))
7eb21356	47
7eb21356	48	view_count = int_or_none(self._html_search_regex(
05aa9c82	49	r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
31424c12	50	webpage, 'view count', fatal=False))
ae7246e7	51	comment_count = int_or_none(self._html_search_regex(
31424c12	52	r'(\d+)</b> Comments?',
794e5dcd	53	webpage, 'comment count', fatal=False, default=None))
ae7246e7 S	54
	55	formats = []
	56	quality = qualities(['mp4', 'flv'])
68f2d273	57	for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
ae7246e7 S	58	video_ext = determine_ext(video_url)
	59	formats.append({
	60	'url': video_url,
	61	'format_id': video_ext,
	62	'quality': quality(video_ext),
	63	})
7eb21356	64
	65	return {
	66	'id': video_id,
7eb21356	67	'title': title,
	68	'description': description,
	69	'thumbnail': thumbnail,
	70	'duration': duration,
	71	'view_count': view_count,
ae7246e7 S	72	'comment_count': comment_count,
ae7246e7 S	73	'formats': formats,
9b330db7	74	'age_limit': 18,
7eb21356	75	}