[yt-dlp.git] / youtube_dl / extractor / sunporno.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    int_or_none,
)


class SunPornoIE(InfoExtractor):
    """Extractor for sunporno.com video pages (URLs matching ``_VALID_URL``).

    The numeric path component of the URL is used as the video id; all
    other metadata is scraped from the downloaded page with regular
    expressions.
    """

    _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.sunporno.com/videos/807778/',
        'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
        'info_dict': {
            'id': '807778',
            'ext': 'flv',
            'title': 'md5:0a400058e8105d39e35c35e7c5184164',
            'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
            'duration': 302,
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and scrape the video information.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description``, ``thumbnail``, ``duration`` and ``view_count``.

        The video URL and title lookups use the helper's default
        ``fatal`` setting, while the remaining lookups pass
        ``fatal=False`` explicitly -- presumably so that a missing
        optional field does not abort extraction (confirm against
        ``InfoExtractor._search_regex``).
        """
        # The caller is expected to pass a URL that matches _VALID_URL;
        # a non-matching URL fails here with AttributeError on None.
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)

        info = {'id': video_id}

        # Media location: the `src` attribute of a <source> tag embedded
        # in a single-quoted `videoSource = '...'` assignment.
        info['url'] = self._html_search_regex(
            r'videoSource\s*=\s*\'<source\s*src="([^"]*)"',
            page, 'video URL')

        info['title'] = self._html_search_regex(
            r'<title>([^<]*)</title>', page, 'title')

        info['description'] = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"',
            page, 'description', fatal=False)

        info['thumbnail'] = self._html_search_regex(
            r'poster="([^"]*)"', page, 'thumbnail', fatal=False)

        # Duration is captured as "<digits>:<digits>" text and handed to
        # parse_duration for conversion.
        duration_text = self._search_regex(
            r'<span>Duration: (\d+:\d+)</span>',
            page, 'duration', fatal=False)
        info['duration'] = parse_duration(duration_text)

        # The view counter is captured as a digit string and handed to
        # int_or_none for conversion.
        views_text = self._html_search_regex(
            r'<span class="views">(\d+)</span>',
            page, 'view count', fatal=False)
        info['view_count'] = int_or_none(views_text)

        return info
Commit	Line	Data
7eb21356	1	from __future__ import unicode_literals
	2
	3	import re
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	parse_duration,
	8	int_or_none,
	9	)
	10
	11
	12	class SunPornoIE(InfoExtractor):
	13	_VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P<id>\d+)'
	14	_TEST = {
	15	'url': 'http://www.sunporno.com/videos/807778/',
	16	'md5': '6457d3c165fd6de062b99ef6c2ff4c86',
	17	'info_dict': {
	18	'id': '807778',
	19	'ext': 'flv',
	20	'title': 'md5:0a400058e8105d39e35c35e7c5184164',
	21	'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
	22	'duration': 302,
	23	}
	24	}
	25
	26	def _real_extract(self, url):
	27	mobj = re.match(self._VALID_URL, url)
	28	video_id = mobj.group('id')
	29
	30	webpage = self._download_webpage(url, video_id)
	31
	32	video_url = self._html_search_regex(
	33	r'videoSource\s*=\s*\'<source\s*src="([^"]*)"', webpage, 'video URL')
	34
	35	title = self._html_search_regex(r'<title>([^<]*)</title>', webpage, 'title')
	36
	37	description = self._html_search_regex(
	38	r'<meta name="description" content="([^"]*)"', webpage, 'description', fatal=False)
	39
	40	thumbnail = self._html_search_regex(
	41	r'poster="([^"]*)"', webpage, 'thumbnail', fatal=False)
	42
	43	duration = parse_duration(self._search_regex(
	44	r'<span>Duration: (\d+:\d+)</span>', webpage, 'duration', fatal=False))
	45
	46	view_count = int_or_none(self._html_search_regex(
	47	r'<span class="views">(\d+)</span>', webpage, 'view count', fatal=False))
	48
	49	return {
	50	'id': video_id,
	51	'url': video_url,
	52	'title': title,
	53	'description': description,
	54	'thumbnail': thumbnail,
	55	'duration': duration,
	56	'view_count': view_count,
	57	}