[yt-dlp.git] / yt_dlp / extractor / pornez.py

from .common import InfoExtractor
from ..utils import int_or_none, urljoin


class PornezIE(InfoExtractor):
    """Information extractor for pornez.net video pages.

    The numeric part of the ``/video<id>/`` URL is used as the video id.
    Metadata is read from the watch page; formats come from the HTML5 media
    element found in the document referenced by the page's first ``<iframe>``.
    """

    _VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P<id>[0-9]+)/'
    _TEST = {
        'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
        'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc',
        'info_dict': {
            'id': '344819',
            'ext': 'mp4',
            'title': r'mistresst funny_penis_names wmv',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single pornez.net video URL.

        Downloads the watch page, follows its iframe, and parses the HTML5
        media entries of the iframe document. The iframe and the title are
        mandatory (extraction fails without them); the thumbnail and the
        per-format height are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Resolve the iframe src against the site root in case it is relative.
        iframe_src = self._html_search_regex(
            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe', fatal=True)
        iframe_src = urljoin('https://pornez.net', iframe_src)

        # Prefer the meta tags; fall back to the page heading.
        title = self._html_search_meta(
            ['name', 'twitter:title', 'og:title'], webpage, 'title', default=None)
        if title is None:
            title = self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title', fatal=True)
        thumbnail = self._html_search_meta(
            ['thumbnailUrl'], webpage, 'thumbnail', default=None)

        iframe_page = self._download_webpage(iframe_src, video_id)
        entries = self._parse_html5_media_entries(iframe_src, iframe_page, video_id)[0]
        for fmt in entries['formats']:
            # Height is encoded in the playlist name as "..._<height>.m3u8".
            # It is optional metadata, so a URL that does not follow this
            # pattern must not abort extraction; the format is simply left
            # without a height-based id.
            height = self._search_regex(
                r'_(\d+)\.m3u8', fmt['url'], 'height', default=None)
            if height is None:
                continue
            fmt['format_id'] = '%sp' % height
            fmt['height'] = int_or_none(height)

        entries.update({
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'age_limit': 18
        })
        return entries
Commit	Line	Data
768145d4	1	from .common import InfoExtractor
f7efe6dc	2	from ..utils import int_or_none, urljoin
768145d4 ML	3
	4
	5	class PornezIE(InfoExtractor):
	6	_VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P<id>[0-9]+)/'
	7	_TEST = {
	8	'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
	9	'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc',
	10	'info_dict': {
	11	'id': '344819',
	12	'ext': 'mp4',
	13	'title': r'mistresst funny_penis_names wmv',
	14	'thumbnail': r're:^https?://.*\.jpg$',
	15	'age_limit': 18,
	16	}
	17	}
	18
	19	def _real_extract(self, url):
	20	video_id = self._match_id(url)
	21	webpage = self._download_webpage(url, video_id)
	22	iframe_src = self._html_search_regex(
f7efe6dc J	23	r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe', fatal=True)
f7efe6dc J	24	iframe_src = urljoin('https://pornez.net', iframe_src)
768145d4 ML	25	title = self._html_search_meta(['name', 'twitter:title', 'og:title'], webpage, 'title', default=None)
	26	if title is None:
	27	title = self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title', fatal=True)
	28	thumbnail = self._html_search_meta(['thumbnailUrl'], webpage, 'title', default=None)
	29	webpage = self._download_webpage(iframe_src, video_id)
	30	entries = self._parse_html5_media_entries(iframe_src, webpage, video_id)[0]
	31	for format in entries['formats']:
	32	height = self._search_regex(r'_(\d+)\.m3u8', format['url'], 'height')
	33	format['format_id'] = '%sp' % height
	34	format['height'] = int_or_none(height)
	35
	36	entries.update({
	37	'id': video_id,
	38	'title': title,
	39	'thumbnail': thumbnail,
	40	'age_limit': 18
	41	})
	42	return entries