[yt-dlp.git] / youtube_dl / extractor / spankbang.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor


class SpankBangIE(InfoExtractor):
    """Information extractor for video pages on spankbang.com.

    Handles URLs of the form ``spankbang.com/<id>/video...``, optionally
    prefixed with ``www.`` or a two-letter subdomain.
    """

    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
    _TEST = {
        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
        'md5': '1cc433e1d6aa14bc376535b8679302f7',
        'info_dict': {
            'id': '3vvn',
            'ext': 'mp4',
            'title': 'fantasy solo',
            'description': 'dillion harper masturbates on a bed',
            # Raw string: '\.' in a plain literal is an invalid escape
            # sequence that newer Python versions warn about. The resulting
            # value is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'silly2587',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Downloads the page, reads the ``stream_key`` JavaScript variable and
        creates one MP4 format per quality marker (``q_<height>p``) found in
        the markup.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``formats`` and ``age_limit``.

        NOTE(review): the stream key and title lookups pass no ``default`` or
        ``fatal=False``, so they presumably raise when the pattern is missing;
        confirm against the ``InfoExtractor`` helpers.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The stream key is part of every media URL built below.
        stream_key = self._html_search_regex(
            r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
            webpage, 'stream key')

        # Each quality marker in the page yields one format entry.
        formats = [{
            'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
            'ext': 'mp4',
            'format_id': '%sp' % height,
            'height': int(height),
        } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'(?s)<h1>(.+?)</h1>', webpage, 'title')
        # Description is optional: fall back to None when it is absent.
        description = self._search_regex(
            r'class="desc"[^>]*>([^<]+)',
            webpage, 'description', default=None)
        thumbnail = self._og_search_thumbnail(webpage)
        # Uploader is looked up with fatal=False, i.e. on a best-effort basis.
        uploader = self._search_regex(
            r'class="user"[^>]*>([^<]+)',
            webpage, 'uploader', fatal=False)

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': formats,
            'age_limit': age_limit,
        }
Commit	Line	Data
64102296	1	from __future__ import unicode_literals
64102296	2
64102296	3	import re
64102296	4
d97aae75 S	5	from .common import InfoExtractor
	6
	7
64102296	8	class SpankBangIE(InfoExtractor):
d97aae75	9	_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
5c1d459a	10	_TEST = {
d97aae75 S	11	'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
	12	'md5': '1cc433e1d6aa14bc376535b8679302f7',
	13	'info_dict': {
	14	'id': '3vvn',
	15	'ext': 'mp4',
	16	'title': 'fantasy solo',
	17	'description': 'dillion harper masturbates on a bed',
	18	'thumbnail': 're:^https?://.*\.jpg$',
	19	'uploader': 'silly2587',
	20	'age_limit': 18,
5c1d459a	21	}
5c1d459a	22	}
64102296	23
	24	def _real_extract(self, url):
	25	video_id = self._match_id(url)
	26	webpage = self._download_webpage(url, video_id)
	27
d97aae75 S	28	stream_key = self._html_search_regex(
	29	r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
	30	webpage, 'stream key')
	31
	32	formats = [{
	33	'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
	34	'ext': 'mp4',
	35	'format_id': '%sp' % height,
	36	'height': int(height),
	37	} for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
	38	self._sort_formats(formats)
	39
	40	title = self._html_search_regex(
	41	r'(?s)<h1>(.+?)</h1>', webpage, 'title')
	42	description = self._search_regex(
	43	r'class="desc"[^>]*>([^<]+)',
	44	webpage, 'description', default=None)
	45	thumbnail = self._og_search_thumbnail(webpage)
	46	uploader = self._search_regex(
	47	r'class="user"[^>]*>([^<]+)',
	48	webpage, 'uploader', fatal=False)
	49
	50	age_limit = self._rta_search(webpage)
64102296	51
64102296	52	return {
d97aae75 S	53	'id': video_id,
	54	'title': title,
	55	'description': description,
	56	'thumbnail': thumbnail,
	57	'uploader': uploader,
	58	'formats': formats,
	59	'age_limit': age_limit,
64102296	60	}