[yt-dlp.git] / youtube_dl / extractor / pornhub.py

# coding: utf-8
from __future__ import unicode_literals

import functools
import itertools
import operator
import re

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    js_to_json,
    orderedSet,
    remove_quotes,
    str_to_int,
)


class PornHubIE(InfoExtractor):
    IE_DESC = 'PornHub and Thumbzilla'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                            (?:www\.)?thumbzilla\.com/video/
                        )
                        (?P<id>[\da-z]+)
                    '''
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': '1e19b41231a02eba417839222ac9d58e',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
            'uploader': 'Babes',
            'duration': 361,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
        },
    }, {
        # non-ASCII title
        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
        'info_dict': {
            'id': '1331683002',
            'ext': 'mp4',
            'title': '重庆婷婷女王足交',
            'uploader': 'Unknown',
            'duration': 1753,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
        'only_matching': True,
    }, {
        # removed at the request of cam4.com
        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
        'only_matching': True,
    }, {
        # removed at the request of the copyright owner
        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
        'only_matching': True,
    }, {
        # removed by uploader
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
        'only_matching': True,
    }, {
        # private video
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
        'only_matching': True,
    }, {
        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
            webpage)

    def _extract_count(self, pattern, webpage, name):
        """Search webpage for pattern and convert the match with str_to_int.

        name is only used to build the '<name> count' label passed to
        _search_regex.  The search is non-fatal (fatal=False), so whatever
        _search_regex yields for a miss is handed to str_to_int unchanged.
        """
        label = '%s count' % name
        raw_count = self._search_regex(pattern, webpage, label, fatal=False)
        return str_to_int(raw_count)

    def _real_extract(self, url):
        """Extract metadata and downloadable formats for one video.

        The video id is taken from url and the ``view_video.php`` page on
        www.pornhub.com is downloaded for it, regardless of which accepted
        URL form was given.

        Video URLs are collected from, in order: the ``mediaDefinitions``
        list of the page's ``flashvars_*`` object, the ``mediastring``
        variable of the 'tv' page (only when the first source yielded
        nothing) and any ``downloadBtn`` links on the page.

        Returns the info dict for the video.

        Raises ExtractorError (expected) when the page carries a
        removed/user-message notice, and ExtractorError when the 'tv'
        fallback page does not yield a ``mediastring`` value.
        """
        video_id = self._match_id(url)

        self._set_cookie('pornhub.com', 'age_verified', '1')

        def dl_webpage(platform):
            # The 'platform' cookie is set right before each download;
            # presumably it selects which page variant the site serves.
            self._set_cookie('pornhub.com', 'platform', platform)
            return self._download_webpage(
                'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
                video_id, 'Downloading %s webpage' % platform)

        webpage = dl_webpage('pc')

        error_msg = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
            webpage, 'error message', default=None, group='error')
        if error_msg:
            # Collapse runs of whitespace so the message fits on one line.
            error_msg = re.sub(r'\s+', ' ', error_msg)
            raise ExtractorError(
                'PornHub said: %s' % error_msg,
                expected=True, video_id=video_id)

        # video_title from flashvars contains whitespace instead of non-ASCII (see
        # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
        # on that anymore.
        title = self._html_search_meta(
            'twitter:title', webpage, default=None) or self._search_regex(
            (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
             r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
             r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
            webpage, 'title', group='title')

        # (url, height) pairs in discovery order; the set de-duplicates URLs
        # across the different sources.
        video_urls = []
        video_urls_set = set()

        thumbnail = duration = None

        flashvars = self._parse_json(
            self._search_regex(
                r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
            video_id)
        if flashvars:
            thumbnail = flashvars.get('image_url')
            duration = int_or_none(flashvars.get('video_duration'))
            media_definitions = flashvars.get('mediaDefinitions')
            if isinstance(media_definitions, list):
                for definition in media_definitions:
                    if not isinstance(definition, dict):
                        continue
                    video_url = definition.get('videoUrl')
                    if not video_url or not isinstance(video_url, compat_str):
                        continue
                    if video_url in video_urls_set:
                        continue
                    video_urls_set.add(video_url)
                    video_urls.append(
                        (video_url, int_or_none(definition.get('quality'))))

        if not video_urls:
            # Fallback: the 'tv' page assembles the media URL in JavaScript
            # from string fragments stored in several variables.
            tv_webpage = dl_webpage('tv')

            assignments = self._search_regex(
                r'(var.+?mediastring.+?)</script>', tv_webpage,
                'encoded url').split(';')

            js_vars = {}

            def parse_js_value(inp):
                # Drop /* ... */ comments, then evaluate a '+' concatenation
                # of quoted strings and previously assigned variables.
                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
                if '+' in inp:
                    inps = inp.split('+')
                    return functools.reduce(
                        operator.concat, map(parse_js_value, inps))
                inp = inp.strip()
                if inp in js_vars:
                    return js_vars[inp]
                return remove_quotes(inp)

            for assn in assignments:
                assn = assn.strip()
                if not assn:
                    continue
                assn = re.sub(r'var\s+', '', assn)
                if '=' not in assn:
                    # Not an assignment; nothing to record.
                    continue
                vname, value = assn.split('=', 1)
                # Names are looked up stripped in parse_js_value, so they must
                # be stored stripped too or "a = ..." would never resolve.
                js_vars[vname.strip()] = parse_js_value(value)

            video_url = js_vars.get('mediastring')
            if not video_url:
                raise ExtractorError(
                    'Unable to extract mediastring', video_id=video_id)
            if video_url not in video_urls_set:
                video_urls.append((video_url, None))
                video_urls_set.add(video_url)

        for mobj in re.finditer(
                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage):
            video_url = mobj.group('url')
            if video_url not in video_urls_set:
                video_urls.append((video_url, None))
                video_urls_set.add(video_url)

        formats = []
        for video_url, height in video_urls:
            tbr = None
            # URLs may embed "<height>P_<bitrate>K"; use it for the bitrate
            # and as a height fallback when none was given explicitly.
            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
            if mobj:
                if not height:
                    height = int(mobj.group('height'))
                tbr = int(mobj.group('tbr'))
            formats.append({
                'url': video_url,
                'format_id': '%dp' % height if height else None,
                'height': height,
                'tbr': tbr,
            })
        self._sort_formats(formats)

        video_uploader = self._html_search_regex(
            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
            webpage, 'uploader', fatal=False)

        view_count = self._extract_count(
            r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
        like_count = self._extract_count(
            r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
        dislike_count = self._extract_count(
            r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

        def split_csv(value):
            # Only a string can be split; a missing key or any other type
            # means the field is unknown.  Empty items are dropped so that an
            # empty string gives [] rather than [''].
            if not isinstance(value, compat_str):
                return None
            return [item for item in value.split(',') if item]

        page_params = self._parse_json(self._search_regex(
            r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
            webpage, 'page parameters', group='data', default='{}'),
            video_id, transform_source=js_to_json, fatal=False)
        tags = categories = None
        if isinstance(page_params, dict) and page_params:
            tags = split_csv(page_params.get('tags'))
            categories = split_csv(page_params.get('categories'))

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': 18,
            'tags': tags,
            'categories': categories,
        }


class PornHubPlaylistBaseIE(InfoExtractor):
    """Shared logic for extractors that list several PornHub videos."""

    def _extract_entries(self, webpage):
        """Return url_result entries for the video links found in webpage.

        Links are de-duplicated with orderedSet and every entry is
        delegated to PornHubIE with the link's ``title`` attribute as the
        video title.
        """
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
        # https://github.com/rg3/youtube-dl/issues/11594).
        container = self._search_regex(
            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
            'container', default=webpage)

        return [
            self.url_result(
                'http://www.pornhub.com/%s' % video_url,
                PornHubIE.ie_key(), video_title=title)
            for video_url, title in orderedSet(re.findall(
                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
                container))
        ]

    def _real_extract(self, url):
        """Download the page at url and return it as a playlist result.

        Title and description come from the page's ``playlistObject`` /
        ``PLAYLIST_VIEW`` JSON object when it is present; otherwise the
        title falls back to the "Videos in ... Playlist" heading and the
        description is left unset.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = self._extract_entries(webpage)

        # The JSON is parsed non-fatally, so a malformed object may come
        # back as a non-dict value; fall back to an empty dict so the
        # .get() calls below cannot fail.
        playlist = self._parse_json(
            self._search_regex(
                r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
                'playlist', default='{}'),
            playlist_id, fatal=False)
        if not isinstance(playlist, dict):
            playlist = {}

        title = playlist.get('title') or self._search_regex(
            r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)

        return self.playlist_result(
            entries, playlist_id, title, playlist.get('description'))


# Extractor for numeric playlist pages; it defines only the URL pattern and
# test cases, all extraction logic is inherited from PornHubPlaylistBaseIE.
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
    # Any (or no) subdomain of pornhub.com is accepted; the playlist id is
    # the run of digits after /playlist/.
    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/4667351',
        'info_dict': {
            'id': '4667351',
            'title': 'Nataly Hot',
        },
        'playlist_mincount': 2,
    }, {
        # localized subdomain, URL matching only
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }]


class PornHubUserVideosIE(PornHubPlaylistBaseIE):
    """Extractor for the paginated video listings of users and channels."""

    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
    _TESTS = [{
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'info_dict': {
            'id': 'zoe_ph',
        },
        'playlist_mincount': 171,
    }, {
        'url': 'http://www.pornhub.com/users/rushandlia/videos',
        'only_matching': True,
    }, {
        # default sorting as Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos',
        'info_dict': {
            'id': 'povd',
        },
        'playlist_mincount': 293,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
        'only_matching': True,
    }, {
        # Most Recent Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect entries from successive listing pages into one playlist.

        The same url is requested with ``page=1, 2, ...`` until a request
        fails with HTTP 404 or a page yields no entries.  Any other
        download error is propagated unchanged.
        """
        user_id = self._match_id(url)

        collected = []
        page_num = 0
        while True:
            page_num += 1
            note = 'Downloading page %d' % page_num
            try:
                page_html = self._download_webpage(
                    url, user_id, note, query={'page': page_num})
            except ExtractorError as err:
                cause = err.cause
                # Only a 404 marks the end of the listing; re-raise the rest.
                if not isinstance(cause, compat_HTTPError) or cause.code != 404:
                    raise
                break
            found = self._extract_entries(page_html)
            if not found:
                break
            collected += found

        return self.playlist_result(collected, user_id)
Commit	Line	Data
6c376029	1	# coding: utf-8
9933b574 PH	2	from __future__ import unicode_literals
9933b574 PH	3
21fbf0f9	4	import functools
34541395	5	import itertools
21fbf0f9	6	import operator
125cfd78	7	import re
	8
	9	from .common import InfoExtractor
1cc79574	10	from ..compat import (
34541395	11	compat_HTTPError,
79367a98	12	compat_str,
1cc79574 PH	13	)
1cc79574 PH	14	from ..utils import (
50789175	15	ExtractorError,
ed8648a3	16	int_or_none,
6bb05b32	17	js_to_json,
8f9a477e	18	orderedSet,
e1e35d1a	19	remove_quotes,
0320ddc1	20	str_to_int,
125cfd78	21	)
125cfd78	22
9933b574	23
125cfd78	24	class PornHubIE(InfoExtractor):
bc4b2d75 S	25	IE_DESC = 'PornHub and Thumbzilla'
	26	_VALID_URL = r'''(?x)
	27	https?://
	28	(?:
92ded33a	29	(?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php\|video/show)\?viewkey=\|embed/)\|
bc4b2d75 S	30	(?:www\.)?thumbzilla\.com/video/
bc4b2d75 S	31	)
b52c9ef1	32	(?P<id>[\da-z]+)
bc4b2d75	33	'''
360075e2	34	_TESTS = [{
9933b574	35	'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
ed8648a3	36	'md5': '1e19b41231a02eba417839222ac9d58e',
9933b574	37	'info_dict': {
249efaf4 PH	38	'id': '648719015',
249efaf4 PH	39	'ext': 'mp4',
611c1dd9	40	'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
ed8648a3 S	41	'uploader': 'Babes',
	42	'duration': 361,
	43	'view_count': int,
	44	'like_count': int,
	45	'dislike_count': int,
	46	'comment_count': int,
	47	'age_limit': 18,
6bb05b32 YCH	48	'tags': list,
6bb05b32 YCH	49	'categories': list,
6c376029 S	50	},
	51	}, {
	52	# non-ASCII title
	53	'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
	54	'info_dict': {
	55	'id': '1331683002',
	56	'ext': 'mp4',
	57	'title': '重庆婷婷女王足交',
79367a98	58	'uploader': 'Unknown',
6c376029 S	59	'duration': 1753,
	60	'view_count': int,
	61	'like_count': int,
	62	'dislike_count': int,
	63	'comment_count': int,
	64	'age_limit': 18,
6bb05b32 YCH	65	'tags': list,
6bb05b32 YCH	66	'categories': list,
6c376029 S	67	},
	68	'params': {
	69	'skip_download': True,
	70	},
360075e2 S	71	}, {
	72	'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
	73	'only_matching': True,
272e4db5	74	}, {
eaaaaec0	75	# removed at the request of cam4.com
272e4db5 S	76	'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
272e4db5 S	77	'only_matching': True,
eaaaaec0 S	78	}, {
	79	# removed at the request of the copyright owner
	80	'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
	81	'only_matching': True,
	82	}, {
	83	# removed by uploader
	84	'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
	85	'only_matching': True,
195f0845 S	86	}, {
	87	# private video
	88	'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
	89	'only_matching': True,
bc4b2d75 S	90	}, {
	91	'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
	92	'only_matching': True,
a99cc4ca S	93	}, {
	94	'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
	95	'only_matching': True,
360075e2	96	}]
125cfd78	97
b52c9ef1 S	98	@staticmethod
	99	def _extract_urls(webpage):
	100	return re.findall(
	101	r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
	102	webpage)
65d161c4	103
0320ddc1	104	def _extract_count(self, pattern, webpage, name):
7700207e S	105	return str_to_int(self._search_regex(
7700207e S	106	pattern, webpage, '%s count' % name, fatal=False))
0320ddc1	107
125cfd78	108	def _real_extract(self, url):
249efaf4	109	video_id = self._match_id(url)
7399ca1f	110
a7298f3e	111	self._set_cookie('pornhub.com', 'age_verified', '1')
125cfd78	112
9a372f14	113	def dl_webpage(platform):
a7298f3e	114	self._set_cookie('pornhub.com', 'platform', platform)
9a372f14 S	115	return self._download_webpage(
9a372f14 S	116	'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
79367a98	117	video_id, 'Downloading %s webpage' % platform)
9a372f14 S	118
9a372f14 S	119	webpage = dl_webpage('pc')
125cfd78	120
50789175	121	error_msg = self._html_search_regex(
add7d2a0	122	r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)\b(?:removed\|userMessageSection)\b(?:(?!\1).)\1[^>]*>(?P<error>.+?)</div>',
3cb3b600	123	webpage, 'error message', default=None, group='error')
50789175 PH	124	if error_msg:
	125	error_msg = re.sub(r'\s+', ' ', error_msg)
	126	raise ExtractorError(
	127	'PornHub said: %s' % error_msg,
	128	expected=True, video_id=video_id)
	129
6c376029 S	130	# video_title from flashvars contains whitespace instead of non-ASCII (see
	131	# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
	132	# on that anymore.
79367a98	133	title = self._html_search_meta(
6c376029 S	134	'twitter:title', webpage, default=None) or self._search_regex(
	135	(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
	136	r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
	137	r'shareTitle\s=\s(["\'])(?P<title>.+?)\1'),
	138	webpage, 'title', group='title')
	139
79367a98 S	140	video_urls = []
	141	video_urls_set = set()
	142
ed8648a3 S	143	flashvars = self._parse_json(
ed8648a3 S	144	self._search_regex(
03442072	145	r'var\s+flashvars_\d+\s=\s({.+?});', webpage, 'flashvars', default='{}'),
ed8648a3 S	146	video_id)
ed8648a3 S	147	if flashvars:
ed8648a3 S	148	thumbnail = flashvars.get('image_url')
ed8648a3 S	149	duration = int_or_none(flashvars.get('video_duration'))
79367a98 S	150	media_definitions = flashvars.get('mediaDefinitions')
	151	if isinstance(media_definitions, list):
	152	for definition in media_definitions:
	153	if not isinstance(definition, dict):
	154	continue
	155	video_url = definition.get('videoUrl')
	156	if not video_url or not isinstance(video_url, compat_str):
	157	continue
	158	if video_url in video_urls_set:
	159	continue
	160	video_urls_set.add(video_url)
	161	video_urls.append(
	162	(video_url, int_or_none(definition.get('quality'))))
ed8648a3	163	else:
79367a98 S	164	thumbnail, duration = [None] * 2
	165
	166	if not video_urls:
	167	tv_webpage = dl_webpage('tv')
	168
	169	assignments = self._search_regex(
	170	r'(var.+?mediastring.+?)</script>', tv_webpage,
	171	'encoded url').split(';')
	172
	173	js_vars = {}
	174
	175	def parse_js_value(inp):
	176	inp = re.sub(r'/\(?:(?!\/).)?\/', '', inp)
	177	if '+' in inp:
	178	inps = inp.split('+')
	179	return functools.reduce(
	180	operator.concat, map(parse_js_value, inps))
	181	inp = inp.strip()
	182	if inp in js_vars:
	183	return js_vars[inp]
	184	return remove_quotes(inp)
	185
	186	for assn in assignments:
	187	assn = assn.strip()
	188	if not assn:
	189	continue
	190	assn = re.sub(r'var\s+', '', assn)
	191	vname, value = assn.split('=', 1)
	192	js_vars[vname] = parse_js_value(value)
	193
	194	video_url = js_vars['mediastring']
	195	if video_url not in video_urls_set:
	196	video_urls.append((video_url, None))
	197	video_urls_set.add(video_url)
	198
	199	for mobj in re.finditer(
	200	r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
	201	webpage):
	202	video_url = mobj.group('url')
	203	if video_url not in video_urls_set:
	204	video_urls.append((video_url, None))
	205	video_urls_set.add(video_url)
	206
	207	formats = []
	208	for video_url, height in video_urls:
	209	tbr = None
	210	mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
	211	if mobj:
	212	if not height:
	213	height = int(mobj.group('height'))
	214	tbr = int(mobj.group('tbr'))
	215	formats.append({
	216	'url': video_url,
	217	'format_id': '%dp' % height if height else None,
	218	'height': height,
	219	'tbr': tbr,
	220	})
	221	self._sort_formats(formats)
ed8648a3	222
0320ddc1	223	video_uploader = self._html_search_regex(
8d9c2a68	224	r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user\|channel)s/\|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
0320ddc1	225	webpage, 'uploader', fatal=False)
125cfd78	226
7700207e S	227	view_count = self._extract_count(
	228	r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
	229	like_count = self._extract_count(
	230	r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
	231	dislike_count = self._extract_count(
	232	r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
0320ddc1	233	comment_count = self._extract_count(
7700207e	234	r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
0320ddc1	235
6bb05b32 YCH	236	page_params = self._parse_json(self._search_regex(
	237	r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s=\s(?P<data>{[^}]+})',
	238	webpage, 'page parameters', group='data', default='{}'),
	239	video_id, transform_source=js_to_json, fatal=False)
	240	tags = categories = None
	241	if page_params:
	242	tags = page_params.get('tags', '').split(',')
	243	categories = page_params.get('categories', '').split(',')
	244
125cfd78	245	return {
	246	'id': video_id,
	247	'uploader': video_uploader,
6c376029	248	'title': title,
125cfd78	249	'thumbnail': thumbnail,
ed8648a3	250	'duration': duration,
0320ddc1 S	251	'view_count': view_count,
	252	'like_count': like_count,
	253	'dislike_count': dislike_count,
	254	'comment_count': comment_count,
79367a98	255	'formats': formats,
750e9833	256	'age_limit': 18,
6bb05b32 YCH	257	'tags': tags,
6bb05b32 YCH	258	'categories': categories,
125cfd78	259	}
e66e1a00 S	260
e66e1a00 S	261
40e146aa S	262	class PornHubPlaylistBaseIE(InfoExtractor):
40e146aa S	263	def _extract_entries(self, webpage):
475bcb22 S	264	# Only process container div with main playlist content skipping
	265	# drop-down menu that uses similar pattern for videos (see
	266	# https://github.com/rg3/youtube-dl/issues/11594).
	267	container = self._search_regex(
	268	r'(?s)(<div[^>]+class=["\']container.+)', webpage,
	269	'container', default=webpage)
	270
40e146aa	271	return [
3a23bae9 S	272	self.url_result(
	273	'http://www.pornhub.com/%s' % video_url,
	274	PornHubIE.ie_key(), video_title=title)
	275	for video_url, title in orderedSet(re.findall(
	276	r'href="/?(view_video\.php\?.\bviewkey=[\da-z]+[^"])"[^>]*\s+title="([^"]+)"',
475bcb22	277	container))
40e146aa	278	]
e66e1a00 S	279
	280	def _real_extract(self, url):
	281	playlist_id = self._match_id(url)
	282
	283	webpage = self._download_webpage(url, playlist_id)
	284
475bcb22	285	entries = self._extract_entries(webpage)
e66e1a00 S	286
	287	playlist = self._parse_json(
	288	self._search_regex(
cd138d8b S	289	r'(?:playlistObject\|PLAYLIST_VIEW)\s=\s({.+?});', webpage,
	290	'playlist', default='{}'),
	291	playlist_id, fatal=False)
	292	title = playlist.get('title') or self._search_regex(
	293	r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
e66e1a00 S	294
e66e1a00 S	295	return self.playlist_result(
cd138d8b	296	entries, playlist_id, title, playlist.get('description'))
40e146aa S	297
	298
	299	class PornHubPlaylistIE(PornHubPlaylistBaseIE):
92ded33a	300	_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
40e146aa	301	_TESTS = [{
96d315c2	302	'url': 'http://www.pornhub.com/playlist/4667351',
40e146aa	303	'info_dict': {
96d315c2 S	304	'id': '4667351',
96d315c2 S	305	'title': 'Nataly Hot',
40e146aa	306	},
96d315c2	307	'playlist_mincount': 2,
92ded33a S	308	}, {
	309	'url': 'https://de.pornhub.com/playlist/4667351',
	310	'only_matching': True,
40e146aa S	311	}]
	312
	313
	314	class PornHubUserVideosIE(PornHubPlaylistBaseIE):
92ded33a	315	_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user\|channel)s/(?P<id>[^/]+)/videos'
40e146aa	316	_TESTS = [{
34541395	317	'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
40e146aa	318	'info_dict': {
34541395	319	'id': 'zoe_ph',
40e146aa	320	},
34541395 S	321	'playlist_mincount': 171,
	322	}, {
	323	'url': 'http://www.pornhub.com/users/rushandlia/videos',
	324	'only_matching': True,
f66df20c PV	325	}, {
	326	# default sorting as Top Rated Videos
	327	'url': 'https://www.pornhub.com/channels/povd/videos',
	328	'info_dict': {
	329	'id': 'povd',
	330	},
	331	'playlist_mincount': 293,
	332	}, {
	333	# Top Rated Videos
	334	'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
	335	'only_matching': True,
	336	}, {
	337	# Most Recent Videos
	338	'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
	339	'only_matching': True,
	340	}, {
	341	# Most Viewed Videos
	342	'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
	343	'only_matching': True,
92ded33a S	344	}, {
	345	'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
	346	'only_matching': True,
40e146aa S	347	}]
	348
	349	def _real_extract(self, url):
	350	user_id = self._match_id(url)
	351
34541395 S	352	entries = []
	353	for page_num in itertools.count(1):
	354	try:
	355	webpage = self._download_webpage(
	356	url, user_id, 'Downloading page %d' % page_num,
	357	query={'page': page_num})
	358	except ExtractorError as e:
	359	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
	360	break
9edcdac9	361	raise
34541395 S	362	page_entries = self._extract_entries(webpage)
	363	if not page_entries:
	364	break
	365	entries.extend(page_entries)
	366
	367	return self.playlist_result(entries, user_id)