[yt-dlp.git] / youtube_dl / extractor / patreon.py

# encoding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import js_to_json


class PatreonIE(InfoExtractor):
    """Extractor for Patreon creation pages (``patreon.com/creation?hid=<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
    _TESTS = [
        # Two audio creations; both are expected to come out as mp3.
        {
            'url': 'http://www.patreon.com/creation?hid=743933',
            'md5': 'e25505eec1053a6e6813b8ed369875cc',
            'info_dict': {
                'id': '743933',
                'ext': 'mp3',
                'title': 'Episode 166: David Smalley of Dogma Debate',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
        {
            'url': 'http://www.patreon.com/creation?hid=754133',
            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
            'info_dict': {
                'id': '754133',
                'ext': 'mp3',
                'title': 'CD 167 Extra',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
        # A creation whose page embeds an external player; the expected id
        # and ext differ from the Patreon creation id / mp3 default because
        # the embedded URL is handed off via url_result().
        {
            'url': 'https://www.patreon.com/creation?hid=1682498',
            'info_dict': {
                'id': 'SU4fj_aEMVw',
                'ext': 'mp4',
                'title': 'I\'m on Patreon!',
                'uploader': 'TraciJHines',
                'thumbnail': 're:^https?://.*$',
                'upload_date': '20150211',
                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
                'uploader_id': 'TraciJHines',
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            }
        }
    ]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
    # NOTE: the disabled code below is kept as an inert string literal. It
    # references sanitized_Request, compat_urllib_parse_urlencode, re and
    # ExtractorError, none of which this module imports at the moment, so
    # those imports have to be added if the code is ever re-enabled.
    '''
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_form = {
            'redirectUrl': 'http://www.patreon.com/',
            'email': username,
            'password': password,
        }

        request = sanitized_Request(
            'https://www.patreon.com/processLogin',
            compat_urllib_parse_urlencode(login_form).encode('utf-8')
        )
        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        self._login()
    '''

    def _real_extract(self, url):
        """Extract the media behind a Patreon creation page.

        The page is probed for three sources, in order of precedence:

        1. an attachment link inside ``<div class="attach">``,
        2. an ``<iframe>`` inside the ``watchCreation`` element, which is
           delegated to another extractor through ``url_result()``,
        3. the first entry of the ``jPlayerPlaylist`` JavaScript literal.

        Returns either the ``url_result()`` value for an embed, or an info
        dict with ``id``, ``url``, ``ext``, ``title``, ``uploader`` and
        ``thumbnail``.
        """
        creation_id = self._match_id(url)
        page = self._download_webpage(url, creation_id)
        title = self._og_search_title(page).strip()

        # Both probes are optional (default=None); which one hits decides
        # the extraction strategy below.
        attachment_path = self._html_search_regex(
            r'<div class="attach"><a target="_blank" href="([^"]+)">',
            page, 'attachment URL', default=None)
        embed_url = self._html_search_regex(
            r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
            page, 'embedded URL', default=None)

        # An embed is only used when there is no attachment to download.
        if attachment_path is None and embed_url is not None:
            return self.url_result(embed_url)

        if attachment_path is not None:
            # The attachment href is appended to the site root as-is.
            media_url = 'http://www.patreon.com' + attachment_path
            thumbnail = self._og_search_thumbnail(page)
            uploader = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', page, 'uploader')
        else:
            # Fall back to the inline jPlayer playlist; it is a JavaScript
            # literal, hence the js_to_json transform before parsing.
            playlist_source = self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
                page, 'playlist JSON')
            tracks = self._parse_json(
                playlist_source, creation_id, transform_source=js_to_json)
            first_track = tracks[0]
            media_url = self._proto_relative_url(first_track['mp3'])
            thumbnail = self._proto_relative_url(first_track.get('cover'))
            uploader = first_track.get('artist')

        return {
            'id': creation_id,
            'url': media_url,
            'ext': 'mp3',
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }
Commit	Line	Data
a00d73c8 EJ	1	# encoding: utf-8
	2	from __future__ import unicode_literals
	3
a00d73c8	4	from .common import InfoExtractor
5c2266df	5	from ..utils import js_to_json
a00d73c8 EJ	6
a00d73c8 EJ	7
a00d73c8	8	class PatreonIE(InfoExtractor):
77070040	9	_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
a00d73c8 EJ	10	_TESTS = [
	11	{
	12	'url': 'http://www.patreon.com/creation?hid=743933',
	13	'md5': 'e25505eec1053a6e6813b8ed369875cc',
a00d73c8 EJ	14	'info_dict': {
	15	'id': '743933',
	16	'ext': 'mp3',
	17	'title': 'Episode 166: David Smalley of Dogma Debate',
	18	'uploader': 'Cognitive Dissonance Podcast',
e05f6939	19	'thumbnail': 're:^https?://.*$',
a00d73c8 EJ	20	},
a00d73c8 EJ	21	},
6994e706 EJ	22	{
	23	'url': 'http://www.patreon.com/creation?hid=754133',
	24	'md5': '3eb09345bf44bf60451b8b0b81759d0a',
	25	'info_dict': {
	26	'id': '754133',
	27	'ext': 'mp3',
	28	'title': 'CD 167 Extra',
	29	'uploader': 'Cognitive Dissonance Podcast',
e05f6939	30	'thumbnail': 're:^https?://.*$',
6994e706 EJ	31	},
6994e706 EJ	32	},
6b961a85 PH	33	{
	34	'url': 'https://www.patreon.com/creation?hid=1682498',
	35	'info_dict': {
	36	'id': 'SU4fj_aEMVw',
	37	'ext': 'mp4',
	38	'title': 'I\'m on Patreon!',
	39	'uploader': 'TraciJHines',
	40	'thumbnail': 're:^https?://.*$',
	41	'upload_date': '20150211',
	42	'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
	43	'uploader_id': 'TraciJHines',
	44	},
	45	'params': {
	46	'noplaylist': True,
	47	'skip_download': True,
	48	}
	49	}
a00d73c8 EJ	50	]
	51
	52	# Currently Patreon exposes download URL via hidden CSS, so login is not
	53	# needed. Keeping this commented for when this inevitably changes.
	54	'''
	55	def _login(self):
	56	(username, password) = self._get_login_info()
	57	if username is None:
	58	return
	59
	60	login_form = {
	61	'redirectUrl': 'http://www.patreon.com/',
	62	'email': username,
	63	'password': password,
	64	}
	65
5c2266df	66	request = sanitized_Request(
a00d73c8	67	'https://www.patreon.com/processLogin',
15707c7e	68	compat_urllib_parse_urlencode(login_form).encode('utf-8')
a00d73c8 EJ	69	)
	70	login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
	71
	72	if re.search(r'onLoginFailed', login_page):
	73	raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
	74
	75	def _real_initialize(self):
	76	self._login()
	77	'''
	78
	79	def _real_extract(self, url):
77070040	80	video_id = self._match_id(url)
e05f6939 PH	81	webpage = self._download_webpage(url, video_id)
	82	title = self._og_search_title(webpage).strip()
	83
	84	attach_fn = self._html_search_regex(
	85	r'<div class="attach"><a target="_blank" href="([^"]+)">',
	86	webpage, 'attachment URL', default=None)
6b961a85	87	embed = self._html_search_regex(
47fd8c2f	88	r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
6b961a85 PH	89	webpage, 'embedded URL', default=None)
6b961a85 PH	90
e05f6939 PH	91	if attach_fn is not None:
	92	video_url = 'http://www.patreon.com' + attach_fn
	93	thumbnail = self._og_search_thumbnail(webpage)
	94	uploader = self._html_search_regex(
	95	r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
6b961a85 PH	96	elif embed is not None:
6b961a85 PH	97	return self.url_result(embed)
e05f6939	98	else:
77070040	99	playlist = self._parse_json(self._search_regex(
e05f6939	100	r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
77070040 PH	101	webpage, 'playlist JSON'),
77070040 PH	102	video_id, transform_source=js_to_json)
e05f6939 PH	103	data = playlist[0]
	104	video_url = self._proto_relative_url(data['mp3'])
	105	thumbnail = self._proto_relative_url(data.get('cover'))
	106	uploader = data.get('artist')
	107
	108	return {
	109	'id': video_id,
	110	'url': video_url,
	111	'ext': 'mp3',
	112	'title': title,
	113	'uploader': uploader,
	114	'thumbnail': thumbnail,
	115	}