[yt-dlp.git] / youtube_dl / extractor / patreon.py

# encoding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    js_to_json,
)


class PatreonIE(InfoExtractor):
    """Information extractor for Patreon ``creation?hid=<id>`` pages.

    Three page layouts are recognised, in this order of precedence: a direct
    attachment link, an embedly iframe (handed off through ``url_result``),
    and a jPlayer playlist defined in the page's JavaScript.
    """

    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'http://www.patreon.com/creation?hid=743933',
        'md5': 'e25505eec1053a6e6813b8ed369875cc',
        'info_dict': {
            'id': '743933',
            'ext': 'mp3',
            'title': 'Episode 166: David Smalley of Dogma Debate',
            'uploader': 'Cognitive Dissonance Podcast',
            'thumbnail': 're:^https?://.*$',
        },
    }, {
        'url': 'http://www.patreon.com/creation?hid=754133',
        'md5': '3eb09345bf44bf60451b8b0b81759d0a',
        'info_dict': {
            'id': '754133',
            'ext': 'mp3',
            'title': 'CD 167 Extra',
            'uploader': 'Cognitive Dissonance Podcast',
            'thumbnail': 're:^https?://.*$',
        },
    }, {
        # Embedded video: extraction is delegated, so the id is not the hid.
        'url': 'https://www.patreon.com/creation?hid=1682498',
        'info_dict': {
            'id': 'SU4fj_aEMVw',
            'ext': 'mp4',
            'title': 'I\'m on Patreon!',
            'uploader': 'TraciJHines',
            'thumbnail': 're:^https?://.*$',
            'upload_date': '20150211',
            'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
            'uploader_id': 'TraciJHines',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
    '''
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_form = {
            'redirectUrl': 'http://www.patreon.com/',
            'email': username,
            'password': password,
        }

        request = compat_urllib_request.Request(
            'https://www.patreon.com/processLogin',
            compat_urllib_parse.urlencode(login_form).encode('utf-8')
        )
        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        self._login()
    '''

    def _real_extract(self, url):
        """Build the info dict for the creation page at *url*.

        Returns the result of ``url_result`` instead when the page carries an
        embedly iframe and no attachment link.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage=page).strip() if False else \
            self._og_search_title(page).strip()

        # Both markers are optional; the searches are given default=None and
        # the branches below key off which one (if any) was found.
        attachment_path = self._html_search_regex(
            r'<div class="attach"><a target="_blank" href="([^"]+)">',
            page, 'attachment URL', default=None)
        embed_url = self._html_search_regex(
            r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
            page, 'embedded URL', default=None)

        # An attachment wins over an embed; only a page with an embed and no
        # attachment is delegated.
        if attachment_path is None and embed_url is not None:
            return self.url_result(embed_url)

        if attachment_path is None:
            # Neither marker present: read the first entry of the jPlayer
            # playlist literal from the page script.
            playlist_source = self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
                page, 'playlist JSON')
            tracks = self._parse_json(
                playlist_source, video_id, transform_source=js_to_json)
            first_track = tracks[0]
            media_url = self._proto_relative_url(first_track['mp3'])
            cover = self._proto_relative_url(first_track.get('cover'))
            creator = first_track.get('artist')
        else:
            # The attachment href is prefixed with the site origin as-is.
            media_url = 'http://www.patreon.com' + attachment_path
            cover = self._og_search_thumbnail(page)
            creator = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', page, 'uploader')

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'mp3',
            'title': title,
            'uploader': creator,
            'thumbnail': cover,
        }
Commit	Line	Data
a00d73c8 EJ	1	# encoding: utf-8
	2	from __future__ import unicode_literals
	3
a00d73c8 EJ	4	from .common import InfoExtractor
a00d73c8 EJ	5	from ..utils import (
e05f6939	6	js_to_json,
a00d73c8 EJ	7	)
	8
	9
a00d73c8	10	class PatreonIE(InfoExtractor):
77070040	11	_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
a00d73c8 EJ	12	_TESTS = [
	13	{
	14	'url': 'http://www.patreon.com/creation?hid=743933',
	15	'md5': 'e25505eec1053a6e6813b8ed369875cc',
a00d73c8 EJ	16	'info_dict': {
	17	'id': '743933',
	18	'ext': 'mp3',
	19	'title': 'Episode 166: David Smalley of Dogma Debate',
	20	'uploader': 'Cognitive Dissonance Podcast',
e05f6939	21	'thumbnail': 're:^https?://.*$',
a00d73c8 EJ	22	},
a00d73c8 EJ	23	},
6994e706 EJ	24	{
	25	'url': 'http://www.patreon.com/creation?hid=754133',
	26	'md5': '3eb09345bf44bf60451b8b0b81759d0a',
	27	'info_dict': {
	28	'id': '754133',
	29	'ext': 'mp3',
	30	'title': 'CD 167 Extra',
	31	'uploader': 'Cognitive Dissonance Podcast',
e05f6939	32	'thumbnail': 're:^https?://.*$',
6994e706 EJ	33	},
6994e706 EJ	34	},
6b961a85 PH	35	{
	36	'url': 'https://www.patreon.com/creation?hid=1682498',
	37	'info_dict': {
	38	'id': 'SU4fj_aEMVw',
	39	'ext': 'mp4',
	40	'title': 'I\'m on Patreon!',
	41	'uploader': 'TraciJHines',
	42	'thumbnail': 're:^https?://.*$',
	43	'upload_date': '20150211',
	44	'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
	45	'uploader_id': 'TraciJHines',
	46	},
	47	'params': {
	48	'noplaylist': True,
	49	'skip_download': True,
	50	}
	51	}
a00d73c8 EJ	52	]
	53
	54	# Currently Patreon exposes download URL via hidden CSS, so login is not
	55	# needed. Keeping this commented for when this inevitably changes.
	56	'''
	57	def _login(self):
	58	(username, password) = self._get_login_info()
	59	if username is None:
	60	return
	61
	62	login_form = {
	63	'redirectUrl': 'http://www.patreon.com/',
	64	'email': username,
	65	'password': password,
	66	}
	67
	68	request = compat_urllib_request.Request(
	69	'https://www.patreon.com/processLogin',
	70	compat_urllib_parse.urlencode(login_form).encode('utf-8')
	71	)
	72	login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
	73
	74	if re.search(r'onLoginFailed', login_page):
	75	raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
	76
	77	def _real_initialize(self):
	78	self._login()
	79	'''
	80
	81	def _real_extract(self, url):
77070040	82	video_id = self._match_id(url)
e05f6939 PH	83	webpage = self._download_webpage(url, video_id)
	84	title = self._og_search_title(webpage).strip()
	85
	86	attach_fn = self._html_search_regex(
	87	r'<div class="attach"><a target="_blank" href="([^"]+)">',
	88	webpage, 'attachment URL', default=None)
6b961a85 PH	89	embed = self._html_search_regex(
	90	r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
	91	webpage, 'embedded URL', default=None)
	92
e05f6939 PH	93	if attach_fn is not None:
	94	video_url = 'http://www.patreon.com' + attach_fn
	95	thumbnail = self._og_search_thumbnail(webpage)
	96	uploader = self._html_search_regex(
	97	r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
6b961a85 PH	98	elif embed is not None:
6b961a85 PH	99	return self.url_result(embed)
e05f6939	100	else:
77070040	101	playlist = self._parse_json(self._search_regex(
e05f6939	102	r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
77070040 PH	103	webpage, 'playlist JSON'),
77070040 PH	104	video_id, transform_source=js_to_json)
e05f6939 PH	105	data = playlist[0]
	106	video_url = self._proto_relative_url(data['mp3'])
	107	thumbnail = self._proto_relative_url(data.get('cover'))
	108	uploader = data.get('artist')
	109
	110	return {
	111	'id': video_id,
	112	'url': video_url,
	113	'ext': 'mp3',
	114	'title': title,
	115	'uploader': uploader,
	116	'thumbnail': thumbnail,
	117	}