[erebus.git] / modules / urls.py

# Erebus IRC bot - Author: Erebus Team
# vim: fileencoding=utf-8
# URL Checker
# This file is released into the public domain; see http://unlicense.org/

# module info
# Static metadata describing this module: author, license, the list of
# compatible API versions, and hard/soft module dependencies.
modinfo = dict(
	author='Erebus Team',
	license='public domain',
	compatible=[0],
	depends=[],
	softdeps=[],
)

# http://embed.ly/tools/generator

# preamble
import modlib
# Per-module helper object, constructed with this module's name. It provides
# the hook decorator used further down (lib.hooknum).
lib = modlib.modlib(__name__)
# Re-export the helper's start/stop callables under module-level names.
# NOTE(review): presumably the bot's module loader looks these up by name
# when loading/unloading the module -- confirm against modlib.
modstart = lib.modstart
modstop = lib.modstop

# module code
import sys
if sys.version_info.major < 3:
	import urllib2
	import urlparse
	import HTMLParser
	from BeautifulSoup import BeautifulSoup
	import re
else:
	import urllib.request as urllib2
	import urllib.parse as urlparse
	import html.parser as HTMLParser
	from bs4 import BeautifulSoup
	import re

import re, json

# Shared parser instance; unescape() in this module calls its .unescape()
# method to decode HTML entities.
# NOTE(review): HTMLParser.unescape() was deprecated in Python 3.4 and removed
# in 3.9 -- html.unescape() is the documented replacement.
html_parser = HTMLParser.HTMLParser()

# nick!user@host hostmask, split into its three components.
hostmask_regex = re.compile(r'^(.*)!(.*)@(.*)$')
# Any http(s) URL whose host contains at least one dot, with an optional path.
url_regex = re.compile(r'https?://[^/\s]+\.[^/\s]+(?:/\S+)?')
# Spotify URIs (spotify:<type>:<22-char id>) and open.spotify.com links.
# The dots in the host name are escaped so that look-alike hosts such as
# "openXspotify.com" do not match.
spotify_regex = (
	re.compile(r'spotify:(?P<type>\w+):(?P<track_id>\w{22})'),
	re.compile(r'https?://open\.spotify\.com/(?P<type>\w+)/(?P<track_id>\w+)')
)
# YouTube watch-page links including their query string.
youtube_regex = (
	re.compile(r'https?://(?:www\.)?youtube\.com/watch\?[a-zA-Z0-9=&_\-]+'),
)
# Twitch channel links; group 1 is the channel name. The dot in the host is
# escaped, and '_' is accepted because Twitch channel names may contain it.
twitch_regex = (
	re.compile(r'https?://(?:www\.)?twitch\.tv/([A-Za-z0-9_]*)'),
)

def parser_hostmask(hostmask):
	"""Split an IRC hostmask into a dict with 'nick', 'user' and 'host' keys.

	A dict argument is returned unchanged (the same object). None yields a
	dict whose three values are all None. A string that does not match
	hostmask_regex is treated as a bare nick, with 'user' and 'host' left
	as None.
	"""
	# Already-parsed input is passed straight through.
	if isinstance(hostmask, dict):
		return hostmask

	parsed = {'nick': None, 'user': None, 'host': None}
	if hostmask is None:
		return parsed

	found = hostmask_regex.match(hostmask)
	if found:
		parsed['nick'], parsed['user'], parsed['host'] = found.groups()
	else:
		# No "!...@..." structure: the whole string is the nick.
		parsed['nick'] = hostmask
	return parsed

class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
	"""Redirect handler that stamps the redirect status code on the result.

	For 301 and 302 responses the stock handler is asked to follow the
	redirect, and the object it returns gets a ``status`` attribute set to
	the redirect code (301 or 302).

	The stock handler returns None when it does not follow the redirect
	(for example when the response carries no Location/URI header); in that
	case None is passed through untouched instead of raising AttributeError.
	"""

	def http_error_301(self, req, fp, code, msg, headers):
		# Explicit parent call rather than super(): urllib2's handler classes
		# are old-style classes on Python 2.
		result = urllib2.HTTPRedirectHandler.http_error_301(
				self, req, fp, code, msg, headers)
		if result is not None:
			result.status = code
		return result

	def http_error_302(self, req, fp, code, msg, headers):
		result = urllib2.HTTPRedirectHandler.http_error_302(
				self, req, fp, code, msg, headers)
		if result is not None:
			result.status = code
		return result

@lib.hooknum("PRIVMSG")
def privmsg_hook(bot, textline):
	"""Announce page titles for every URL found in a PRIVMSG line.

	textline is split on whitespace: the third token is used as the reply
	target and everything after it, minus its first character, as the
	message text.
	# NOTE(review): this looks like the raw IRC form
	# ":nick!user@host PRIVMSG <target> :<text>" -- confirm against the caller.

	Each URL matched by url_regex is passed to goturl(); every non-None
	result is sent back to the target with bot.msg().
	"""
	parts = textline.split(None, 3)
	if len(parts) < 3:
		# Malformed line without a target: nothing to reply to.
		return
	chan = parts[2]
	# Drop the first character of the trailing parameter; an absent
	# trailing parameter means an empty message.
	line = parts[3][1:] if len(parts) > 3 else ''

	# url_regex has no capture groups, so findall() yields the full,
	# non-empty URL strings.
	for url in url_regex.findall(line):
		response = goturl(url)
		if response is not None:
			bot.msg(chan, response)

def unescape(line):
	"""Decode HTML entities in line and collapse whitespace runs to one space.

	Uses html.unescape() where available (Python 3.4+), because
	HTMLParser.unescape() was removed in Python 3.9. On older interpreters
	it falls back to the module-level html_parser instance.
	"""
	try:
		from html import unescape as _html_unescape
	except ImportError:
		# Python 2: there is no html module, use the shared parser instance.
		_html_unescape = html_parser.unescape
	return re.sub(r'\s+', ' ', _html_unescape(line))

def gotspotify(type, track):
	"""Look up a Spotify track or album and return a one-line summary.

	type and track are interpolated into a ws.spotify.com lookup URL as
	spotify:<type>:<track>. The XML reply is parsed with BeautifulSoup.

	Returns 'Track: ...' or 'Album: ...' text passed through unescape(), or
	'Unsupported type.' for any other lookup type. Errors from urlopen()
	and from missing XML elements propagate to the caller.
	"""
	url = 'http://ws.spotify.com/lookup/1/?uri=spotify:%s:%s' % (type, track)
	response = urllib2.urlopen(url)
	try:
		xml = response.read()
	finally:
		# Release the connection even when read() fails.
		response.close()

	# BeautifulSoup 3 needs convertEntities to decode entities. The bs4 class
	# imported on Python 3 does not define HTML_ENTITIES (bs4 always converts
	# entities), so only pass the option when the constant exists.
	html_entities = getattr(BeautifulSoup, 'HTML_ENTITIES', None)
	if html_entities is not None:
		soup = BeautifulSoup(xml, convertEntities=html_entities)
	else:
		soup = BeautifulSoup(xml)
	# NOTE(review): assumes the root element is the third top-level node of
	# the parsed document -- this depends on the parser in use; confirm.
	lookup_type = soup.contents[2].name

	if lookup_type == 'track':
		name = soup.find('name').string
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		popularity = soup.find('popularity')
		length = float(soup.find('length').string)
		# divmod keeps minutes an integer on Python 3, where "/" would give
		# a float such as 3.5.
		minutes, seconds = divmod(int(length), 60)

		summary = 'Track: %s - %s / %s %d:%.2d' % (artist_name, name, album_name, minutes, seconds)
		# Popularity is optional; formatting None with %d would raise.
		if popularity is not None:
			summary += ' %2d%%' % (float(popularity.string) * 100)
		return unescape(summary)

	elif lookup_type == 'album':
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		released = soup.find('released').string
		return unescape('Album: %s - %s - %s' % (artist_name, album_name, released))

	else:
		return 'Unsupported type.'

def gotyoutube(url):
	"""Return 'Youtube: <title> (<author>)' for a YouTube watch URL.

	The video id is taken from the URL's "v" query parameter and looked up
	through the gdata JSON feed.

	Returns None when the URL has no "v" parameter, or when the lookup,
	JSON decoding or field extraction fails (best effort: ordinary errors
	are not propagated).
	"""
	query = urlparse.parse_qs(urlparse.urlparse(url).query)
	video_ids = query.get('v')
	if not video_ids:
		# No video id in the URL: nothing to look up.
		return None

	api_url = 'http://gdata.youtube.com/feeds/api/videos/%s?alt=json&v=2' % video_ids[0]
	try:
		response = urllib2.urlopen(api_url)
		try:
			respdata = response.read()
		finally:
			response.close()
		video_info = json.loads(respdata)

		title = video_info['entry']['title']["$t"]
		author = video_info['entry']['author'][0]['name']['$t']

		return unescape("Youtube: %s (%s)" % (title, author))
	except Exception:
		# Best effort: any ordinary failure means "no summary", while
		# KeyboardInterrupt/SystemExit still propagate.
		return None

def gottwitch(uri):
	"""Return a one-line status summary for a Twitch channel.

	Only the part of uri before the first '/' is used as the channel name
	in the justin.tv stream-list API URL.

	Returns 'Twitch: <status> (<login> playing <game>)' passed through
	unescape(), or 'Twitch: Channel offline.' when the decoded reply does
	not contain the expected stream entry. Errors from urlopen() and
	json.loads() propagate to the caller.
	"""
	url = 'http://api.justin.tv/api/stream/list.json?channel=%s' % uri.split('/')[0]
	response = urllib2.urlopen(url)
	try:
		respdata = response.read()
	finally:
		# Release the connection even when read() fails.
		response.close()
	twitch = json.loads(respdata)
	try:
		channel = twitch[0]['channel']
		return unescape('Twitch: %s (%s playing %s)' % (channel['status'], channel['login'], channel['meta_game']))
	except Exception:
		# An empty list or unexpected structure is reported as offline;
		# KeyboardInterrupt/SystemExit are no longer swallowed.
		return 'Twitch: Channel offline.'

def goturl(url):
	"""Fetch url and return 'Title: <page title>' for it.

	The request goes through SmartRedirectHandler with a 2 second timeout
	and the response is parsed with BeautifulSoup.

	Returns:
		'Title: ...' (passed through unescape()) when the page has a <title>
		with a single string; 'Error: <code> <reason>' on an HTTP error
		status; None when there is no usable title or on any other failure,
		including a malformed URL.
	"""
	# NOTE(review): url comes from chat text, and the whole response body is
	# handed to the parser with no size limit -- consider capping the read.
	response = None
	try:
		# Request() validates the URL on Python 3, so it sits inside the try
		# block to keep malformed input from raising to the caller.
		request = urllib2.Request(url)
		opener = urllib2.build_opener(SmartRedirectHandler())
		response = opener.open(request, timeout=2)
		soup = BeautifulSoup(response)

		title = soup.title
		# .string is None for an empty title or one with nested markup;
		# formatting it would announce the literal text "Title: None".
		if title is None or title.string is None:
			return None
		return unescape('Title: %s' % (title.string,))
	except urllib2.HTTPError as e:
		return 'Error: %s %s' % (e.code, e.reason)
	except Exception:
		# Best effort: ordinary failures produce no announcement, while
		# KeyboardInterrupt/SystemExit still propagate.
		return None
	finally:
		if response is not None:
			response.close()
Commit	Line	Data
a83e1f9c	1	# Erebus IRC bot - Author: Erebus Team
4477123d	2	# vim: fileencoding=utf-8
a83e1f9c	3	# URL Checker
	4	# This file is released into the public domain; see http://unlicense.org/
	5
	6	# module info
	7	modinfo = {
	8	'author': 'Erebus Team',
	9	'license': 'public domain',
fa93b933	10	'compatible': [0],
a62d0d18	11	'depends': [],
a62d0d18	12	'softdeps': [],
a83e1f9c	13	}
a83e1f9c	14
99366200 CS	15	# http://embed.ly/tools/generator
99366200 CS	16
a83e1f9c	17	# preamble
	18	import modlib
	19	lib = modlib.modlib(__name__)
	20	modstart = lib.modstart
	21	modstop = lib.modstop
	22
	23	# module code
a28e2ae9	24	import sys
	25	if sys.version_info.major < 3:
	26	import urllib2
	27	import urlparse
	28	import HTMLParser
	29	from BeautifulSoup import BeautifulSoup
d140b1af	30	import re
a28e2ae9	31	else:
	32	import urllib.request as urllib2
	33	import urllib.parse as urlparse
	34	import html.parser as HTMLParser
	35	from bs4 import BeautifulSoup
d140b1af	36	import re
a28e2ae9	37
a28e2ae9	38	import re, json
a83e1f9c	39
390fbad4	40	html_parser = HTMLParser.HTMLParser()
a83e1f9c	41
390fbad4	42	hostmask_regex = re.compile(r'^(.)!(.)@(.*)$')
c6880712	43	url_regex = re.compile(r'https?://[^/\s]+\.[^/\s]+(?:/\S+)?')
a83e1f9c	44	spotify_regex = (
a83e1f9c	45	re.compile(r'spotify:(?P<type>\w+):(?P<track_id>\w{22})'),
c6880712	46	re.compile(r'https?://open.spotify.com/(?P<type>\w+)/(?P<track_id>\w+)')
a83e1f9c	47	)
	48	youtube_regex = (
	49	re.compile(r'https?://(?:www\.)?youtube\.com/watch\?[a-zA-Z0-9=&_\-]+'),
	50	)
	51	twitch_regex = (
01a6184a	52	re.compile(r'https?:\/\/(?:www\.)?twitch.tv\/([A-Za-z0-9]*)'),
a83e1f9c	53	)
	54
	55	def parser_hostmask(hostmask):
	56	if isinstance(hostmask, dict):
	57	return hostmask
	58
	59	nick = None
	60	user = None
	61	host = None
	62
	63	if hostmask is not None:
	64	match = hostmask_regex.match(hostmask)
	65
	66	if not match:
	67	nick = hostmask
	68	else:
	69	nick = match.group(1)
	70	user = match.group(2)
	71	host = match.group(3)
	72
	73	return {
	74	'nick': nick,
	75	'user': user,
	76	'host': host
	77	}
	78
394a7b69 CS	79	class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
	80	def http_error_301(self, req, fp, code, msg, headers):
	81	result = urllib2.HTTPRedirectHandler.http_error_301(
	82	self, req, fp, code, msg, headers)
	83	result.status = code
	84	return result
	85
	86	def http_error_302(self, req, fp, code, msg, headers):
	87	result = urllib2.HTTPRedirectHandler.http_error_302(
	88	self, req, fp, code, msg, headers)
	89	result.status = code
	90	return result
	91
a83e1f9c	92	@lib.hooknum("PRIVMSG")
390fbad4 CS	93	def privmsg_hook(bot, textline):
	94	user = parser_hostmask(textline[1:textline.find(' ')])
	95	chan = textline.split()[2]
a83e1f9c	96
a83e1f9c	97	try:
390fbad4	98	line = textline.split(None, 3)[3][1:]
a83e1f9c	99	except IndexError:
390fbad4	100	line = ''
a83e1f9c	101
390fbad4 CS	102	for match in url_regex.findall(line):
390fbad4 CS	103	if match:
c6880712	104	response = goturl(match)
	105	if response is not None:
	106	bot.msg(chan, response)
a83e1f9c	107
390fbad4	108	def unescape(line):
d140b1af	109	return re.sub('\s+', ' ', html_parser.unescape(line))
a83e1f9c	110
	111	def gotspotify(type, track):
	112	url = 'http://ws.spotify.com/lookup/1/?uri=spotify:%s:%s' % (type, track)
	113	xml = urllib2.urlopen(url).read()
390fbad4	114	soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.HTML_ENTITIES)
a83e1f9c	115	lookup_type = soup.contents[2].name
390fbad4	116
a83e1f9c	117	if lookup_type == 'track':
	118	name = soup.find('name').string
	119	album_name = soup.find('album').find('name').string
	120	artist_name = soup.find('artist').find('name').string
	121	popularity = soup.find('popularity')
	122	if popularity:
	123	popularity = float(popularity.string)*100
	124	length = float(soup.find('length').string)
	125	minutes = int(length)/60
	126	seconds = int(length)%60
390fbad4	127
dafa38fc	128	return unescape('Track: %s - %s / %s %s:%.2d %2d%%' % (artist_name, name, album_name, minutes, seconds, popularity))
390fbad4	129
a83e1f9c	130	elif lookup_type == 'album':
	131	album_name = soup.find('album').find('name').string
	132	artist_name = soup.find('artist').find('name').string
	133	released = soup.find('released').string
dafa38fc	134	return unescape('Album: %s - %s - %s' % (artist_name, album_name, released))
390fbad4	135
a83e1f9c	136	else:
	137	return 'Unsupported type.'
	138
	139	def gotyoutube(url):
	140	url_data = urlparse.urlparse(url)
	141	query = urlparse.parse_qs(url_data.query)
	142	video = query["v"][0]
	143	api_url = 'http://gdata.youtube.com/feeds/api/videos/%s?alt=json&v=2' % video
	144	try:
	145	respdata = urllib2.urlopen(api_url).read()
	146	video_info = json.loads(respdata)
	147
	148	title = video_info['entry']['title']["$t"]
	149	author = video_info['entry']['author'][0]['name']['$t']
	150
dafa38fc	151	return unescape("Youtube: %s (%s)" % (title, author))
a83e1f9c	152	except:
	153	pass
	154
390fbad4 CS	155	def gottwitch(uri):
	156	url = 'http://api.justin.tv/api/stream/list.json?channel=%s' % uri.split('/')[0]
	157	respdata = urllib2.urlopen(url).read()
	158	twitch = json.loads(respdata)
	159	try:
dafa38fc	160	return unescape('Twitch: %s (%s playing %s)' % (twitch[0]['channel']['status'], twitch[0]['channel']['login'], twitch[0]['channel']['meta_game']))
390fbad4 CS	161	except:
	162	return 'Twitch: Channel offline.'
	163
	164	def goturl(url):
394a7b69 CS	165	request = urllib2.Request(url)
394a7b69 CS	166	opener = urllib2.build_opener(SmartRedirectHandler())
993046cc	167	try:
394a7b69	168	soup = BeautifulSoup(opener.open(request, timeout=2))
acfe3d05	169	return unescape('Title: %s' % (soup.title.string))
de8ab9cb	170	except urllib2.HTTPError as e:
acfe3d05	171	return 'Error: %s %s' % (e.code, e.reason)
993046cc	172	except:
c6880712	173	return None