jfr.im git - erebus.git/blame_incremental

... / ...

Commit	Line	Data
	1	# Erebus IRC bot - Author: Conny Sjoblom
	2	# vim: fileencoding=utf-8
	3	# URL Checker
	4	# This file is released into the public domain; see http://unlicense.org/
	5
	6	# module info
	7	modinfo = {
	8	'author': 'Erebus Team',
	9	'license': 'public domain',
	10	'compatible': [0],
	11	'depends': [],
	12	'softdeps': [],
	13	}
	14
	15	# http://embed.ly/tools/generator
	16
	17	# preamble
	18	import modlib
	19	lib = modlib.modlib(__name__)
	20	modstart = lib.modstart
	21	modstop = lib.modstop
	22
	23	# module code
	24	import sys
	25	if sys.version_info.major < 3:
	26	stringbase = basestring
	27	import urllib2
	28	import urlparse
	29	import HTMLParser
	30	html = HTMLParser.HTMLParser()
	31	from BeautifulSoup import BeautifulSoup
	32	else:
	33	stringbase = str
	34	import urllib.request as urllib2
	35	import urllib.parse as urlparse
	36	import html
	37	from bs4 import BeautifulSoup
	38
	39	import re, json, datetime
	40
	41	hostmask_regex = re.compile(r'^(.)!(.)@(.*)$')
	42
	43	def parser_hostmask(hostmask):
	44	if isinstance(hostmask, dict):
	45	return hostmask
	46
	47	nick = None
	48	user = None
	49	host = None
	50
	51	if hostmask is not None:
	52	match = hostmask_regex.match(hostmask)
	53
	54	if not match:
	55	nick = hostmask
	56	else:
	57	nick = match.group(1)
	58	user = match.group(2)
	59	host = match.group(3)
	60
	61	return {
	62	'nick': nick,
	63	'user': user,
	64	'host': host
	65	}
	66
	67	class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
	68	def http_error_301(self, req, fp, code, msg, headers):
	69	result = urllib2.HTTPRedirectHandler.http_error_301(
	70	self, req, fp, code, msg, headers)
	71	result.status = code
	72	return result
	73
	74	def http_error_302(self, req, fp, code, msg, headers):
	75	result = urllib2.HTTPRedirectHandler.http_error_302(
	76	self, req, fp, code, msg, headers)
	77	result.status = code
	78	return result
	79
	80	def process_line(line):
	81	responses = []
	82	num_found = 0
	83	limit = lib.parent.cfg.getint('urls', 'limit', 2)
	84	for action, group in regexes:
	85	for regex in group:
	86	for match in regex.findall(line):
	87	if match:
	88	num_found += 1
	89	if num_found > limit:
	90	return responses
	91	if isinstance(match, stringbase):
	92	resp = action(match)
	93	else:
	94	resp = action(*match)
	95	if resp is not None and resp != "":
	96	responses.append(resp)
	97	return responses
	98
	99	@lib.hooknum("PRIVMSG")
	100	def privmsg_hook(bot, textline):
	101	user = parser_hostmask(textline[1:textline.find(' ')])
	102	chan = textline.split()[2]
	103
	104	try:
	105	line = textline.split(None, 3)[3][1:]
	106	except IndexError:
	107	line = ''
	108
	109	responses = process_line(line)
	110	if len(responses) > 0:
	111	if lib.parent.cfg.getboolean('urls', 'multiline'):
	112	for r in responses:
	113	bot.msg(chan, r, True)
	114	else:
	115	bot.msg(chan, ' \| '.join(responses), True)
	116
	117	def unescape(line):
	118	return re.sub('\s+', ' ', html.unescape(line))
	119
	120	def gotspotify(type, track):
	121	url = 'http://ws.spotify.com/lookup/1/?uri=spotify:%s:%s' % (type, track)
	122	xml = urllib2.urlopen(url).read()
	123	soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.HTML_ENTITIES)
	124	lookup_type = soup.contents[2].name
	125
	126	if lookup_type == 'track':
	127	name = soup.find('name').string
	128	album_name = soup.find('album').find('name').string
	129	artist_name = soup.find('artist').find('name').string
	130	popularity = soup.find('popularity')
	131	if popularity:
	132	popularity = float(popularity.string)*100
	133	length = float(soup.find('length').string)
	134	minutes = int(length)/60
	135	seconds = int(length)%60
	136
	137	return unescape('Track: %s - %s / %s %s:%.2d %2d%%' % (artist_name, name, album_name, minutes, seconds, popularity))
	138
	139	elif lookup_type == 'album':
	140	album_name = soup.find('album').find('name').string
	141	artist_name = soup.find('artist').find('name').string
	142	released = soup.find('released').string
	143	return unescape('Album: %s - %s - %s' % (artist_name, album_name, released))
	144
	145	else:
	146	return 'Unsupported type.'
	147
	148	def _yt_duration(s):
	149	mo = re.match(r'P(\d+D)?T(\d+H)?(\d+M)?(\d+S)?', s)
	150	pcs = [x for x in mo.groups() if x]
	151	return ''.join(pcs).lower()
	152	def _yt_date(s, f):
	153	mo = re.match(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.(\d+)Z', s)
	154	return datetime.datetime(*(int(x) for x in mo.groups())).strftime(f)
	155	def _yt_round(n):
	156	n = float(n)
	157	if n >= 10**12:
	158	return '%.1ft' % (n/10**12)
	159	elif n >= 10**9:
	160	return '%.1fb' % (n/10**9)
	161	elif n >= 10**6:
	162	return '%.1fm' % (n/10**6)
	163	elif n >= 10**3:
	164	return '%.1fk' % (n/10**3)
	165	else:
	166	return int(n)
	167
	168	def gotyoutube(url):
	169	url_data = urlparse.urlparse(url)
	170	query = urlparse.parse_qs(url_data.query)
	171	video = query["v"][0]
	172	api_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails,statistics&id=%s&key=%s' % (video, lib.parent.cfg.get('urls', 'api_key'))
	173	try:
	174	respdata = urllib2.urlopen(api_url).read()
	175	v = json.loads(respdata)
	176	v = v['items'][0]
	177
	178	return unescape(lib.parent.cfg.get('urls', 'yt_format', "\002%(author)s\002: \037%(title)s\037 [%(duration)s, uploaded %(uploaded)s, %(views)s v/%(likes)s l/%(dislikes)s d]") % {
	179	'title': v['snippet']['title'],
	180	'author': v['snippet']['channelTitle'],
	181	'duration': _yt_duration(v['contentDetails']['duration']),
	182	'uploaded': _yt_date(v['snippet']['publishedAt'], lib.parent.cfg.get('urls', 'yt_date_format', '%b %d %Y')),
	183	'views': _yt_round(v['statistics']['viewCount']),
	184	'likes': _yt_round(v['statistics']['likeCount']),
	185	'dislikes': _yt_round(v['statistics']['dislikeCount']),
	186	})
	187	except urllib2.HTTPError as e:
	188	if e.getcode() == 403:
	189	return 'API limit exceeded'
	190	else:
	191	return str(e)
	192	except IndexError:
	193	return 'no results'
	194	except Exception as e:
	195	return str(e)
	196
	197	def gottwitch(uri):
	198	url = 'https://api.twitch.tv/helix/streams?user_login=%s' % uri.split('/')[0]
	199	opener = urllib2.build_opener()
	200	opener.addheaders = [('Client-ID', lib.parent.cfg.get('urls', 'twitch_api_key'))]
	201	respdata = opener.open(url).read()
	202	twitch = json.loads(respdata)['data']
	203	try:
	204	# TODO: add current game.
	205	return unescape('\037%s\037 is %s (%s)' % (twitch[0]['user_name'], twitch[0]['type'], twitch[0]['title']))
	206	except:
	207	return 'Channel offline.'
	208
	209	def _humanize_bytes(b):
	210	b = int(b)
	211	i = 0
	212	table = " kMGTPEZYRQ"
	213	while b > 1024:
	214	i += 1
	215	b /= 1024.0
	216	if i == 0:
	217	return "%dB" % (b)
	218	else:
	219	return "%.2f%siB" % (b, table[i])
	220
	221	def goturl(url):
	222	output = []
	223	for _, group in other_regexes:
	224	for regex in group:
	225	if regex.match(url):
	226	return None
	227	request = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'})
	228	opener = urllib2.build_opener(SmartRedirectHandler())
	229
	230	# Send request and handle errors
	231	try:
	232	response = opener.open(request, timeout=2)
	233	except urllib2.HTTPError as e:
	234	return 'Request error: %s %s' % (e.code, e.reason)
	235	except urllib2.URLError as e:
	236	return 'Request error: %s' % (e.reason)
	237	except TimeoutError as e:
	238	return 'Request error: request timed out'
	239	except Exception as e:
	240	return 'Unknown error: %s %r' % (type(e).__name__, e.args)
	241
	242	# Try to add type and length headers to reply
	243	c_type = response.getheader('Content-Type', '').split(';', 1)[0]
	244	c_len = response.getheader('Content-Length')
	245	if c_type != '':
	246	output.append("[%s] " % (c_type))
	247	else:
	248	output.append("[no type] ")
	249	if c_type != "text/html": # else length will be provided by HTML code below
	250	if c_len is not None:
	251	output.append("[%s] " % (_humanize_bytes(c_len)))
	252	else:
	253	output.append("[no length] ")
	254
	255	# Try to add title if HTML
	256	if c_type == 'text/html':
	257	try:
	258	responsebody = response.read(1024*1024)
	259	print(type(responsebody))
	260	except Exception as e:
	261	output.append('Error reading response body: %s %r' % (type(e).__name__, e.args))
	262	else:
	263	if c_len is not None and len(responsebody) != int(c_len):
	264	output.append("[actual %s; Content-Length %s] " % (_humanize_bytes(len(responsebody)), _humanize_bytes(c_len)))
	265	else:
	266	output.append("[%s] " % (_humanize_bytes(len(responsebody))))
	267	try:
	268	soup = BeautifulSoup(responsebody)
	269	if soup.title:
	270	output.append('Title: ' + unescape('%s' % (soup.find('title').string)))
	271	else:
	272	output.append('No title')
	273	except Exception as e:
	274	output.append('Title error: %s %r ' % (type(e).__name__, e.args))
	275
	276	return ''.join(output)
	277
	278	url_regex = (
	279	re.compile(r'https?://(?:[^/\s.]+\.)+[^/\s.]+(?:/\S+)?'),
	280	)
	281	other_regexes = (
	282	)
	283	regexes = other_regexes + (
	284	(goturl, url_regex),
	285	)