# Erebus IRC bot - Author: Conny Sjoblom
# vim: fileencoding=utf-8
# URL Checker
# This file is released into the public domain; see http://unlicense.org/

# module info
modinfo = {
	'author': 'Erebus Team',
	'license': 'public domain',
	'compatible': [0],
	'depends': [],
	'softdeps': [],
}

# http://embed.ly/tools/generator

# preamble
import modlib
lib = modlib.modlib(__name__)
modstart = lib.modstart
modstop = lib.modstop

# module code
import sys
if sys.version_info.major < 3:
	stringbase = basestring
	import urllib2
	import urlparse
	import HTMLParser
	html = HTMLParser.HTMLParser()
	from BeautifulSoup import BeautifulSoup
else:
	stringbase = str
	import urllib.request as urllib2
	import urllib.parse as urlparse
	import html
	from bs4 import BeautifulSoup

import re, json, datetime

hostmask_regex = re.compile(r'^(.*)!(.*)@(.*)$')

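# Parse an IRC hostmask ("nick!user@host") into its parts.
# Example: parser_hostmask('nick!user@example.com') ->
#   {'nick': 'nick', 'user': 'user', 'host': 'example.com'}
# Dicts pass through unchanged; a bare nick yields user=host=None.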
def parser_hostmask(hostmask):
	if isinstance(hostmask, dict):
		return hostmask

	nick = None
	user = None
	host = None

	if hostmask is not None:
		match = hostmask_regex.match(hostmask)

		if not match:
			nick = hostmask
		else:
			nick = match.group(1)
			user = match.group(2)
			host = match.group(3)

	return {
		'nick': nick,
		'user': user,
		'host': host
	}

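# urllib's redirect handler follows 301/302s transparently; this subclass
# also records the redirect status code on the returned response object.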
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
	def http_error_301(self, req, fp, code, msg, headers):
		result = urllib2.HTTPRedirectHandler.http_error_301(
			self, req, fp, code, msg, headers)
		result.status = code
		return result

	def http_error_302(self, req, fp, code, msg, headers):
		result = urllib2.HTTPRedirectHandler.http_error_302(
			self, req, fp, code, msg, headers)
		result.status = code
		return result

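# Scan one message for recognized URLs and collect handler responses,
# stopping once the configured per-line limit (default 2) is reached.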
def process_line(line):
	responses = []
	num_found = 0
	limit = lib.parent.cfg.getint('urls', 'limit', 2)
	for action, group in regexes:
		for regex in group:
			for match in regex.findall(line):
				if match:
					num_found += 1
					if num_found > limit:
						return responses
					if isinstance(match, stringbase):
						resp = action(match)
					else:
						resp = action(*match)
					if resp is not None and resp != "":
						responses.append(resp)
	return responses

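# Raw PRIVMSG hook: pull the channel and message text out of the raw line,
# run the URL handlers, and reply either one line per URL or joined with
# ' | ', depending on the [urls] multiline config option.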
@lib.hooknum("PRIVMSG")
def privmsg_hook(bot, textline):
	user = parser_hostmask(textline[1:textline.find(' ')])
	chan = textline.split()[2]

	try:
		line = textline.split(None, 3)[3][1:]
	except IndexError:
		line = ''

	responses = process_line(line)
	if len(responses) > 0:
		if lib.parent.cfg.getboolean('urls', 'multiline'):
			for r in responses:
				bot.msg(chan, r, True)
		else:
			bot.msg(chan, ' | '.join(responses), True)

def unescape(line):
	return re.sub(r'\s+', ' ', html.unescape(line))

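# Note: the ws.spotify.com lookup API used below has been retired, and the
# 'convertEntities' argument is BeautifulSoup 3 (Python 2) API that bs4
# rejects, so this handler only functions on the legacy Python 2 path.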
def gotspotify(type, track):
	url = 'http://ws.spotify.com/lookup/1/?uri=spotify:%s:%s' % (type, track)
	xml = urllib2.urlopen(url).read()
	soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.HTML_ENTITIES)
	lookup_type = soup.contents[2].name

	if lookup_type == 'track':
		name = soup.find('name').string
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		popularity = soup.find('popularity')
		if popularity:
			popularity = float(popularity.string)*100
		length = float(soup.find('length').string)
		minutes = int(length)//60 # floor division: '/' would give a float on Py3
		seconds = int(length)%60

		return unescape('Track: %s - %s / %s %s:%.2d %2d%%' % (artist_name, name, album_name, minutes, seconds, popularity))

	elif lookup_type == 'album':
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		released = soup.find('released').string
		return unescape('Album: %s - %s - %s' % (artist_name, album_name, released))

	else:
		return 'Unsupported type.'

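# Collapse an ISO 8601 duration such as 'PT4M13S' into a compact '4m13s'.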
def _yt_duration(s):
	mo = re.match(r'P(\d+D)?T(\d+H)?(\d+M)?(\d+S)?', s)
	pcs = [x for x in mo.groups() if x]
	return ''.join(pcs).lower()
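
# Parse the API's ISO 8601 timestamp (e.g. '2013-01-01T12:00:00.000Z') and
# render it with the given strftime format.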
def _yt_date(s, f):
	mo = re.match(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.(\d+)Z', s)
	return datetime.datetime(*(int(x) for x in mo.groups())).strftime(f)
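
# Abbreviate large counts for display, e.g. _yt_round(1234567) -> '1.2m'.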
def _yt_round(n):
	n = float(n)
	if n >= 10**12:
		return '%.1ft' % (n/10**12)
	elif n >= 10**9:
		return '%.1fb' % (n/10**9)
	elif n >= 10**6:
		return '%.1fm' % (n/10**6)
	elif n >= 10**3:
		return '%.1fk' % (n/10**3)
	else:
		return int(n)

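# Look up video metadata via the YouTube Data API v3; needs an API key in the
# [urls] api_key config option. Note: the API no longer returns dislikeCount
# for most videos, so the 'dislikes' field may raise KeyError and fall
# through to the generic exception handler below.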
def gotyoutube(url):
	url_data = urlparse.urlparse(url)
	query = urlparse.parse_qs(url_data.query)
	video = query["v"][0]
	api_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails,statistics&id=%s&key=%s' % (video, lib.parent.cfg.get('urls', 'api_key'))
	try:
		respdata = urllib2.urlopen(api_url).read()
		v = json.loads(respdata)
		v = v['items'][0]

		return unescape(lib.parent.cfg.get('urls', 'yt_format', "\002%(author)s\002: \037%(title)s\037 [%(duration)s, uploaded %(uploaded)s, %(views)s v/%(likes)s l/%(dislikes)s d]") % {
			'title': v['snippet']['title'],
			'author': v['snippet']['channelTitle'],
			'duration': _yt_duration(v['contentDetails']['duration']),
			'uploaded': _yt_date(v['snippet']['publishedAt'], lib.parent.cfg.get('urls', 'yt_date_format', '%b %d %Y')),
			'views': _yt_round(v['statistics']['viewCount']),
			'likes': _yt_round(v['statistics']['likeCount']),
			'dislikes': _yt_round(v['statistics']['dislikeCount']),
		})
	except urllib2.HTTPError as e:
		if e.getcode() == 403:
			return 'API limit exceeded'
		else:
			return str(e)
	except IndexError:
		return 'no results'
	except Exception as e:
		return str(e)

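# Query the Twitch Helix API for a channel's stream status. Note: Helix now
# also requires an OAuth bearer token alongside Client-ID, so this request
# may be rejected until an Authorization header is added.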
def gottwitch(uri):
	url = 'https://api.twitch.tv/helix/streams?user_login=%s' % uri.split('/')[0]
	opener = urllib2.build_opener()
	opener.addheaders = [('Client-ID', lib.parent.cfg.get('urls', 'twitch_api_key'))]
	respdata = opener.open(url).read()
	twitch = json.loads(respdata)['data']
	try:
		# TODO: add current game.
		return unescape('\037%s\037 is %s (%s)' % (twitch[0]['user_name'], twitch[0]['type'], twitch[0]['title']))
	except IndexError: # 'data' is empty when the channel is not live
		return 'Channel offline.'

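# Render a byte count with binary (IEC) units, e.g.
# _humanize_bytes(2048) -> '2.00kiB'.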
def _humanize_bytes(b):
	b = int(b)
	i = 0
	table = " kMGTPEZYRQ"
	while b >= 1024: # '>=' so that exactly 1024 bytes reads as 1.00kiB
		i += 1
		b /= 1024.0
	if i == 0:
		return "%dB" % (b)
	else:
		return "%.2f%siB" % (b, table[i])

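# Fetch a URL and build a one-line summary: content type, size and, for
# text/html, the page title. URLs matched by other_regexes are skipped so
# their dedicated handlers can respond instead.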
def goturl(url):
	output = []
	for _, group in other_regexes:
		for regex in group:
			if regex.match(url):
				return None
	request = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'})
	opener = urllib2.build_opener(SmartRedirectHandler())

	# Send request and handle errors
	try:
		response = opener.open(request, timeout=2)
	except urllib2.HTTPError as e:
		return 'Request error: %s %s' % (e.code, e.reason)
	except urllib2.URLError as e:
		return 'Request error: %s' % (e.reason)
	except TimeoutError:
		return 'Request error: request timed out'
	except Exception as e:
		return 'Unknown error: %s %r' % (type(e).__name__, e.args)

	# Try to add type and length headers to reply
	c_type = response.getheader('Content-Type', '').split(';', 1)[0]
	c_len = response.getheader('Content-Length')
	if c_type != '':
		output.append("[%s] " % (c_type))
	else:
		output.append("[no type] ")
	if c_type != "text/html": # else length will be provided by HTML code below
		if c_len is not None:
			output.append("[%s] " % (_humanize_bytes(c_len)))
		else:
			output.append("[no length] ")

	# Try to add title if HTML
	if c_type == 'text/html':
		try:
			responsebody = response.read(1024*1024) # cap the read at 1MiB
		except Exception as e:
			output.append('Error reading response body: %s %r' % (type(e).__name__, e.args))
		else:
			if c_len is not None and len(responsebody) != int(c_len):
				output.append("[actual %s; Content-Length %s] " % (_humanize_bytes(len(responsebody)), _humanize_bytes(c_len)))
			else:
				output.append("[%s] " % (_humanize_bytes(len(responsebody))))
			try:
				soup = BeautifulSoup(responsebody)
				if soup.title:
					output.append('Title: ' + unescape('%s' % (soup.find('title').string.strip())))
				else:
					output.append('No title')
			except Exception as e:
				output.append('Title error: %s %r ' % (type(e).__name__, e.args))

	return ''.join(output)

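# Handler tables: each entry pairs a handler with a tuple of regexes whose
# matches on a message are passed to that handler. other_regexes doubles as
# goturl()'s exclusion list.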
url_regex = (
	re.compile(r'https?://(?:[^/\s.]+\.)+[^/\s.]+(?:/\S+)?'),
)
other_regexes = (
)
regexes = other_regexes + (
	(goturl, url_regex),
)