# Erebus IRC bot - Author: Conny Sjoblom
# vim: fileencoding=utf-8
# URL Checker
# This file is released into the public domain; see http://unlicense.org/
# module info
# Metadata consumed by the Erebus module loader.
modinfo = {
	'author': 'Erebus Team',
	'license': 'public domain',
	'compatible': [0],   # compatible bot API versions
	'depends': [],       # hard dependencies (other modules)
	'softdeps': [],      # optional dependencies
}
# http://embed.ly/tools/generator
# preamble
import modlib
# Per-module helper instance: provides hook registration and parent-bot access.
lib = modlib.modlib(__name__)
modstart = lib.modstart
modstop = lib.modstop
# module code
import sys
# Python 2/3 compatibility shims: expose the same names (stringbase, urllib2,
# urlparse, html, BeautifulSoup) regardless of interpreter major version.
if sys.version_info.major < 3:
	stringbase = basestring
	import urllib2
	import urlparse
	import HTMLParser
	html = HTMLParser.HTMLParser()  # instance provides .unescape() like the py3 html module
	from BeautifulSoup import BeautifulSoup
else:
	stringbase = str
	import urllib.request as urllib2
	import urllib.parse as urlparse
	import html
	from bs4 import BeautifulSoup

import re, json, datetime
a83e1f9c | 40 | |
# Matches a full IRC hostmask of the form 'nick!user@host'.
hostmask_regex = re.compile(r'^(.*)!(.*)@(.*)$')

def parser_hostmask(hostmask):
	"""Parse an IRC hostmask string into a dict.

	Returns {'nick': ..., 'user': ..., 'host': ...}.  If *hostmask* is
	already a dict it is returned unchanged; if it does not match the
	'nick!user@host' shape the whole string is treated as the nick; if it
	is None all three fields are None.
	"""
	if isinstance(hostmask, dict):
		return hostmask

	nick, user, host = None, None, None

	if hostmask is not None:
		m = hostmask_regex.match(hostmask)
		if m:
			nick, user, host = m.groups()
		else:
			# Not a full mask — keep the raw value as the nick.
			nick = hostmask

	return {'nick': nick, 'user': user, 'host': host}
66 | ||
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
	"""Redirect handler that records the redirect status code.

	The stock handler follows 301/302 transparently; this subclass tags
	the final response object with a .status attribute so callers can
	tell a redirect was taken.
	"""

	def _tag(self, result, code):
		# Attach the redirect code to the response before returning it.
		result.status = code
		return result

	def http_error_301(self, req, fp, code, msg, headers):
		return self._tag(
			urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers),
			code)

	def http_error_302(self, req, fp, code, msg, headers):
		return self._tag(
			urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers),
			code)
79 | ||
def process_line(line):
	"""Scan *line* with every registered regex and collect handler replies.

	Iterates the module-level ``regexes`` dispatch table; each match is fed
	to its action (string matches as one argument, tuple matches unpacked).
	Stops once more than the configured [urls] limit matches were seen and
	returns whatever responses were gathered so far.
	"""
	responses = []
	seen = 0
	limit = lib.parent.cfg.getint('urls', 'limit', 2)
	for action, group in regexes:
		for regex in group:
			for match in regex.findall(line):
				if not match:
					continue
				seen += 1
				if seen > limit:
					return responses
				# findall yields a plain string for single-group patterns,
				# a tuple for multi-group ones.
				reply = action(match) if isinstance(match, stringbase) else action(*match)
				if reply is not None and reply != "":
					responses.append(reply)
	return responses
98 | ||
@lib.hooknum("PRIVMSG")
def privmsg_hook(bot, textline):
	"""PRIVMSG hook: scan channel messages for URLs and announce summaries.

	Extracts the message payload from the raw IRC line, runs it through
	process_line(), and sends the results either one message per response
	([urls] multiline) or joined with ' | ' in a single message.
	"""
	user = parser_hostmask(textline[1:textline.find(' ')])
	chan = textline.split()[2]

	try:
		line = textline.split(None, 3)[3][1:]
	except IndexError:
		# Line carried no trailing message text.
		line = ''

	responses = process_line(line)
	if not responses:
		return
	if lib.parent.cfg.getboolean('urls', 'multiline'):
		for response in responses:
			bot.msg(chan, response, True)
	else:
		bot.msg(chan, ' | '.join(responses), True)
a83e1f9c | 116 | |
def unescape(line):
	"""Decode HTML entities in *line* and collapse whitespace runs to single spaces."""
	# Raw string: '\s' in a plain literal is an invalid escape sequence
	# (SyntaxWarning on modern CPython, a future SyntaxError). Pattern unchanged.
	return re.sub(r'\s+', ' ', html.unescape(line))
a83e1f9c | 119 | |
def gotspotify(type, track):
	"""Look up a Spotify URI ('spotify:<type>:<id>') and return a summary line.

	NOTE(review): ws.spotify.com was retired by Spotify; this endpoint likely
	no longer resolves — confirm and port to the current Web API.
	"""
	url = 'http://ws.spotify.com/lookup/1/?uri=spotify:%s:%s' % (type, track)
	xml = urllib2.urlopen(url).read()
	# NOTE(review): convertEntities is a BeautifulSoup 3 (py2) keyword; bs4 on
	# py3 does not accept it — confirm the py3 code path.
	soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.HTML_ENTITIES)
	lookup_type = soup.contents[2].name

	if lookup_type == 'track':
		name = soup.find('name').string
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		popularity = soup.find('popularity')
		if popularity:
			popularity = float(popularity.string)*100
		length = float(soup.find('length').string)
		# Integer division: on py3 'int/int' is float division, which rendered
		# minutes like '3.55' in the output below.
		minutes = int(length)//60
		seconds = int(length)%60

		return unescape('Track: %s - %s / %s %s:%.2d %2d%%' % (artist_name, name, album_name, minutes, seconds, popularity))

	elif lookup_type == 'album':
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		released = soup.find('released').string
		return unescape('Album: %s - %s - %s' % (artist_name, album_name, released))

	else:
		return 'Unsupported type.'
147 | ||
467acacf | 148 | def _yt_duration(s): |
149 | mo = re.match(r'P(\d+D)?T(\d+H)?(\d+M)?(\d+S)?', s) | |
150 | pcs = [x for x in mo.groups() if x] | |
151 | return ''.join(pcs).lower() | |
152 | def _yt_date(s, f): | |
153 | mo = re.match(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.(\d+)Z', s) | |
154 | return datetime.datetime(*(int(x) for x in mo.groups())).strftime(f) | |
155 | def _yt_round(n): | |
156 | n = float(n) | |
157 | if n >= 10**12: | |
158 | return '%.1ft' % (n/10**12) | |
159 | elif n >= 10**9: | |
160 | return '%.1fb' % (n/10**9) | |
161 | elif n >= 10**6: | |
162 | return '%.1fm' % (n/10**6) | |
163 | elif n >= 10**3: | |
164 | return '%.1fk' % (n/10**3) | |
165 | else: | |
166 | return int(n) | |
167 | ||
def gotyoutube(url):
	"""Return a one-line summary for a YouTube watch URL.

	Pulls the 'v' query parameter, queries the Data API v3 (key from
	[urls] api_key), and formats the snippet/contentDetails/statistics
	fields with the configurable 'yt_format' template.  API and parsing
	failures are returned as strings rather than raised.
	"""
	url_data = urlparse.urlparse(url)
	query = urlparse.parse_qs(url_data.query)
	# Raises KeyError for URLs without a ?v= parameter (propagates to caller).
	video = query["v"][0]
	api_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails,statistics&id=%s&key=%s' % (video, lib.parent.cfg.get('urls', 'api_key'))
	try:
		respdata = urllib2.urlopen(api_url).read()
		v = json.loads(respdata)
		v = v['items'][0]

		# Default template uses IRC control codes: \002 = bold, \037 = underline.
		# NOTE(review): YouTube removed public dislike counts; a missing
		# 'dislikeCount' key raises KeyError, which the generic handler below
		# returns as a string — confirm desired behavior.
		return unescape(lib.parent.cfg.get('urls', 'yt_format', "\002%(author)s\002: \037%(title)s\037 [%(duration)s, uploaded %(uploaded)s, %(views)s v/%(likes)s l/%(dislikes)s d]") % {
			'title': v['snippet']['title'],
			'author': v['snippet']['channelTitle'],
			'duration': _yt_duration(v['contentDetails']['duration']),
			'uploaded': _yt_date(v['snippet']['publishedAt'], lib.parent.cfg.get('urls', 'yt_date_format', '%b %d %Y')),
			'views': _yt_round(v['statistics']['viewCount']),
			'likes': _yt_round(v['statistics']['likeCount']),
			'dislikes': _yt_round(v['statistics']['dislikeCount']),
		})
	except urllib2.HTTPError as e:
		if e.getcode() == 403:
			return 'API limit exceeded'
		else:
			return str(e)
	except IndexError:
		# Empty 'items' list: no such video.
		return 'no results'
	except Exception as e:
		return str(e)
a83e1f9c | 196 | |
def gottwitch(uri):
	"""Return live status for a Twitch channel path ('channelname[/...]').

	Queries the Helix streams endpoint with the configured [urls]
	twitch_api_key as Client-ID.  An offline channel yields an empty
	'data' list, reported as 'Channel offline.'.
	"""
	url = 'https://api.twitch.tv/helix/streams?user_login=%s' % uri.split('/')[0]
	opener = urllib2.build_opener()
	opener.addheaders = [('Client-ID', lib.parent.cfg.get('urls', 'twitch_api_key'))]
	respdata = opener.open(url).read()
	twitch = json.loads(respdata)['data']
	try:
		# TODO: add current game.
		return unescape('\037%s\037 is %s (%s)' % (twitch[0]['user_name'], twitch[0]['type'], twitch[0]['title']))
	except (IndexError, KeyError):
		# Empty 'data' / missing fields means not live. The previous bare
		# 'except:' also swallowed KeyboardInterrupt/SystemExit and real bugs.
		return 'Channel offline.'
390fbad4 | 208 | |
ecbed328 JR |
209 | def _humanize_bytes(b): |
210 | b = int(b) | |
211 | i = 0 | |
212 | table = " kMGTPEZYRQ" | |
213 | while b > 1024: | |
214 | i += 1 | |
215 | b /= 1024.0 | |
216 | if i == 0: | |
217 | return "%dB" % (b) | |
218 | else: | |
219 | return "%.2f%siB" % (b, table[i]) | |
220 | ||
def goturl(url):
	"""Fetch *url* and return a summary: '[type] [size] Title: ...'.

	Returns None when the URL matches a more specific handler in
	other_regexes (so that handler's output wins).  All request errors are
	returned as strings.  For HTML responses, up to 1 MiB of the body is
	read and the <title> is extracted.
	"""
	output = []
	# Defer to specialized handlers registered in other_regexes.
	for _, group in other_regexes:
		for regex in group:
			if regex.match(url):
				return None
	# Spoof a browser UA; some sites refuse default urllib requests.
	request = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'})
	opener = urllib2.build_opener(SmartRedirectHandler())

	# Send request and handle errors
	try:
		response = opener.open(request, timeout=2)
	except urllib2.HTTPError as e:
		return 'Request error: %s %s' % (e.code, e.reason)
	except urllib2.URLError as e:
		return 'Request error: %s' % (e.reason)
	except TimeoutError:
		return 'Request error: request timed out'
	except Exception as e:
		return 'Unknown error: %s %r' % (type(e).__name__, e.args)

	# Try to add type and length headers to reply
	c_type = response.getheader('Content-Type', '').split(';', 1)[0]
	c_len = response.getheader('Content-Length')
	if c_type != '':
		output.append("[%s] " % (c_type))
	else:
		output.append("[no type] ")
	if c_type != "text/html": # else length will be provided by HTML code below
		if c_len is not None:
			output.append("[%s] " % (_humanize_bytes(c_len)))
		else:
			output.append("[no length] ")

	# Try to add title if HTML
	if c_type == 'text/html':
		try:
			# Cap the read at 1 MiB so huge pages can't stall the bot.
			# (Removed a leftover debug print of the body's type here.)
			responsebody = response.read(1024*1024)
		except Exception as e:
			output.append('Error reading response body: %s %r' % (type(e).__name__, e.args))
		else:
			if c_len is not None and len(responsebody) != int(c_len):
				output.append("[actual %s; Content-Length %s] " % (_humanize_bytes(len(responsebody)), _humanize_bytes(c_len)))
			else:
				output.append("[%s] " % (_humanize_bytes(len(responsebody))))
			try:
				soup = BeautifulSoup(responsebody)
				if soup.title:
					output.append('Title: ' + unescape('%s' % (soup.find('title').string.strip())))
				else:
					output.append('No title')
			except Exception as e:
				output.append('Title error: %s %r ' % (type(e).__name__, e.args))

	return ''.join(output)
467acacf | 277 | |
# Generic URL matcher: scheme, dotted host, optional path.
url_regex = (
	re.compile(r'https?://(?:[^/\s.]+\.)+[^/\s.]+(?:/\S+)?'),
)
# Site-specific handlers checked before the generic one (currently none).
other_regexes = (
)
# Dispatch table consumed by process_line(): (action, regex-group) pairs,
# specialized handlers first so goturl only sees leftovers.
regexes = other_regexes + (
	(goturl, url_regex),
)