# jfr.im git - erebus.git - modules/urls.py
# commit: urls - massive rework
1 # Erebus IRC bot - Author: Erebus Team
2 # vim: fileencoding=utf-8
3 # URL Checker
4 # This file is released into the public domain; see http://unlicense.org/
5
6 # module info
modinfo = {
	'author': 'Erebus Team',
	'license': 'public domain',
	'compatible': [0],	# bot core API versions this module works with — presumably checked by the loader; confirm against modlib
	'depends': [],		# hard module dependencies (none)
	'softdeps': [],		# optional module dependencies (none)
}
14
15 # http://embed.ly/tools/generator
16
17 # preamble
import modlib
lib = modlib.modlib(__name__)	# per-module helper object (hooks, cfg access via lib.parent)
# Re-export the standard module lifecycle entry points for the loader.
modstart = lib.modstart
modstop = lib.modstop
22
23 # module code
24 import sys
25 if sys.version_info.major < 3:
26 import urllib2
27 import urlparse
28 import HTMLParser
29 from BeautifulSoup import BeautifulSoup
30 else:
31 import urllib.request as urllib2
32 import urllib.parse as urlparse
33 import html.parser as HTMLParser
34 from bs4 import BeautifulSoup
35
36 import re, json, datetime
37
# Shared parser instance; unescape() uses its .unescape() to decode HTML entities.
html_parser = HTMLParser.HTMLParser()

# nick!user@host — used by parser_hostmask() to split a raw IRC hostmask.
hostmask_regex = re.compile(r'^(.*)!(.*)@(.*)$')
41
def parser_hostmask(hostmask):
	"""Split an IRC hostmask ('nick!user@host') into a dict.

	A dict argument is assumed to be already parsed and is returned
	unchanged. A string that does not match nick!user@host is treated
	as a bare nick; None yields all-None fields.
	"""
	if isinstance(hostmask, dict):
		return hostmask

	nick = user = host = None

	if hostmask is not None:
		match = hostmask_regex.match(hostmask)
		if match:
			nick, user, host = match.groups()
		else:
			# No '!'/'@' separators — the whole string is the nick.
			nick = hostmask

	return {'nick': nick, 'user': user, 'host': host}
65
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
	"""Redirect handler that records the redirect status code on the response.

	After a 301/302 the returned response object carries a .status
	attribute with the original redirect code.
	"""

	def http_error_301(self, req, fp, code, msg, headers):
		response = urllib2.HTTPRedirectHandler.http_error_301(
			self, req, fp, code, msg, headers)
		# Remember that we arrived here via a permanent redirect.
		response.status = code
		return response

	def http_error_302(self, req, fp, code, msg, headers):
		response = urllib2.HTTPRedirectHandler.http_error_302(
			self, req, fp, code, msg, headers)
		# Remember that we arrived here via a temporary redirect.
		response.status = code
		return response
78
def process_line(line):
	"""Scan one message line for recognized URLs and collect reply strings.

	Each handler's result is formatted as "Prefix: info". At most
	cfg [urls] limit (default 2) matches are processed per line;
	handlers returning None (e.g. goturl skipping specialized URLs)
	are silently dropped.
	"""
	responses = []
	num_found = 0
	limit = lib.parent.cfg.getint('urls', 'limit', 2)
	for action, group, prefix in regexes:
		for regex in group:
			for match in regex.findall(line):
				if match:
					num_found += 1
					if num_found > limit:
						return responses
					resp = action(match)
					if resp is not None:
						# BUG FIX: reuse resp — the old code called
						# action(match) a second time here, re-running
						# the handler (and re-fetching the URL).
						responses.append("%s: %s" % (prefix, resp))
	return responses
94
@lib.hooknum("PRIVMSG")
def privmsg_hook(bot, textline):
	"""Raw PRIVMSG hook: scan the message body for URLs and reply in-channel."""
	user = parser_hostmask(textline[1:textline.find(' ')])
	chan = textline.split()[2]

	try:
		# Fourth space-separated field, minus the leading ':' — the message body.
		line = textline.split(None, 3)[3][1:]
	except IndexError:
		line = ''

	responses = process_line(line)
	if not responses:
		return
	if lib.parent.cfg.getboolean('urls', 'multiline'):
		# One reply message per URL found.
		for response in responses:
			bot.msg(chan, response, True)
	else:
		# Single reply with all results joined together.
		bot.msg(chan, ' | '.join(responses), True)
112
def unescape(line):
	"""Decode HTML entities in *line* and collapse whitespace runs to single spaces."""
	# FIX: raw string for the regex — '\s' in a plain string is an invalid
	# Python escape (SyntaxWarning on modern interpreters).
	return re.sub(r'\s+', ' ', html_parser.unescape(line))
115
def gotspotify(type, track):
	"""Look up a Spotify track/album and return a one-line description.

	NOTE(review): ws.spotify.com is a long-retired endpoint — confirm this
	lookup still works. convertEntities is a BeautifulSoup 3 keyword; under
	the Python 3 import branch (bs4) this call likely raises TypeError —
	verify. Also, process_line calls handlers as action(match) with a
	single argument, but this function takes two — confirm how the
	(type, track_id) regex groups are meant to reach it.
	"""
	url = 'http://ws.spotify.com/lookup/1/?uri=spotify:%s:%s' % (type, track)
	xml = urllib2.urlopen(url).read()
	soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.HTML_ENTITIES)
	# contents[2]: skip past the XML prolog to the root element's tag name.
	lookup_type = soup.contents[2].name

	if lookup_type == 'track':
		name = soup.find('name').string
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		popularity = soup.find('popularity')
		if popularity:
			popularity = float(popularity.string)*100	# 0..1 fraction -> percent
		length = float(soup.find('length').string)	# track length in seconds
		# NOTE(review): on Python 3 this is true division (float minutes),
		# not floor division — confirm whether %s formatting below is
		# intended to show e.g. '3.5:30'.
		minutes = int(length)/60
		seconds = int(length)%60

		return unescape('Track: %s - %s / %s %s:%.2d %2d%%' % (artist_name, name, album_name, minutes, seconds, popularity))

	elif lookup_type == 'album':
		album_name = soup.find('album').find('name').string
		artist_name = soup.find('artist').find('name').string
		released = soup.find('released').string
		return unescape('Album: %s - %s - %s' % (artist_name, album_name, released))

	else:
		return 'Unsupported type.'
143
144 def _yt_duration(s):
145 mo = re.match(r'P(\d+D)?T(\d+H)?(\d+M)?(\d+S)?', s)
146 pcs = [x for x in mo.groups() if x]
147 return ''.join(pcs).lower()
148 def _yt_date(s, f):
149 mo = re.match(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.(\d+)Z', s)
150 return datetime.datetime(*(int(x) for x in mo.groups())).strftime(f)
151 def _yt_round(n):
152 n = float(n)
153 if n >= 10**12:
154 return '%.1ft' % (n/10**12)
155 elif n >= 10**9:
156 return '%.1fb' % (n/10**9)
157 elif n >= 10**6:
158 return '%.1fm' % (n/10**6)
159 elif n >= 10**3:
160 return '%.1fk' % (n/10**3)
161 else:
162 return int(n)
163
def gotyoutube(url):
	"""Look up a YouTube watch URL via the Data API v3 and return a summary line.

	Output format is configurable via cfg [urls] yt_format; the default
	uses IRC bold/underline control characters. Requires cfg [urls]
	api_key. Returns an error string on failure rather than raising.
	"""
	url_data = urlparse.urlparse(url)
	query = urlparse.parse_qs(url_data.query)
	# The ?v= query parameter is the video id.
	video = query["v"][0]
	api_url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails,statistics&id=%s&key=%s' % (video, lib.parent.cfg.get('urls', 'api_key'))
	try:
		respdata = urllib2.urlopen(api_url).read()
		v = json.loads(respdata)
		v = v['items'][0]

		return unescape(lib.parent.cfg.get('urls', 'yt_format', "\002%(author)s\002: \037%(title)s\037 [%(duration)s, uploaded %(uploaded)s, %(views)s v/%(likes)s l/%(dislikes)s d]") % {
			'title': v['snippet']['title'],
			'author': v['snippet']['channelTitle'],
			'duration': _yt_duration(v['contentDetails']['duration']),
			'uploaded': _yt_date(v['snippet']['publishedAt'], lib.parent.cfg.get('urls', 'yt_date_format', '%b %d %Y')),
			'views': _yt_round(v['statistics']['viewCount']),
			'likes': _yt_round(v['statistics']['likeCount']),
			'dislikes': _yt_round(v['statistics']['dislikeCount']),
		})
	except urllib2.HTTPError as e:
		# 403 from this API — presumably the quota-exceeded response; confirm.
		if e.getcode() == 403:
			return 'API limit exceeded'
		else:
			return str(e)
	except IndexError:
		# items == []: unknown or removed video id.
		return 'no results'
	except Exception as e:
		return str(e)
192
def gottwitch(uri):
	"""Look up a Twitch channel's live status via the Helix streams API.

	*uri* is the path portion after twitch.tv/; only the first path
	segment (the login name) is used. Requires cfg [urls]
	twitch_api_key for the Client-ID header.
	"""
	url = 'https://api.twitch.tv/helix/streams?user_login=%s' % uri.split('/')[0]
	opener = urllib2.build_opener()
	opener.addheaders = [('Client-ID', lib.parent.cfg.get('urls', 'twitch_api_key'))]
	respdata = opener.open(url).read()
	twitch = json.loads(respdata)['data']
	try:
		# TODO: add current game.
		return unescape('\037%s\037 is %s (%s)' % (twitch[0]['user_name'], twitch[0]['type'], twitch[0]['title']))
	except (IndexError, KeyError):
		# FIX: 'data' is empty when the channel is not live; the previous
		# bare except also hid real bugs (NameError, etc.) as "offline".
		return 'Channel offline.'
204
def goturl(url):
	"""Fetch *url* and return its HTML <title>, or an error string.

	Returns None for URLs already covered by a specialized handler
	(Spotify/YouTube/Twitch) so they are not reported twice.
	"""
	for _, group, _ in other_regexes:
		for regex in group:
			if regex.match(url):
				return None
	request = urllib2.Request(url)
	opener = urllib2.build_opener(SmartRedirectHandler())
	try:
		soup = BeautifulSoup(opener.open(request, timeout=0.5))
		return unescape('%s' % (soup.title.string))
	except urllib2.HTTPError as e:
		return 'Error: %s %s' % (e.code, e.reason)
	except Exception as e:
		# FIX: e.message does not exist on Python 3 exceptions — the old
		# code raised AttributeError inside this handler. Format the
		# exception object itself instead.
		return 'Error: %r' % (e,)
219
# URL patterns and their handlers. Each entry of *_regex is a tuple of
# compiled patterns; regexes maps (handler, patterns, reply prefix).
url_regex = (
	# Generic http(s) URL with a dotted hostname; path optional.
	re.compile(r'https?://[^/\s]+\.[^/\s]+(?:/\S+)?'),
)
spotify_regex = (
	# spotify:<type>:<22-char id> URIs.
	re.compile(r'spotify:(?P<type>\w+):(?P<track_id>\w{22})'),
	# open.spotify.com web links.
	re.compile(r'https?://open.spotify.com/(?P<type>\w+)/(?P<track_id>\w+)')
)
youtube_regex = (
	re.compile(r'https?://(?:www\.)?youtube\.com/watch\?[a-zA-Z0-9=&_\-]+'),
)
twitch_regex = (
	re.compile(r'https?:\/\/(?:www\.)?twitch.tv\/([A-Za-z0-9]*)'),
)
# Specialized handlers; goturl consults this table to skip these URLs.
other_regexes = (
	(gotspotify, spotify_regex, 'Spotify'),
	(gotyoutube, youtube_regex, 'YouTube'),
	(gottwitch, twitch_regex, 'Twitch'),
)
# Full dispatch order used by process_line: specialized first, generic
# title fetcher last.
regexes = other_regexes + (
	(goturl, url_regex, 'Title'),
)