jfr.im git - irc/rizon/acid.git/blob - pyva/src/main/python/internets/api/feed.py
Import acidictive 4 and pyva plugin
[irc/rizon/acid.git] / pyva / src / main / python / internets / api / feed.py
1 import httplib
2 import json
3 import socket
4 import urllib2
5 import xpath
6 from BaseHTTPServer import BaseHTTPRequestHandler
7 from BeautifulSoup import BeautifulSoup
8 from decimal import Decimal
9 from StringIO import StringIO
10 from urlparse import urlparse
11 from xml.dom.minidom import Element, Document
12 from xml.dom.minidom import parse
13
class InputError(Exception):
    """Error raised by the feed helpers for missing or unsupported input."""

    def __init__(self, msg):
        # Kept on the instance so __str__ can render it later.
        self.msg = msg

    def __str__(self):
        """Return the stored message coerced to a string."""
        text = str(self.msg)
        return text
20
class FeedError(Exception):
    """Translate a fetch/parse failure into a short, user-facing message.

    ``e`` may be:
      * an object with a ``code`` attribute (HTTP-style error),
      * an object with a ``reason`` attribute (URL-style error),
      * an object with an empty ``message`` attribute,
      * anything else, e.g. another exception or a plain error string.

    After construction ``msg``, ``code`` and ``url`` are always set, so
    ``str(error)`` never fails on a missing attribute.
    """

    # Dedicated wording for the HTTP status codes handled explicitly.
    _STATUS_MESSAGES = {
        404: 'not found.',
        406: 'this resource is unavailable.',
        500: 'the server has encountered an unexpected error.',
        502: 'invalid response from the server. Try again later.',
        503: 'this resource is temporarily unavailable. Try again later.',
        512: 'this resource is not supported.',
    }
    _BAD_RESPONSE = 'invalid response from the server. Try again later.'

    def __init__(self, e):
        self.code = None
        self.url = None

        if hasattr(e, 'code'):
            self.code = e.code
            # Not every object carrying a status code also carries a URL.
            self.url = getattr(e, 'url', None)
            self.msg = self._STATUS_MESSAGES.get(e.code)
            if self.msg is None:
                # Non-standard codes are missing from the stdlib table;
                # fall back to the bare number instead of raising KeyError.
                phrase = BaseHTTPRequestHandler.responses.get(
                    e.code, (str(e.code),))[0]
                self.msg = 'something went wrong while connecting (%s)' % phrase
        elif hasattr(e, 'reason'):
            reason = str(e.reason)
            if reason == 'timed out':
                self.msg = 'connection timed out. Try again later.'
            else:
                self.msg = reason
        elif hasattr(e, 'message') and e.message == '':
            self.msg = self._BAD_RESPONSE
        else:
            # Plain values (such as an error string) and exceptions with a
            # non-empty message: use their text.
            try:
                text = '%s' % (e,)
            except UnicodeError:
                text = repr(e)
            self.msg = text or self._BAD_RESPONSE

    def __str__(self):
        if isinstance(self.msg, str):
            return self.msg
        # Python 2: msg may be a unicode object; __str__ must return bytes.
        return self.msg.encode('utf-8')
65
class HtmlFeed:
    """Download a URL and keep the raw response body.

    value   -- URL to fetch (a str or unicode object); spaces are replaced
               with ``%20`` before the request is made.
    fake_ua -- when true, send a browser User-Agent instead of the bot's own.

    Raises InputError for None or non-string input, and FeedError when the
    request fails with urllib2.URLError or httplib.BadStatusLine.
    """

    _BROWSER_UA = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1'
    _BOT_UA = 'Rizon Internets bot - www.rizon.net'

    def __init__(self, value, fake_ua=False):
        if value is None:
            raise InputError('Invalid feed input.')

        if not isinstance(value, (str, unicode)):
            raise InputError('Invalid feed input type.')

        opener = urllib2.build_opener()
        # The header value must not repeat the "User-Agent:" field name.
        agent = self._BROWSER_UA if fake_ua else self._BOT_UA
        opener.addheaders = [('User-Agent', agent)]

        try:
            feed = opener.open(value.replace(' ', '%20'), timeout=20)
            try:
                self._html = feed.read()
            finally:
                # Release the connection even if read() fails midway.
                feed.close()
        except (urllib2.URLError, httplib.BadStatusLine) as e:
            raise FeedError(e)

    def html(self):
        """Return the response body exactly as it was read."""
        return self._html

    def get_soup(self):
        """Parse the body with BeautifulSoup, converting HTML entities."""
        return BeautifulSoup(self._html, convertEntities=BeautifulSoup.HTML_ENTITIES)
94
def get_json(value):
    """Fetch ``value`` (a URL string) and return its body decoded as JSON.

    Raises InputError when ``value`` is None or not a string. Errors raised
    by HtmlFeed while fetching propagate unchanged.
    """
    if value is None:
        raise InputError('Invalid feed input.')

    if not isinstance(value, basestring):
        raise InputError('Invalid feed input type.')

    # The body is already in memory, so decode it directly rather than
    # wrapping it in a file-like object first.
    return json.loads(HtmlFeed(value).html())
104
class XmlFeed:
    """XPath accessors over an XML document or element.

    value      -- a URL string (fetched through HtmlFeed and parsed with
                  minidom), or an existing minidom Element/Document.
    namespaces -- passed as the ``namespaces`` argument of every xpath
                  query made by this object.

    Raises InputError for None or unsupported input, and FeedError when the
    document contains a value at ``/error/message``.
    """

    def __init__(self, value, namespaces = None):
        if value is None:
            raise InputError('Invalid feed input.')

        self.namespaces = namespaces

        if isinstance(value, basestring):
            self._element = parse(StringIO(HtmlFeed(value).html()))
        elif isinstance(value, (Element, Document)):
            self._element = value
        else:
            raise InputError('Invalid feed input type.')

        error = xpath.findvalue('/error/message', self._element)

        if error is not None:
            raise FeedError(error)

    def elements(self, query):
        """Return one XmlFeed per node matched by ``query``."""
        return [XmlFeed(x, self.namespaces) for x in xpath.find(query, self._element, namespaces=self.namespaces)]

    def text(self, query, default=None):
        """Return the stripped value of ``query``, or ``default`` if empty."""
        result = xpath.findvalue(query, self._element, namespaces=self.namespaces)
        value = result.strip() if result else default

        if isinstance(value, unicode):
            # Best effort: presumably repairs UTF-8 text that was decoded
            # as latin-1. Text that does not round-trip is kept as-is.
            try:
                value = value.encode('latin-1').decode('utf-8')
            except UnicodeError:
                pass

        return value

    def _convert(self, query, default, convert):
        """Apply ``convert`` to the text of ``query``; ``default`` on failure."""
        result = self.text(query, None)

        if result is None:
            return default

        try:
            return convert(result)
        except (ArithmeticError, TypeError, ValueError):
            # decimal.InvalidOperation is an ArithmeticError subclass.
            return default

    def int(self, query, default = None):
        """Return the value of ``query`` as an int, or ``default``."""
        # Inside a method body ``int`` is the builtin, not this method.
        return self._convert(query, default, int)

    def decimal(self, query, default = None):
        """Return the value of ``query`` as a Decimal, or ``default``."""
        return self._convert(query, default, Decimal)

    def bool(self, query, default = None):
        """Interpret the value of ``query`` as a boolean.

        Text containing 'true' (or exactly '1') is True, and text containing
        'false' (or exactly '0') is False. Any other integer is True when
        positive. Anything else gives ``default``.
        """
        result = self.text(query, None)

        if result is None:
            return default

        lowered = result.lower()

        if 'true' in lowered or result == '1':
            return True
        if 'false' in lowered or result == '0':
            return False

        try:
            return int(result) > 0
        except ValueError:
            return default

    def attribute(self, query, attr, default = None, checkEveryOccurrence = False):
        """Return attribute ``attr`` of a node matched by ``query``.

        Only the first match is inspected unless ``checkEveryOccurrence`` is
        true, in which case the first match that has the attribute wins.
        Returns ``default`` when no inspected node has the attribute.
        """
        elements = xpath.find(query, self._element, namespaces=self.namespaces)
        candidates = elements if checkEveryOccurrence else elements[:1]

        for element in candidates:
            if element.hasAttribute(attr):
                return element.getAttribute(attr)

        return default