]>
jfr.im git - irc/rizon/acid.git/blob - pyva/src/main/python/internets/api/feed.py
6 from BaseHTTPServer
import BaseHTTPRequestHandler
7 from BeautifulSoup
import BeautifulSoup
8 from decimal
import Decimal
9 from StringIO
import StringIO
10 from urlparse
import urlparse
11 from xml
.dom
.minidom
import Element
, Document
12 from xml
.dom
.minidom
import parse
14 class InputError(Exception):
15 def __init__(self
, msg
):
21 class FeedError(Exception):
22 def __init__(self
, e
):
23 if hasattr(e
, 'code'):
27 self
.msg
= 'not found.'
29 self
.msg
= 'this resource is unavailable.'
31 self
.msg
= 'the server has encountered an unexpected error.'
33 self
.msg
= 'invalid response from the server. Try again later.'
35 self
.msg
= 'this resource is temporarily unavailable. Try again later.'
37 self
.msg
= 'this resource is not supported.'
39 self
.msg
= 'something went wrong while connecting (%s)' % BaseHTTPRequestHandler
.responses
[e
.code
][0]
43 elif hasattr(e
, 'reason'):
47 self
.msg
= 'connection timed out. Try again later.'
53 elif hasattr(e
, 'message'):
55 self
.msg
= 'invalid response from the server. Try again later.'
67 def __init__(self
, value
, fake_ua
=False):
69 raise InputError('Invalid feed input.')
71 if isinstance(value
, str) or isinstance(value
, unicode):
73 opener
= urllib2
.build_opener()
75 opener
.addheaders
= [('User-Agent', 'User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1')]
77 opener
.addheaders
= [('User-Agent', 'Rizon Internets bot - www.rizon.net')]
79 feed
= opener
.open(value
.replace(' ', '%20'), timeout
=20)
80 self
._html
= feed
.read()
82 except urllib2
.URLError
, e
:
84 except httplib
.BadStatusLine
, e
:
87 raise InputError('Invalid feed input type.')
93 return BeautifulSoup(self
._html
, convertEntities
=BeautifulSoup
.HTML_ENTITIES
)
97 raise InputError('Invalid feed input.')
99 if isinstance(value
, basestring
):
100 feed
= HtmlFeed(value
)
101 return json
.load(StringIO(feed
.html()))
103 raise InputError('Invalid feed input type.')
106 def __init__(self
, value
, namespaces
= None):
108 raise InputError('Invalid feed input.')
110 self
.namespaces
= namespaces
112 if isinstance(value
, basestring
):
113 feed
= HtmlFeed(value
)
114 self
._element
= parse(StringIO(feed
.html()))
115 elif isinstance(value
, Element
) or isinstance(value
, Document
):
116 self
._element
= value
118 raise InputError('Invalid feed input type.')
120 error
= xpath
.findvalue('/error/message', self
._element
)
123 raise FeedError(error
)
def elements(self, query):
    """Wrap every node matched by *query* in its own XmlFeed.

    The query is evaluated with ``xpath.find`` against this feed's
    element, using this feed's namespace mapping.  Each match is handed
    to ``XmlFeed`` together with that same mapping, and the wrappers are
    returned as a list in the order the matches were produced.
    """
    matches = xpath.find(query, self._element, namespaces=self.namespaces)
    wrapped = []
    for node in matches:
        wrapped.append(XmlFeed(node, self.namespaces))
    return wrapped
128 def text(self
, query
, default
=None):
129 result
= xpath
.findvalue(query
, self
._element
, namespaces
=self
.namespaces
)
134 value
= result
.strip()
136 if isinstance(value
, unicode):
138 value
= value
.encode('latin-1').decode('utf-8')
144 def int(self
, query
, default
= None):
145 result
= self
.text(query
, None)
155 def decimal(self
, query
, default
= None):
156 result
= self
.text(query
, None)
162 return Decimal(result
)
166 def bool(self
, query
, default
= None):
167 result
= self
.text(query
, None)
172 if 'true' in result
.lower() or result
== '1':
174 elif 'false' in result
.lower() or result
== '0':
178 return int(result
) > 0
182 def attribute(self
, query
, attr
, default
= None, checkEveryOccurrence
= False):
183 elements
= xpath
.find(query
, self
._element
)
185 if len(elements
) > 0 and not checkEveryOccurrence
:
186 if elements
[0].hasAttribute(attr
):
187 return elements
[0].getAttribute(attr
)
192 if e
.hasAttribute(attr
):
193 return e
.getAttribute(attr
)