]>
Commit | Line | Data |
---|---|---|
1 | """ | |
2 | Internal utility functions. | |
3 | ||
4 | `htmlentitydecode` came from here: | |
5 | http://wiki.python.org/moin/EscapingHtml | |
6 | """ | |
7 | ||
8 | from __future__ import print_function | |
9 | ||
10 | import contextlib | |
11 | import re | |
12 | import sys | |
13 | import textwrap | |
14 | import time | |
15 | import socket | |
16 | ||
17 | PY_3_OR_HIGHER = sys.version_info >= (3, 0) | |
18 | ||
19 | try: | |
20 | from html.entities import name2codepoint | |
21 | unichr = chr | |
22 | import urllib.request as urllib2 | |
23 | import urllib.parse as urlparse | |
24 | except ImportError: | |
25 | from htmlentitydefs import name2codepoint | |
26 | import urllib2 | |
27 | import urlparse | |
28 | ||
def htmlentitydecode(s):
    """Replace named HTML entities in ``s`` with the characters they denote.

    Only ``&name;`` sequences whose name is a key of ``name2codepoint`` are
    substituted; everything else in ``s`` is returned untouched.
    """
    entity_pattern = '&(%s);' % '|'.join(name2codepoint)

    def to_character(match):
        # group(1) is the entity name without the surrounding '&' and ';'.
        return unichr(name2codepoint[match.group(1)])

    return re.sub(entity_pattern, to_character, s)
33 | ||
def smrt_input(globals_, locals_, ps1=">>> ", ps2="... "):
    """Read one expression from standard input, evaluate it and echo it.

    Lines are read (prompting with ``ps1`` for the first line and ``ps2``
    for every following one) and joined with newlines until the accumulated
    text evaluates without a ``SyntaxError``. The result is printed unless
    it is ``None``.

    Args:
        globals_: Global namespace passed to ``eval``.
        locals_: Local namespace passed to ``eval``.
        ps1: Prompt shown for the first line.
        ps2: Prompt shown for continuation lines.

    Returns:
        None.

    Raises:
        Any exception other than ``SyntaxError`` raised while reading a
        line or evaluating the expression propagates to the caller.
    """
    # On Python 2 the builtin input() evaluates what it reads; raw_input()
    # is the plain line reader there. On Python 3 only input() exists.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    pending = []
    while True:
        pending.append(read_line(ps2 if pending else ps1))
        try:
            # NOTE(security): eval() runs arbitrary code typed by the user.
            # That is the purpose of this interactive prompt, but it must
            # never be fed input from an untrusted source.
            result = eval('\n'.join(pending), globals_, locals_)
        except SyntaxError:
            # Treated as "expression not complete yet": keep reading lines.
            continue
        # Echo every result except None, so falsy values such as 0 or ''
        # are shown as well.
        if result is not None:
            print(str(result))
        return
49 | ||
def printNicely(string):
    """Write ``string`` to standard output as UTF-8, followed by a newline.

    When ``sys.stdout`` exposes a binary ``buffer``, the encoded bytes are
    written to it directly, bypassing the text layer. Otherwise the encoded
    value is handed to ``print``.

    Args:
        string: The text to print; it must provide ``encode``.
    """
    encoded = string.encode('utf8')
    if hasattr(sys.stdout, 'buffer'):
        # Push out anything still queued in the text layer first; otherwise
        # it would appear *after* the bytes written straight to the buffer.
        sys.stdout.flush()
        sys.stdout.buffer.write(encoded)
        print()
        # Flush the text layer (the newline) before the binary layer so the
        # newline reaches the underlying stream right after the string.
        sys.stdout.flush()
        sys.stdout.buffer.flush()
    else:
        print(encoded)
58 | ||
def actually_bytes(stringy):
    """Return ``stringy`` as a byte string.

    * A byte string (including subclasses) is returned unchanged.
    * A text string is encoded as UTF-8.
    * Any other object is converted with ``str()`` first and then encoded
      if the conversion produced text.

    Args:
        stringy: Any object.

    Returns:
        A ``bytes`` object (``str`` on Python 2).
    """
    if isinstance(stringy, bytes):
        return stringy
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # same code path serves both without a version switch.
    text_type = type(u"")
    if not isinstance(stringy, text_type):
        stringy = str(stringy)
    if isinstance(stringy, text_type):
        stringy = stringy.encode("utf-8")
    return stringy
75 | ||
def err(msg=""):
    """Print ``msg`` (an empty line by default) to standard error."""
    stream = sys.stderr
    print(msg, file=stream)
78 | ||
79 | ||
class Fail(object):
    """A class to count fails during a repetitive task.

    Args:
        maximum: An integer for the maximum of fails to allow.
        exit: An integer for the exit code when maximum of fail is reached.

    Attributes:
        i: Number of fails still allowed before exiting.
        exit: Exit code passed to ``SystemExit``.

    Methods:
        count: Count a fail, exit when maximum of fails is reached.
        wait: Same as count but also sleep for a given time in seconds.
    """

    def __init__(self, maximum=10, exit=1):
        self.i = maximum
        self.exit = exit

    def count(self):
        """Record one fail.

        Raises:
            SystemExit: With ``self.exit`` as the code once no fails remain.
        """
        self.i -= 1
        # ``<=`` rather than ``==``: with ``maximum`` of zero or less the
        # counter would otherwise step past zero and never trigger the exit.
        if self.i <= 0:
            err("Too many consecutive fails, exiting.")
            raise SystemExit(self.exit)

    def wait(self, delay=0):
        """Record one fail, then sleep ``delay`` seconds if it is positive."""
        self.count()
        if delay > 0:
            time.sleep(delay)
105 | ||
106 | ||
def find_links(line):
    """Find all links in the given line.

    Args:
        line: The text to scan for ``http://`` / ``https://`` URLs. A URL
            ends at the first space or closing parenthesis.

    Returns:
        A ``(format_string, urls)`` tuple. ``format_string`` is ``line``
        with every URL replaced by ``%s`` and every literal ``%`` doubled,
        so ``format_string % tuple(urls)`` reproduces ``line``. ``urls`` is
        the list of URLs exactly as they appear in ``line``.
    """
    url_pattern = re.compile(r"(https?://[^ )]+)")
    # Collect the URLs from the untouched line: taking them from the
    # escaped copy would turn percent-encoded sequences such as %20 into
    # %%20 inside the returned URLs.
    urls = url_pattern.findall(line)
    # Doubling '%' adds no spaces or ')' and leaves the scheme prefixes
    # alone, so the pattern matches the same URL spans in the escaped text.
    escaped = line.replace("%", "%%")
    return url_pattern.sub("%s", escaped), urls
115 | ||
def follow_redirects(link, sites=None, timeout=1):
    """Follow redirects for the link as long as the redirects are on the
    given sites and return the resolved link.

    Args:
        link: The URL to resolve.
        sites: Container of host names whose redirects may be followed, or
            ``None`` to follow redirects on any host.
        timeout: Timeout in seconds passed to the opener when requesting
            ``link``.

    Returns:
        ``link`` unchanged when its host is not in ``sites``. Otherwise the
        URL of the final response, or, if the request fails, the last
        redirect target that was seen (``link`` when there was none).
    """
    def follow(url):
        return sites is None or urlparse.urlparse(url).hostname in sites

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        def __init__(self):
            self.last_url = None

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            # Remember every target, including one we refuse to follow, so
            # it can still be returned when the request ends in an error.
            self.last_url = newurl
            if not follow(newurl):
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            # Keep issuing HEAD requests along the redirect chain.
            r.get_method = lambda: 'HEAD'
            return r

    if not follow(link):
        return link
    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda: 'HEAD'
    try:
        with contextlib.closing(opener.open(req, timeout=timeout)) as site:
            return site.url
    except Exception:
        # Best effort: any failure falls back to the furthest URL known.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return redirect_handler.last_url or link
145 | ||
def expand_line(line, sites):
    """Expand the links in the line for the given sites.

    Args:
        line: The text whose links should be resolved.
        sites: Host names passed on to ``follow_redirects``.

    Returns:
        The stripped line with every link replaced by its resolved form.
        If anything goes wrong the failure is reported through ``err`` and
        the line is returned as it was received.
    """
    try:
        stripped = line.strip()
        msg_format, links = find_links(stripped)
        resolved = tuple(follow_redirects(link, sites) for link in links)
        line = msg_format % resolved
    except Exception as e:
        try:
            # Format the exception with %s instead of calling unicode(),
            # which does not exist on Python 3 and silently suppressed the
            # report there.
            err("expanding line %s failed due to %s" % (line, e))
        except Exception:
            # Reporting is best effort; never let it mask the result.
            pass
    return line
159 | ||
def parse_host_list(list_of_hosts):
    """Parse the comma separated list of hosts.

    Args:
        list_of_hosts: A string of host names separated by commas and/or
            whitespace.

    Returns:
        A set with the host names found; empty entries are ignored.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence. With a single group, findall returns the captured names.
    return set(re.findall(r"\s*([^,\s]+)\s*,?\s*", list_of_hosts))
165 | ||
166 | ||
def align_text(text, left_margin=17, max_width=160):
    """Wrap ``text`` and indent it by ``left_margin`` spaces.

    Each newline-separated line of ``text`` is wrapped to
    ``max_width - left_margin`` columns and every wrapped piece is prefixed
    with ``left_margin`` spaces. Leading whitespace of the overall result is
    stripped, so the first line starts at column zero.
    """
    indent = ' ' * left_margin
    wrap_width = max_width - left_margin
    blocks = []
    for source_line in text.split('\n'):
        pieces = textwrap.wrap(source_line, wrap_width)
        blocks.append('\n'.join(indent + piece for piece in pieces))
    return '\n'.join(blocks).lstrip()
175 | ||
176 | ||
# Names exported by a star-import of this module; every other helper
# defined here has to be imported by name.
__all__ = ["htmlentitydecode", "smrt_input"]