import contextlib
import re
import sys
+import textwrap
import time
-import urllib2
-import urlparse
try:
from html.entities import name2codepoint
unichr = chr
+ import urllib.request as urllib2
+ import urllib.parse as urlparse
except ImportError:
from htmlentitydefs import name2codepoint
+ import urllib2
+ import urlparse
def htmlentitydecode(s):
return re.sub(
def find_links(line):
    """Find all links in the given line.

    Returns a tuple ``(format_string, urls)``. ``format_string`` is a
    sprintf style format string in which every link is replaced by a ``%s``
    placeholder and every literal ``%`` outside a link is escaped as ``%%``.
    ``urls`` is the list of links in order of appearance, exactly as they
    occur in ``line``, so ``format_string % tuple(urls)`` gives back the
    original line.
    """
    # With a single capturing group, re.split() returns the text between
    # links at the even indexes and the links themselves at the odd indexes.
    pieces = re.split(r"(https?://[^ )]+)", line)
    urls = pieces[1::2]
    # Escape "%" only in the surrounding text. Escaping the whole line before
    # extracting the links would hand back urls with doubled percent signs
    # (e.g. "%20" turned into "%%20").
    pieces[::2] = [text.replace("%", "%%") for text in pieces[::2]]
    pieces[1::2] = ["%s"] * len(urls)
    return "".join(pieces), urls
-
+
def follow_redirects(link, sites= None):
"""Follow directs for the link as long as the redirects are on the given
sites and return the resolved link."""
def follow(url):
return sites == None or urlparse.urlparse(url).hostname in sites
-
+
class RedirectHandler(urllib2.HTTPRedirectHandler):
def __init__(self):
self.last_url = None
self, req, fp, code, msg, hdrs, newurl)
r.get_method = lambda : 'HEAD'
return r
-
+
if not follow(link):
return link
redirect_handler = RedirectHandler()
p = set(
m.group(1) for m in re.finditer("\s*([^,\s]+)\s*,?\s*", list_of_hosts))
return p
-
+
+
def align_text(text, left_margin=16, max_width=80):
    """Wrap and indent ``text`` so it fits between two columns.

    Each newline-separated line of ``text`` is wrapped on its own to
    ``max_width - left_margin`` characters, and every wrapped piece is
    prefixed with ``left_margin`` spaces. Lines that wrap to nothing stay
    as empty lines. Leading whitespace of the final result is stripped, so
    the first output line carries no indentation.
    """
    indent = ' ' * left_margin
    wrap_width = max_width - left_margin
    paragraphs = [
        '\n'.join([indent + piece for piece in textwrap.wrap(raw, wrap_width)])
        for raw in text.split('\n')
    ]
    return '\n'.join(paragraphs).lstrip()