jfr.im git - z_archive/twitter.git/blame

Commit	Line	Data
8ad2cf0b	1	"""
	2	Internal utility functions.
	3
	4	`htmlentitydecode` came from here:
	5	http://wiki.python.org/moin/EscapingHtml
	6	"""
	7
a7282452	8	from __future__ import print_function
8ad2cf0b	9
907402f6	10	import contextlib
8ad2cf0b	11	import re
098660ce	12	import sys
737cfb61	13	import textwrap
a7282452	14	import time
e107d209	15	import socket
a7282452	16
# True when the running interpreter's major version is 3 or newer.
PY_3_OR_HIGHER = sys.version_info[0] >= 3
2b533cdc MV	18
3930cc7b MV	19	try:
3930cc7b MV	20	from html.entities import name2codepoint
d9e92207	21	unichr = chr
62ec1b07	22	import urllib.request as urllib2
62ec1b07	23	import urllib.parse as urlparse
3930cc7b MV	24	except ImportError:
3930cc7b MV	25	from htmlentitydefs import name2codepoint
62ec1b07	26	import urllib2
62ec1b07	27	import urlparse
8ad2cf0b	28
	29	def htmlentitydecode(s):
	30	return re.sub(
a5e40197	31	'&(%s);' % '\|'.join(name2codepoint),
1bb6d474	32	lambda m: unichr(name2codepoint[m.group(1)]), s)
8ad2cf0b	33
def smrt_input(globals_, locals_, ps1=">>> ", ps2="... "):
    """Read one Python expression from the user, evaluate it and print it.

    Lines are read until the accumulated text evaluates without a
    ``SyntaxError``; the first line is prompted with ``ps1`` and every
    continuation line with ``ps2``. Any result other than ``None`` is
    printed with ``str()``.

    Args:
        globals_: Global namespace handed to ``eval``.
        locals_: Local namespace handed to ``eval``.
        ps1: Prompt shown for the first line.
        ps2: Prompt shown for continuation lines.

    Returns:
        None. The evaluated value is only printed.
    """
    try:
        # Python 2: the built-in input() would itself eval the typed text,
        # so the raw line reader has to be used there.
        read_line = raw_input
    except NameError:
        read_line = input  # Python 3

    inputs = []
    while True:
        prompt = ps2 if inputs else ps1
        inputs.append(read_line(prompt))
        try:
            # NOTE: eval() executes whatever was typed. Never feed this
            # function text that comes from an untrusted source.
            ret = eval('\n'.join(inputs), globals_, locals_)
        except SyntaxError:
            # Treated as "expression not finished yet": ask for more input.
            continue
        # Compare against None so falsy results (0, False, '') still show.
        if ret is not None:
            print(str(ret))
        return
	49
def printNicely(string):
    """Write ``string`` and a trailing newline to standard output as UTF-8.

    When ``sys.stdout`` exposes a binary ``buffer``, the UTF-8 bytes are
    written to it directly. Otherwise the value is handed to ``print``.

    Args:
        string: The text to print.
    """
    stdout = sys.stdout
    if hasattr(stdout, 'buffer'):
        # Push out text already queued in the text layer first, so the raw
        # bytes written below cannot overtake earlier print() output.
        stdout.flush()
        stdout.buffer.write(string.encode('utf8'))
        print()
        stdout.buffer.flush()
        stdout.flush()
    else:
        encoded = string.encode('utf8')
        # On Python 2 encode() returns a byte ``str`` that prints as-is. On
        # Python 3 it returns ``bytes``, which print() would render as a
        # b'...' repr, so the original text is printed instead.
        print(encoded if isinstance(encoded, str) else string)
	58
def actually_bytes(stringy):
    """Return ``stringy`` as a byte string.

    Byte strings (``bytes``; this is ``str`` on Python 2) are returned
    unchanged. Text is encoded as UTF-8. Any other object is first converted
    with ``str()`` and, if that produced text, encoded as UTF-8.

    Args:
        stringy: A byte string, a text string, or any object.

    Returns:
        The byte-string form of ``stringy``.
    """
    # isinstance() also accepts subclasses, which an exact type() comparison
    # would push through str() and turn into their repr text.
    if isinstance(stringy, bytes):
        return stringy
    text_type = type(u"")  # ``str`` on Python 3, ``unicode`` on Python 2
    if not isinstance(stringy, text_type):
        stringy = str(stringy)
    if isinstance(stringy, text_type):
        stringy = stringy.encode("utf-8")
    return stringy
a7282452 S	75
	76	def err(msg=""):
	77	print(msg, file=sys.stderr)
	78
2b533cdc	79
class Fail(object):
    """A class to count fails during a repetitive task.

    Args:
        maximum: An integer for the maximum of fails to allow.
        exit: An integer for the exit code when maximum of fail is reached.

    Methods:
        count: Count a fail, exit when maximum of fails is reached.
        wait: Same as count but also sleep for a given time in seconds.
    """
    def __init__(self, maximum=10, exit=1):
        # ``i`` holds the number of fails still allowed before exiting.
        self.i = maximum
        self.exit = exit

    def count(self):
        """Record one fail; raise ``SystemExit`` once none are left."""
        self.i -= 1
        # ``<=`` rather than ``==``: with a maximum of zero or less the
        # counter would otherwise step past zero and never trigger the exit.
        if self.i <= 0:
            err("Too many consecutive fails, exiting.")
            raise SystemExit(self.exit)

    def wait(self, delay=0):
        """Record one fail, then sleep ``delay`` seconds if it is positive."""
        self.count()
        if delay > 0:
            time.sleep(delay)
907402f6	105
	106
	107	def find_links(line):
	108	"""Find all links in the given line. The function returns a sprintf style
	109	format string (with %s placeholders for the links) and a list of urls."""
62ec1b07	110	l = line.replace("%", "%%")
907402f6	111	regex = "(https?://[^ )]+)"
907402f6	112	return (
be5f32da	113	re.sub(regex, "%s", l),
907402f6	114	[m.group(1) for m in re.finditer(regex, l)])
be5f32da	115
def follow_redirects(link, sites=None):
    """Follow redirects for the link as long as they stay on the given sites.

    Args:
        link: The URL to resolve.
        sites: A container of host names whose redirects may be followed, or
            None to follow redirects on any host.

    Returns:
        The resolved URL. ``link`` itself is returned when its host is not
        in ``sites``. If the request fails, the last redirect target seen is
        returned, or ``link`` when no redirect was seen.
    """
    def follow(url):
        return sites is None or urlparse.urlparse(url).hostname in sites

    class RedirectHandler(urllib2.HTTPRedirectHandler):
        """Redirect handler that remembers the most recent target URL."""
        def __init__(self):
            self.last_url = None

        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            self.last_url = newurl
            if not follow(newurl):
                # Declining here leaves the redirect unfollowed; the target
                # stays available through ``last_url``.
                return None
            r = urllib2.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, hdrs, newurl)
            # Keep using HEAD so no response bodies are requested.
            r.get_method = lambda: 'HEAD'
            return r

    if not follow(link):
        return link
    redirect_handler = RedirectHandler()
    opener = urllib2.build_opener(redirect_handler)
    req = urllib2.Request(link)
    req.get_method = lambda: 'HEAD'
    try:
        with contextlib.closing(opener.open(req, timeout=1)) as site:
            return site.url
    except Exception:
        # Best effort: any failure falls back to what is known so far.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return redirect_handler.last_url or link
	145
	146	def expand_line(line, sites):
	147	"""Expand the links in the line for the given sites."""
62f2a207 EB	148	try:
	149	l = line.strip()
	150	msg_format, links = find_links(l)
	151	args = tuple(follow_redirects(l, sites) for l in links)
	152	line = msg_format % args
	153	except Exception as e:
	154	try:
	155	err("expanding line %s failed due to %s" % (line, unicode(e)))
	156	except:
	157	pass
	158	return line
907402f6	159
	160	def parse_host_list(list_of_hosts):
	161	"""Parse the comma separated list of hosts."""
	162	p = set(
	163	m.group(1) for m in re.finditer("\s([^,\s]+)\s,?\s*", list_of_hosts))
	164	return p
be5f32da	165
737cfb61	166
def align_text(text, left_margin=17, max_width=160):
    """Wrap ``text`` and indent it by ``left_margin`` spaces.

    Each input line is wrapped to ``max_width - left_margin`` columns and
    every resulting line is prefixed with ``left_margin`` spaces. Leading
    whitespace of the final result is stripped.
    """
    indent = ' ' * left_margin
    width = max_width - left_margin
    wrapped_rows = (
        '\n'.join(indent + piece for piece in textwrap.wrap(row, width))
        for row in text.split('\n')
    )
    return '\n'.join(wrapped_rows).lstrip()
2b533cdc MV	175
	176
	177	__all__ = ["htmlentitydecode", "smrt_input"]