jfr.im git - z_archive/twitter.git/commitdiff
Fix html unescaping. Bump v to 0.3.
author mverdone <redacted>
Tue, 13 May 2008 16:24:57 +0000 (16:24 +0000)
committer mverdone <redacted>
Tue, 13 May 2008 16:24:57 +0000 (16:24 +0000)
git-svn-id: http://svn.mike.verdone.ca/pyprojects/twitter/trunk@169 d723f978-dc38-0410-87ed-da353333cdcc

setup.py
twitter/ircbot.py
twitter/util.py [new file with mode: 0644]

index 7977039adce44aec048f39d671b936fcff5169d9..39a7c118a70e90d1359ac7afabd992a41fffb1ee 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 import sys, os
 
-version = '0.2.1'
+version = '0.3'
 
 setup(name='twitter',
       version=version,
index c5363eadb5ffd7a6fca142f8ce377589bd8f8e9f..90d01c516d4009a8d0d3a69d5c1d2a54ab5f539b 100644 (file)
--- a/twitter/ircbot.py
+++ b/twitter/ircbot.py
@@ -28,8 +28,6 @@ password: <twitter_account_password>
 """
 
 # TODO add delimiter if first word isn't "is" or "was"
-# TODO handle newlines
-# TODO handle quotes
 
 BOT_VERSION = "TwitterBot 0.2.1 (mike.verdone.ca/twitter)"
 
@@ -46,6 +44,7 @@ from heapq import heappop, heappush
 import traceback
 
 from api import Twitter, TwitterError
+from util import htmlentitydecode
 
 try:
     import irclib
@@ -127,11 +126,9 @@ class TwitterBot(object):
         for update in updates:
             crt = parse(update['created_at']).utctimetuple()
             if (crt > self.lastUpdate):
-                text = (
-                    update['text']
-                    .replace('\n', ' ')
-                    .replace("&quot;", "\"")
-                    .replace('&amp;', '&'))
+                text = (htmlentitydecode(
+                    update['text'].replace('\n', ' '))
+                    .encode('utf-8', 'replace'))
                 self.privmsg_channel(
                     "=^_^= %s%s%s %s" %(
                         IRC_BOLD, update['user']['screen_name'],
diff --git a/twitter/util.py b/twitter/util.py
new file mode 100644 (file)
index 0000000..70ff7a5
--- /dev/null
+++ b/twitter/util.py
@@ -0,0 +1,17 @@
+"""
+Internal utility functions.
+
+`htmlentitydecode` came from here:
+    http://wiki.python.org/moin/EscapingHtml
+"""
+
+
+import re
+from htmlentitydefs import name2codepoint
+
+def htmlentitydecode(s):
+    return re.sub(
+        '&(%s);' % '|'.join(name2codepoint), 
+        lambda m: unichr(name2codepoint[m.group(1)]), s)
+
+__all__ = ["htmlentitydecode"]