Merge pull request #216 from edi-bice/master

[z_archive/twitter.git] / twitter / util.py
diff --git a/twitter/util.py b/twitter/util.py

index aa7a837dd2e04abf5fca12dddc12b2bf66bdacbb..48c131f0dfe2a55c2976947873512c89398c6553 100644 (file)
--- a/twitter/util.py
+++ b/twitter/util.py
@@ -11,6 +11,7 @@ import contextlib
  import re
  import sys
  import time
+import socket
  
  try:
      from html.entities import name2codepoint
@@ -88,15 +89,15 @@ def find_links(line):
      l = line.replace("%", "%%")
      regex = "(https?://[^ )]+)"
      return (
-        re.sub(regex, "%s", l), 
+        re.sub(regex, "%s", l),
          [m.group(1) for m in re.finditer(regex, l)])
-    
+
  def follow_redirects(link, sites= None):
      """Follow redirects for the link as long as the redirects are on the given
      sites and return the resolved link."""
      def follow(url):
          return sites == None or urlparse.urlparse(url).hostname in sites
-                
+
      class RedirectHandler(urllib2.HTTPRedirectHandler):
          def __init__(self):
              self.last_url = None
@@ -108,7 +109,7 @@ def follow_redirects(link, sites= None):
                  self, req, fp, code, msg, hdrs, newurl)
              r.get_method = lambda : 'HEAD'
              return r
-            
+
      if not follow(link):
          return link
      redirect_handler = RedirectHandler()
@@ -116,9 +117,9 @@ def follow_redirects(link, sites= None):
      req = urllib2.Request(link)
      req.get_method = lambda : 'HEAD'
      try:
-        with contextlib.closing(opener.open(req)) as site:
+        with contextlib.closing(opener.open(req,timeout=1)) as site:
              return site.url
-    except (urllib2.HTTPError, urllib2.URLError):
+    except (urllib2.HTTPError, urllib2.URLError, socket.timeout):
          return redirect_handler.last_url if redirect_handler.last_url else link
  
  def expand_line(line, sites):
@@ -133,4 +134,4 @@ def parse_host_list(list_of_hosts):
      p = set(
          m.group(1) for m in re.finditer("\s*([^,\s]+)\s*,?\s*", list_of_hosts))
      return p
-    
+