-try:
+import sys
+PY_3_OR_HIGHER = sys.version_info >= (3, 0)
+
+if PY_3_OR_HIGHER:
import urllib.request as urllib_request
import urllib.error as urllib_error
- import io
-except ImportError:
+else:
import urllib2 as urllib_request
import urllib2 as urllib_error
import json
from ssl import SSLError
import socket
+import io
+import codecs
import sys, select, time
from .api import TwitterCall, wrap_response, TwitterHTTPError
-PY_27_OR_HIGHER = sys.version_info >= (2, 7)
-PY_3_OR_HIGHER = sys.version_info >= (3, 0)
+CRLF = b'\r\n'
Timeout = {'timeout': True}
Hangup = {'hangup': True}
class ChunkDecodeError(Exception):
pass
-def recv_chunk(sock): # -> bytearray:
- header = sock.recv(8) # Scan for an up to 16MiB chunk size (0xffffff).
- crlf = header.find(b'\r\n') # Find the HTTP chunk size.
+class EndOfStream(Exception):
+ pass
+
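+# Alias range to the lazy xrange on Python 2 so one name works on both versions.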
+range = range if PY_3_OR_HIGHER else xrange
+
+
+class HttpDeChunker(object):
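+    r"""Incrementally decode an HTTP chunked transfer-encoded byte stream.
+
+    Feed raw bytes in with extend(); read_chunks() returns the bodies of all
+    complete chunks received so far and keeps any partial chunk buffered.
+
+    A minimal usage sketch (hypothetical data, one complete chunk):
+
+        dechunker = HttpDeChunker()
+        dechunker.extend(b'5\r\nhello\r\n')
+        dechunker.read_chunks()  # -> [bytearray(b'hello')]
+    """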
+
+ def __init__(self):
+ self.buf = bytearray()
+
+ def extend(self, data):
+ self.buf.extend(data)
+
+ def read_chunks(self): # -> [bytearray]
+ chunks = []
+ buf = self.buf
+ while True:
+ header_end_pos = buf.find(CRLF)
+ if header_end_pos == -1:
+ break
+
+ header = buf[:header_end_pos]
+ data_start_pos = header_end_pos + 2
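+            # The chunk header is the body length in hexadecimal. Twitter's
+            # stream is not known to use RFC 7230 chunk extensions, so a plain
+            # hex parse should suffice.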
+ try:
+ chunk_len = int(header.decode('ascii'), 16)
+ except ValueError:
+ raise ChunkDecodeError()
- if not crlf:
- raise ChunkDecodeError()
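+            # A zero-length chunk is the chunked-encoding end-of-stream marker.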
+ if chunk_len == 0:
+ raise EndOfStream()
- size = int(header[:crlf], 16) # Decode the chunk size. Rarely exceeds 8KiB.
- chunk = bytearray(size)
- start = crlf + 2 # Add in the length of the header's CRLF pair.
+ data_end_pos = data_start_pos + chunk_len
+
+            if len(buf) >= data_end_pos + 2:  # Chunk body plus trailing CRLF fully buffered.
+ chunks.append(buf[data_start_pos:data_end_pos])
+ buf = buf[data_end_pos + 2:]
+ else:
+ break
+ self.buf = buf
+ return chunks
- if size <= 3: # E.g. an HTTP chunk with just a keep-alive delimiter or end of stream (0).
- chunk[:size] = header[start:start + size]
- # There are several edge cases (size == [4-6]) as the chunk size exceeds the length
- # of the initial read of 8 bytes. With Twitter, these do not, in practice, occur. The
- # shortest JSON message starts with '{"limit":{'. Hence, it exceeds in size the edge cases
- # and eliminates the need to address them.
- else: # There is more to read in the chunk.
- end = len(header) - start
- chunk[:end] = header[start:]
- if PY_27_OR_HIGHER: # When possible, use less memory by reading directly into the buffer.
- buffer = memoryview(chunk)[end:] # Create a view into the bytearray to hold the rest of the chunk.
- sock.recv_into(buffer)
- else: # less efficient for python2.6 compatibility
- chunk[end:] = sock.recv(max(0, size - end))
- sock.recv(2) # Read the trailing CRLF pair. Throw it away.
- return chunk
+class JsonDeChunker(object):
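+    """Accumulate decoded text and split it into complete JSON objects.
+
+    extend() appends unicode text; read_json_chunks() returns every complete
+    JSON value parsed so far and keeps any trailing partial document buffered.
+
+    A minimal usage sketch (hypothetical data, one whole and one partial object):
+
+        dechunker = JsonDeChunker()
+        dechunker.extend(u'{"a": 1}{"b')
+        dechunker.read_json_chunks()  # -> [{u'a': 1}]; u'{"b' stays buffered
+    """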
+
+ def __init__(self):
+ self.buf = u""
+ self.raw_decode = json.JSONDecoder().raw_decode
+
+ def extend(self, data):
+ self.buf += data
+
+ def read_json_chunks(self):
+ chunks = []
+ buf = self.buf
+ while True:
+ try:
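+                # lstrip() drops the keep-alive newline delimiters Twitter
+                # sends between JSON objects.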
+ buf = buf.lstrip()
+ res, ptr = self.raw_decode(buf)
+ buf = buf[ptr:]
+ chunks.append(res)
+ except ValueError:
+ break
+ self.buf = buf
+ return chunks
class Timer(object):
def __iter__(self):
actually_block = self.block and not self.timeout
- sock_timeout = min(self.timeout, self.heartbeat_timeout) if actually_block else None
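+        # self.timeout may be None; 1000000 seconds is a "no timeout" sentinel
+        # that keeps min() numeric and gives select() a finite wait below.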
+ sock_timeout = min(self.timeout or 1000000, self.heartbeat_timeout)
sock = self.handle.fp.raw._sock if PY_3_OR_HIGHER else self.handle.fp._sock.fp._sock
sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
sock.setblocking(actually_block)
- buf = ''
- raw_decode = json.JSONDecoder().raw_decode
+ headers = self.handle.headers
+ dechunker = HttpDeChunker()
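+        # An incremental decoder copes with multi-byte UTF-8 sequences that
+        # arrive split across chunk boundaries.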
+ utf8decoder = codecs.getincrementaldecoder("utf-8")()
+ json_dechunker = JsonDeChunker()
timer = Timer(self.timeout)
heartbeat_timer = Timer(self.heartbeat_timeout)
while True:
- buf = buf.lstrip() # Remove any keep-alive delimiters
- try:
- res, ptr = raw_decode(buf)
- buf = buf[ptr:]
- except ValueError:
- if not self.block and not self.timeout:
- yield None
- else:
- yield wrap_response(res, self.handle.headers)
+ json_chunks = json_dechunker.read_json_chunks()
+            for json_obj in json_chunks:
+                yield wrap_response(json_obj, headers)
+ if json_chunks:
timer.reset()
heartbeat_timer.reset()
- continue
+ if not self.block and not self.timeout:
+ yield None
if heartbeat_timer.expired():
yield HeartbeatTimeout
break
            if timer.expired():
                yield Timeout
try:
- if not buf and sock_timeout:
- ready_to_read = select.select([sock], [], [], sock_timeout)[0]
- if not ready_to_read:
- continue
- buf += recv_chunk(sock).decode('utf-8')
- if not buf:
- yield Hangup
- break
- heartbeat_timer.reset()
+ ready_to_read = select.select([sock], [], [], sock_timeout)[0]
+ if not ready_to_read:
+ continue
+                data = sock.read()
+                if not data:  # Empty read on a readable socket: the server hung up.
+                    yield Hangup
+                    break
except SSLError as e:
# Code 2 is error from a non-blocking read of an empty buffer.
if e.errno != 2:
raise
+ continue
+
+ dechunker.extend(data)
+
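+            # read_chunks() signals the terminating zero-length chunk or a
+            # corrupt header by raising; either way the stream is over.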
+ try:
+ chunks = dechunker.read_chunks()
+ except (ChunkDecodeError, EndOfStream):
+ yield Hangup
+ break
+
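+            # Push each chunk body through the incremental UTF-8 decoder and
+            # hand the resulting text to the JSON dechunker.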
+ for chunk in chunks:
+ if chunk:
+ json_dechunker.extend(utf8decoder.decode(chunk))
+ if chunks:
+ heartbeat_timer.reset()
def handle_stream_response(req, uri, arg_data, block, timeout, heartbeat_timeout):
try: