]> jfr.im git - z_archive/twitter.git/blob - twitter/stream.py
Further refine socket management.
[z_archive/twitter.git] / twitter / stream.py
1 try:
2 import urllib.request as urllib_request
3 import urllib.error as urllib_error
4 import io
5 except ImportError:
6 import urllib2 as urllib_request
7 import urllib2 as urllib_error
8 import json
9 from ssl import SSLError
10 import socket
11 import sys, select, time
12
13 from .api import TwitterCall, wrap_response, TwitterHTTPError
14
def recv_chunk(sock):  # -> bytearray:
    """Read one HTTP/1.1 chunk (chunked transfer coding) from ``sock``.

    The socket is switched to blocking mode for the duration of the read and
    its previous timeout is restored before returning, even if an exception
    is raised.

    :param sock: a connected socket-like object providing ``gettimeout``,
        ``setblocking``, ``settimeout`` and ``recv``.
    :returns: a ``bytearray`` holding the chunk body. It is empty when the
        peer has closed the connection, when no well-formed size line is
        found within the first 10 bytes, or for a zero-length chunk. It is
        shorter than announced only if the peer closes mid-chunk.
    :raises ValueError: if the size line is not a hexadecimal number.
    """
    timeout = sock.gettimeout()
    sock.setblocking(True)  # Read the whole HTTP chunk.
    try:
        # Read the size line one byte at a time so that nothing belonging to
        # the chunk body (or to the following chunk) is consumed by accident.
        # 10 bytes covers up to a 4GiB chunk size (0xffffffff) plus the CRLF.
        header = bytearray()
        while len(header) < 10 and not header.endswith(b'\r\n'):
            byte = sock.recv(1)
            if not byte:  # Peer closed the connection.
                break
            header += byte

        crlf = header.find(b'\r\n')  # Find the end of the HTTP chunk size.
        if crlf <= 0:
            return bytearray()

        # Decode the chunk size.
        remaining = int(bytes(header[:crlf]).decode('ascii'), 16)

        # recv() may return fewer bytes than requested, so keep reading until
        # the whole body has arrived. Each request is capped so that a huge
        # announced size cannot force a huge buffer allocation up front.
        chunk = bytearray()
        while len(chunk) < remaining:
            data = sock.recv(min(remaining - len(chunk), 65536))
            if not data:  # Peer closed mid-chunk; hand back what we have.
                break
            chunk += data

        # Read the trailing CRLF pair. Throw it away.
        pending = 2
        while pending > 0:
            data = sock.recv(pending)
            if not data:
                break
            pending -= len(data)

        return chunk
    finally:
        sock.settimeout(timeout)
38
39 ## recv_chunk()
40
41
class TwitterJSONIter(object):
    """Iterate over the JSON documents arriving on a streaming HTTP response.

    Besides decoded messages, the iterator also yields control values:
    ``None`` (non-blocking mode, no complete message buffered),
    ``{'timeout': True}`` (timeout mode, nothing arrived in time) and
    ``{'hangup': True}`` (blocking mode, the text buffer is empty after a
    read).
    """

    def __init__(self, handle, uri, arg_data, block=True, timeout=None):
        """Remember the response handle and the request details.

        :param handle: the opened HTTP response to read from.
        :param uri: request URI, used when reporting HTTP errors.
        :param arg_data: request arguments, used when reporting HTTP errors.
        :param block: whether reads block until data is available.
        :param timeout: seconds to wait for data; falsy disables the wait.
        """
        self.handle = handle
        self.uri = uri
        self.arg_data = arg_data
        self.block = block
        self.timeout = timeout

    def __iter__(self):
        """Yield wrapped JSON messages and control values (see class docs)."""
        # Dig the raw socket out of the response object; the wrapper layers
        # are different on Python 3 and Python 2.
        if sys.version_info >= (3, 0):
            sock = self.handle.fp.raw._sock
        else:
            sock = self.handle.fp._sock.fp._sock
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        # Block only when asked to and no timeout is set.
        sock.setblocking(self.block and not self.timeout)

        decoder = json.JSONDecoder()
        pending = u''
        last_message = time.time()
        while True:
            # First drain any complete document already sitting in the buffer.
            try:
                pending = pending.lstrip()
                message, consumed = decoder.raw_decode(pending)
                pending = pending[consumed:]
                yield wrap_response(message, self.handle.headers)
                last_message = time.time()
                continue
            except ValueError:
                # No complete JSON document buffered yet.
                if not self.block:
                    yield None

            # Then fetch more bytes from the socket.
            try:
                if self.timeout:
                    # Wait at most `timeout` seconds for the socket to become
                    # readable.
                    readable = select.select(
                        [sock], [], [], self.timeout)[0]
                    if not readable:
                        yield {'timeout': True}
                    else:
                        pending += recv_chunk(sock).decode('utf-8')
                        # Data arrived, but it has been too long since the
                        # last complete message was delivered.
                        if time.time() - last_message > self.timeout:
                            yield {'timeout': True}
                else:
                    pending += recv_chunk(sock).decode('utf-8')

                if self.block and not pending:
                    yield {'hangup': True}
            except SSLError as e:
                # errno 2 on a non-blocking/timeout read apparently means
                # there was nothing in the socket buffer; anything else is a
                # real error.
                nonblocking = not self.block or self.timeout
                if not (nonblocking and e.errno == 2):
                    raise
            except urllib_error.HTTPError as e:
                raise TwitterHTTPError(e, self.uri, 'json', self.arg_data)
94
def handle_stream_response(req, uri, arg_data, block, timeout=None):
    """Open ``req`` and return an iterator over the streamed JSON messages.

    :param req: request object passed to ``urllib_request.urlopen``.
    :param uri: request URI, forwarded to ``TwitterJSONIter``.
    :param arg_data: request arguments, forwarded to ``TwitterJSONIter``.
    :param block: whether the resulting iterator performs blocking reads.
    :param timeout: optional read timeout forwarded to ``TwitterJSONIter``.
    :returns: the iterator produced by ``iter(TwitterJSONIter(...))``.
    """
    response = urllib_request.urlopen(req)
    stream = TwitterJSONIter(response, uri, arg_data, block, timeout=timeout)
    return iter(stream)
98
class TwitterStreamCallWithTimeout(TwitterCall):
    """Streaming call that reads in blocking mode with ``self.timeout``."""

    def _handle_response(self, req, uri, arg_data, _timeout=None):
        """Return the message iterator for ``req``; ``_timeout`` is not used here."""
        return handle_stream_response(
            req, uri, arg_data,
            block=True,
            timeout=self.timeout,
        )
102
class TwitterStreamCall(TwitterCall):
    """Streaming call that reads in blocking mode without a timeout."""

    def _handle_response(self, req, uri, arg_data, _timeout=None):
        """Return the message iterator for ``req``; ``_timeout`` is not used here."""
        return handle_stream_response(
            req, uri, arg_data,
            block=True,
        )
106
class TwitterStreamCallNonBlocking(TwitterCall):
    """Streaming call that reads in non-blocking mode."""

    def _handle_response(self, req, uri, arg_data, _timeout=None):
        """Return the message iterator for ``req``; ``_timeout`` is not used here."""
        return handle_stream_response(
            req, uri, arg_data,
            block=False,
        )
110
class TwitterStream(TwitterStreamCall):
    """
    Interface to the Twitter Stream API (stream.twitter.com).

    Use it much like the Twitter class; the difference is that calling
    a method returns an iterator that yields objects decoded from the
    stream. For example::

        twitter_stream = TwitterStream(auth=OAuth(...))
        iterator = twitter_stream.statuses.sample()

        for tweet in iterator:
            ...do something with this tweet...

    The iterator keeps yielding for as long as the stream delivers
    data; an HTTP error while reading is reported as a
    TwitterHTTPError.

    `block` selects blocking reads (the default, True). With
    `block=False` the iterator yields None whenever no complete
    message is available.

    `timeout` only has an effect together with `block=True`: when it
    is set, the iterator yields ``{'timeout': True}`` if nothing
    arrives within `timeout` seconds.

    In blocking mode the iterator yields ``{'hangup': True}`` when a
    read leaves it with no buffered data.
    """
    def __init__(
            self, domain="stream.twitter.com", secure=True, auth=None,
            api_version='1.1', block=True, timeout=None):
        # Every request path starts with the API version.
        uriparts = (str(api_version),)

        # Pick the call class that implements the requested read mode.
        if not block:
            call_cls = TwitterStreamCallNonBlocking
        elif timeout:
            call_cls = TwitterStreamCallWithTimeout
        else:
            call_cls = TwitterStreamCall

        TwitterStreamCall.__init__(
            self,
            auth=auth,
            format="json",
            domain=domain,
            callable_cls=call_cls,
            secure=secure,
            uriparts=uriparts,
            timeout=timeout,
            gzip=False)