summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
29c8e01)
This also fixes https://github.com/tweepy/tweepy/issues/615 by properly
decoding the Python 3 bytes object to a string.
from __future__ import absolute_import, print_function
import logging
from __future__ import absolute_import, print_function
import logging
import requests
from requests.exceptions import Timeout
from threading import Thread
import requests
from requests.exceptions import Timeout
from threading import Thread
use small chunks so it can read the length and the tweet in 2 read calls.
"""
use small chunks so it can read the length and the tweet in 2 read calls.
"""
- def __init__(self, stream, chunk_size):
+ def __init__(self, stream, chunk_size, encoding='utf-8'):
+ self._buffer = six.b('')
self._chunk_size = chunk_size
self._chunk_size = chunk_size
+ self._encoding = encoding
def read_len(self, length):
while not self._stream.closed:
def read_len(self, length):
while not self._stream.closed:
read_len = max(self._chunk_size, length - len(self._buffer))
self._buffer += self._stream.read(read_len)
read_len = max(self._chunk_size, length - len(self._buffer))
self._buffer += self._stream.read(read_len)
- def read_line(self, sep='\n'):
+ def read_line(self, sep=six.b('\n')):
+ """Read the data stream until a given separator is found (default \n)
+
+ :param sep: Separator to read until. Must be of the bytes type (str in Python 2,
+ bytes in Python 3)
+ :return: The str of the data read until sep
+ """
start = 0
while not self._stream.closed:
loc = self._buffer.find(sep, start)
start = 0
while not self._stream.closed:
loc = self._buffer.find(sep, start)
def _pop(self, length):
r = self._buffer[:length]
self._buffer = self._buffer[length:]
def _pop(self, length):
r = self._buffer[:length]
self._buffer = self._buffer[length:]
+ return r.decode(self._encoding)
self.running = False
def _read_loop(self, resp):
self.running = False
def _read_loop(self, resp):
- buf = ReadBuffer(resp.raw, self.chunk_size)
+ charset = resp.headers.get('content-type', default='')
+ enc_search = re.search('charset=(?P<enc>\S*)', charset)
+ if enc_search is not None:
+ encoding = enc_search.group('enc')
+ else:
+ encoding = 'utf-8'
+
+ buf = ReadBuffer(resp.raw, self.chunk_size, encoding=encoding)
while self.running and not resp.raw.closed:
length = 0
while self.running and not resp.raw.closed:
length = 0