Commit 17fa3085 authored by Benjamin Fischer
Browse files

Terminal&Codeeditor: handle interrupted utf8 multibyte sequence properly

actually fixes #2403
parent 26cddaec
......@@ -14,6 +14,7 @@ from subprocess import Popen, PIPE
logger = logging.getLogger(__name__)
from vispa.remote.helper import UTF8Buffer
def expand(path):
    """
    Return *path* with environment variables expanded first and a leading ``~`` expanded
    afterwards.
    """
    with_variables = os.path.expandvars(path)
    return os.path.expanduser(with_variables)
......@@ -43,7 +44,7 @@ class CodeEditorRpc:
logger.debug("CodeEditorRpc created")
#so far equivalent to abort (might be extended in the future)
def close(self):
def close(self):
self._abort = True
def _send(self, topic, data=None):
......@@ -87,12 +88,13 @@ class CodeEditorRpc:
self._abort = False
read_time = time.time()
read_amount = 0
buf = UTF8Buffer()
while not self._abort:
r, _, _ = select.select([self._pty_fno], [], [], 0.1)
if read_amount >= CodeEditorRpc.BURST_BUFFER:
time.sleep(CodeEditorRpc.BURST_DELAY)
elif self._pty_fno in r:
data = os.read(self._pty_fno, CodeEditorRpc.MAX_BURST)
data = buf.passThru(os.read(self._pty_fno, CodeEditorRpc.MAX_BURST))
read_amount += len(data)
self._send('data', data)
if read_amount > 0:
......
......@@ -10,6 +10,7 @@ import logging
import os
import vispa.remote
from vispa.remote.helper import UTF8Buffer
logger = logging.getLogger(__name__)
......@@ -74,6 +75,7 @@ class Terminal(object):
def _stream(self):
try:
buf = UTF8Buffer()
buffer_size = 1024*16
fout = self.__master_stdout.fileno()
returncode = self.__process.poll()
......@@ -82,7 +84,9 @@ class Terminal(object):
returncode = self.__process.poll()
r, _, _ = select.select([fout], [], [], 0.5)
if fout in r:
vispa.remote.send_topic(self._topic+".data", window_id=self._window_id, data=os.read(fout, buffer_size))
vispa.remote.send_topic(self._topic+".data", window_id=self._window_id,
data=buf.passThru(os.read(fout, buffer_size)))
elif returncode is not None:
self.close()
return
......
class UTF8Buffer(object):
    """
    Buffers incoming UTF8 encoded data, and prevents chunks from being created in the middle of a
    multi byte sequence.

    The buffer holds raw bytes (``str`` on Python 2, ``bytes`` on Python 3). Bytes that start a
    multibyte sequence which has not been completed yet are held back until the remaining bytes
    arrive through :meth:`fill`.
    """

    def __init__(self):
        # bytes literal: identical to "" on Python 2, and concatenable with os.read() output
        # on Python 3
        self.buffer = b""

    @staticmethod
    def _pending_tail(tail):
        """
        Return how many bytes at the end of *tail* belong to a multibyte sequence that is still
        missing continuation bytes (0 if the end is complete or merely invalid).

        Only the last three bytes need to be inspected: the longest sequence is four bytes, so an
        incomplete one consists of a lead byte followed by at most two continuation bytes.
        """
        for back, byte in enumerate(reversed(bytearray(tail[-3:])), 1):
            if byte & 0xC0 == 0x80:  # continuation byte, keep looking for its lead byte
                continue
            if 0xC0 <= byte <= 0xDF:
                needed = 2
            elif 0xE0 <= byte <= 0xEF:
                needed = 3
            elif 0xF0 <= byte <= 0xF7:
                needed = 4
            else:  # ASCII or an illegal lead byte: nothing to wait for
                needed = 1
            return back if needed > back else 0
        # only continuation bytes seen: either the end of a complete 4 byte sequence or garbage,
        # in both cases nothing is pending
        return 0

    def fill(self, data):
        """
        Fill the buffer with the given *data*. Text (non-bytes) input is encoded as UTF8 first.
        """
        if isinstance(data, bytearray):
            data = bytes(data)
        elif not isinstance(data, bytes):
            data = data.encode("utf-8")
        self.buffer += data

    def read(self, count=None):
        """
        Get a maximum of *count* bytes from the buffer. UTF8 multibyte characters will not be
        broken apart. If *count* is *None* the current buffer length is used.

        Illegal bytes inside the returned chunk are dropped. An incomplete multibyte sequence at
        the end of the chunk is never returned or dropped; it stays in the buffer until it can be
        completed. An empty byte string is returned when nothing can be handed out yet.
        """
        available = len(self.buffer)
        count = available if count is None else min(int(count), available)
        if count <= 0:
            return b""

        # move the cut point in front of a trailing, still incomplete multibyte sequence
        cut = count - self._pending_tail(self.buffer[max(0, count - 3):count])
        if cut <= 0:
            return b""  # just return nothing and keep buffering

        selected, self.buffer = self.buffer[:cut], self.buffer[cut:]
        try:
            selected.decode("utf-8")
        except UnicodeDecodeError:
            # illegal bytes inside the chunk: strip them; the pending tail (if any) was already
            # excluded above and therefore survives in the buffer
            selected = selected.decode("utf-8", "ignore").encode("utf-8")
        return selected

    def passThru(self, data, count=None):
        """
        A convenience function for adding data to the buffer and reading up to *count* of it again.
        If *count* is *None* the current buffer length is used.
        """
        self.fill(data)
        return self.read(count=count)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment