mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Special handling to resolve incomplete UTF-8 token sequences in Qwen
This commit is contained in:
parent
32ac3153e4
commit
0028e71993
1 changed file with 11 additions and 1 deletion
12
koboldcpp.py
12
koboldcpp.py
|
@ -545,6 +545,15 @@ def tryparseint(value):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
def is_incomplete_utf8_sequence(byte_seq):
    """Return True when *byte_seq* ends in a truncated UTF-8 character.

    Note: this only flags INCOMPLETE sequences. Corrupted (invalid) input is
    ignored and reported as False, exactly like fully valid input.

    Args:
        byte_seq: a bytes-like object (``bytes`` or ``bytearray``) holding the
            raw bytes accumulated so far.

    Returns:
        True if the data is valid UTF-8 up to a final multi-byte character
        that is still missing continuation bytes; False if the data decodes
        cleanly or contains an invalid sequence.
    """
    import codecs  # local import keeps this helper self-contained

    decoder = codecs.getincrementaldecoder('utf-8')()
    try:
        # final=False makes the decoder hold back a trailing partial
        # character instead of raising, so truncation is detected from the
        # decoder state rather than from the wording of an error message.
        decoder.decode(bytes(byte_seq), final=False)
    except UnicodeDecodeError:
        return False  # invalid sequence, but not incomplete
    pending_bytes = decoder.getstate()[0]
    return len(pending_bytes) > 0  # leftover bytes mean an incomplete sequence
|
||||||
|
|
||||||
def unpack_to_dir(destpath = ""):
|
def unpack_to_dir(destpath = ""):
|
||||||
import shutil
|
import shutil
|
||||||
srcpath = os.path.abspath(os.path.dirname(__file__))
|
srcpath = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
@ -1697,7 +1706,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
newbyte = ctypes.string_at(token)
|
newbyte = ctypes.string_at(token)
|
||||||
incomplete_token_buffer += bytearray(newbyte)
|
incomplete_token_buffer += bytearray(newbyte)
|
||||||
tokenSeg = incomplete_token_buffer.decode("UTF-8","ignore")
|
tokenSeg = incomplete_token_buffer.decode("UTF-8","ignore")
|
||||||
badFragment = (tokenSeg==" " and len(incomplete_token_buffer)>1) #partial incomplete unicode
|
incseq = is_incomplete_utf8_sequence(incomplete_token_buffer)
|
||||||
|
badFragment = (tokenSeg==" " and len(incomplete_token_buffer)>1) or incseq #partial incomplete unicode
|
||||||
if tokenSeg!="" and not badFragment:
|
if tokenSeg!="" and not badFragment:
|
||||||
incomplete_token_buffer.clear()
|
incomplete_token_buffer.clear()
|
||||||
tokenStr += tokenSeg
|
tokenStr += tokenSeg
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue