mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-30 12:40:29 +00:00
server : return HTTP 400 if prompt exceeds context length (#16486)
In streaming mode, when the prompt exceeds the context length, the server returns an HTTP 200 status code with a JSON error in the body. This is very confusing and inconsistent with all other inference engines, which return an HTTP 4xx error in this case. This patch fixes the problem by making the server return HTTP 400 in such cases.
This commit is contained in:
parent
cdb6da468c
commit
68ee98ae18
3 changed files with 40 additions and 2 deletions
|
|
@@ -35,6 +35,12 @@ class ServerResponse:
|
|||
body: dict | Any
|
||||
|
||||
|
||||
class ServerError(Exception):
|
||||
def __init__(self, code, body):
|
||||
self.code = code
|
||||
self.body = body
|
||||
|
||||
|
||||
class ServerProcess:
|
||||
# default options
|
||||
debug: bool = False
|
||||
|
|
@@ -297,6 +303,8 @@ class ServerProcess:
|
|||
response = requests.post(url, headers=headers, json=data, stream=True)
|
||||
else:
|
||||
raise ValueError(f"Unimplemented method: {method}")
|
||||
if response.status_code != 200:
|
||||
raise ServerError(response.status_code, response.json())
|
||||
for line_bytes in response.iter_lines():
|
||||
line = line_bytes.decode("utf-8")
|
||||
if '[DONE]' in line:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue