koboldcpp/tools/server/tests/unit/test_compat_gcp.py
Xuan-Son Nguyen 29debb3a6a
server: support Vertex AI compatible API (#22545)
* server: support Vertex AI compatible API

* a bit safer

* support other AIP_* env var

* various fixes

* if AIP_MODE is unset, do nothing

* fix test case

* fix windows build
2026-05-08 15:23:04 +02:00

60 lines
1.8 KiB
Python

import pytest
from utils import *
# Module-level handle to the server under test. This line only declares the
# name and its type; the `create_server` fixture binds the actual instance.
server: ServerProcess
@pytest.fixture(autouse=True)
def create_server():
    """Bind a fresh tinyllama2 preset with `gcp_compat` enabled to `server`.

    Declared with ``autouse=True``, so pytest applies it to every test in
    this module without the test having to request it. The server is only
    configured here; each test calls ``server.start()`` itself.
    """
    global server
    preset = ServerPreset.tinyllama2()
    # NOTE(review): presumably switches the server into its Vertex AI (GCP)
    # compatible mode that exposes `/predict` -- confirm in utils.ServerProcess.
    preset.gcp_compat = True
    server = preset
def test_gcp_predict_camel_case():
    """POST one chat-completion instance to `/predict` and check the reply.

    The instance selects its format through the camelCase `@requestFormat`
    key. The response must carry exactly one prediction holding exactly one
    choice, whose message is a non-empty assistant reply.
    """
    global server
    server.start()

    chat_instance = {
        "@requestFormat": "chatCompletions",
        "max_tokens": 8,
        "messages": [
            {"role": "user", "content": "What is the meaning of life?"},
        ],
    }
    res = server.make_request(
        "POST",
        "/predict",
        data={"instances": [chat_instance]},
    )

    assert res.status_code == 200
    assert "predictions" in res.body
    predictions = res.body["predictions"]
    assert len(predictions) == 1

    prediction = predictions[0]
    assert "choices" in prediction
    choices = prediction["choices"]
    assert len(choices) == 1

    message = choices[0]["message"]
    assert message["role"] == "assistant"
    assert len(message["content"]) > 0
def test_gcp_predict_multiple_instances():
    """POST two chat-completion instances in one `/predict` request.

    The response must contain two predictions, each with a `choices` list
    whose first message has non-empty content.
    """
    global server
    # NOTE(review): two slots for two instances -- presumably so both can be
    # served at the same time; confirm against the server implementation.
    server.n_slots = 2
    server.start()

    instances = [
        {
            "@requestFormat": "chatCompletions",
            "max_tokens": 8,
            "messages": [{"role": "user", "content": prompt}],
        }
        for prompt in ("Say hello", "Say world")
    ]
    res = server.make_request("POST", "/predict", data={"instances": instances})

    assert res.status_code == 200
    predictions = res.body["predictions"]
    assert len(predictions) == 2
    for prediction in predictions:
        assert "choices" in prediction
        first_message = prediction["choices"][0]["message"]
        assert len(first_message["content"]) > 0