Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-12 18:09:42 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	CMakeLists.txt
#	Makefile
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/llava/MobileVLM-README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize/CMakeLists.txt
#	examples/server/README.md
#	examples/speculative/speculative.cpp
#	tests/test-backend-ops.cpp
Commit e44ddf26ef
47 changed files with 117978 additions and 117646 deletions. The hunks below show the tokenizer-related changes merged from upstream.
examples/server/server.cpp

```diff
@@ -3014,12 +3014,39 @@ int main(int argc, char ** argv) {
     const auto handle_tokenize = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
         const json body = json::parse(req.body);
 
-        std::vector<llama_token> tokens;
+        json tokens_response = json::array();
         if (body.count("content") != 0) {
             const bool add_special = json_value(body, "add_special", false);
-            tokens = ctx_server.tokenize(body.at("content"), add_special);
+            const bool with_pieces = json_value(body, "with_pieces", false);
+            std::vector<llama_token> tokens = ctx_server.tokenize(body.at("content"), add_special);
+
+            if (with_pieces) {
+                for (const auto& token : tokens) {
+                    std::string piece = llama_token_to_piece(ctx_server.ctx, token);
+                    json piece_json;
+
+                    // Check if the piece is valid UTF-8
+                    if (is_valid_utf8(piece)) {
+                        piece_json = piece;
+                    } else {
+                        // If not valid UTF-8, store as array of byte values
+                        piece_json = json::array();
+                        for (unsigned char c : piece) {
+                            piece_json.push_back(static_cast<int>(c));
+                        }
+                    }
+
+                    tokens_response.push_back({
+                        {"id", token},
+                        {"piece", piece_json}
+                    });
+                }
+            } else {
+                tokens_response = tokens;
+            }
         }
 
-        const json data = format_tokenizer_response(tokens);
+        const json data = format_tokenizer_response(tokens_response);
         res_ok(res, data);
     };
```
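For illustration, a minimal client-side sketch of the extended endpoint, assuming a server listening on the default http://localhost:8080; actual token IDs and pieces depend on the loaded model:

```python
import requests

# Request pieces alongside token IDs from the /tokenize endpoint.
resp = requests.post(
    "http://localhost:8080/tokenize",
    json={"content": "What is the capital of Germany?", "with_pieces": True},
)
resp.raise_for_status()

for token in resp.json()["tokens"]:
    # Each entry is {"id": ..., "piece": ...}; the piece is a string when it
    # is valid UTF-8, otherwise a list of raw byte values.
    print(token["id"], repr(token["piece"]))
```

Without `with_pieces` (or with it set to false), the response keeps its old shape, a plain array of token IDs, so existing clients are unaffected.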
examples/server/tests/features/server.feature

```diff
@@ -105,6 +105,14 @@ Feature: llama.cpp server
     Given first token is removed
     Then tokens can be detokenized
 
+  Scenario: Tokenize with pieces
+    When tokenizing with pieces:
+      """
+      What is the capital of Germany?
+      媽
+      """
+    Then tokens are given with pieces
+
   Scenario: Models available
     Given available models
     Then 1 models are supported
```
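The 媽 line is what exercises the byte-array path: tokenizers can split a multi-byte character across several tokens, leaving pieces that are not valid UTF-8 on their own. A sketch of reassembling the original text from such a response (the exact split shown is hypothetical and model-dependent):

```python
def reassemble(tokens: list) -> str:
    """Concatenate pieces back into text, decoding byte-array pieces."""
    buf = bytearray()
    for token in tokens:
        piece = token["piece"]
        if isinstance(piece, str):
            buf += piece.encode("utf-8")   # piece was valid UTF-8
        else:
            buf += bytes(piece)            # piece came back as raw byte values
    return buf.decode("utf-8")

# Hypothetical split of 媽 (UTF-8 bytes 0xE5 0xAA 0xBD) across two tokens:
print(reassemble([{"id": 1, "piece": [229]}, {"id": 2, "piece": [170, 189]}]))  # 媽
```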
examples/server/tests/features/steps/steps.py

```diff
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import asyncio
 import json
 import os
```
```diff
@@ -697,6 +700,32 @@ def step_tokenize_set_add_special(context):
     context.tokenize_add_special = True
 
 
+@step("tokenizing with pieces")
+@async_run_until_complete
+async def step_tokenize_with_pieces(context):
+    context.tokenized_text = context_text(context)
+    async with aiohttp.ClientSession() as session:
+        tokenize_args = {"content": context.tokenized_text, "with_pieces": True}
+        if getattr(context, "tokenize_add_special", None) is not None:
+            tokenize_args["add_special"] = context.tokenize_add_special
+
+        async with session.post(
+            f"{context.base_url}/tokenize", json=tokenize_args
+        ) as response:
+            assert response.status == 200
+            tokenize_json = await response.json()
+            context.tokens_with_pieces = tokenize_json["tokens"]
+
+
+@step("tokens are given with pieces")
+@async_run_until_complete
+async def step_tokenize_with_pieces(context):
+    # Verify that the response contains both token IDs and pieces
+    assert all(
+        "id" in token and "piece" in token for token in context.tokens_with_pieces
+    )
+
+
 @step('tokenizing')
 @async_run_until_complete
 async def step_tokenize(context):
```
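Outside the behave harness, the same round trip can be checked with a standalone script; this is a sketch assuming the default server address, not part of the commit:

```python
import asyncio

import aiohttp


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8080/tokenize",
            json={"content": "What is the capital of Germany?\n媽", "with_pieces": True},
        ) as response:
            assert response.status == 200
            tokens = (await response.json())["tokens"]
    # Mirrors the "tokens are given with pieces" step assertion.
    assert all("id" in t and "piece" in t for t in tokens)
    print(f"{len(tokens)} tokens, all with id and piece")


asyncio.run(main())
```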
examples/server/utils.hpp

```diff
@@ -616,7 +616,40 @@ static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
     return res;
 }
 
-static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
+static bool is_valid_utf8(const std::string & str) {
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str.data());
+    const unsigned char* end = bytes + str.length();
+
+    while (bytes < end) {
+        if (*bytes <= 0x7F) {
+            // 1-byte sequence (0xxxxxxx)
+            bytes++;
+        } else if ((*bytes & 0xE0) == 0xC0) {
+            // 2-byte sequence (110xxxxx 10xxxxxx)
+            if (end - bytes < 2 || (bytes[1] & 0xC0) != 0x80)
+                return false;
+            bytes += 2;
+        } else if ((*bytes & 0xF0) == 0xE0) {
+            // 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
+            if (end - bytes < 3 || (bytes[1] & 0xC0) != 0x80 || (bytes[2] & 0xC0) != 0x80)
+                return false;
+            bytes += 3;
+        } else if ((*bytes & 0xF8) == 0xF0) {
+            // 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+            if (end - bytes < 4 || (bytes[1] & 0xC0) != 0x80 ||
+                (bytes[2] & 0xC0) != 0x80 || (bytes[3] & 0xC0) != 0x80)
+                return false;
+            bytes += 4;
+        } else {
+            // Invalid UTF-8 lead byte
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static json format_tokenizer_response(const json & tokens) {
     return json {
         {"tokens", tokens}
     };
```
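To make the validator's acceptance rules concrete, here is an illustrative Python port of the same logic (not part of the commit). Like the C++ original, it only checks lead-byte/continuation structure; it does not reject overlong encodings or surrogate code points, which a strict decoder would:

```python
def is_valid_utf8(data: bytes) -> bool:
    i, n = 0, len(data)
    while i < n:
        b = data[i]
        if b <= 0x7F:             # 1-byte sequence (0xxxxxxx)
            i += 1
        elif b & 0xE0 == 0xC0:    # 2-byte sequence (110xxxxx 10xxxxxx)
            if n - i < 2 or data[i + 1] & 0xC0 != 0x80:
                return False
            i += 2
        elif b & 0xF0 == 0xE0:    # 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
            if n - i < 3 or any(data[i + k] & 0xC0 != 0x80 for k in (1, 2)):
                return False
            i += 3
        elif b & 0xF8 == 0xF0:    # 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
            if n - i < 4 or any(data[i + k] & 0xC0 != 0x80 for k in (1, 2, 3)):
                return False
            i += 4
        else:                     # invalid lead byte
            return False
    return True


assert is_valid_utf8("媽".encode("utf-8"))   # complete 3-byte character
assert not is_valid_utf8(b"\xe5\xaa")        # truncated sequence -> byte-array path
```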