Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	CMakeLists.txt
#	Makefile
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/llava/MobileVLM-README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize/CMakeLists.txt
#	examples/server/README.md
#	examples/speculative/speculative.cpp
#	tests/test-backend-ops.cpp
This commit is contained in:
Concedo 2024-09-13 16:17:24 +08:00
commit e44ddf26ef
47 changed files with 117978 additions and 117646 deletions

View file

@ -105,6 +105,14 @@ Feature: llama.cpp server
Given first token is removed
Then tokens can be detokenized
Scenario: Tokenize with pieces
When tokenizing with pieces:
"""
What is the capital of Germany?
"""
Then tokens are given with pieces
Scenario: Models available
Given available models
Then 1 models are supported

View file

@ -1,3 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import asyncio
import json
import os
@ -697,6 +700,32 @@ def step_tokenize_set_add_special(context):
context.tokenize_add_special = True
@step("tokenizing with pieces")
@async_run_until_complete
async def step_tokenize_with_pieces(context):
context.tokenized_text = context_text(context)
async with aiohttp.ClientSession() as session:
tokenize_args = {"content": context.tokenized_text, "with_pieces": True}
if getattr(context, "tokenize_add_special", None) is not None:
tokenize_args["add_special"] = context.tokenize_add_special
async with session.post(
f"{context.base_url}/tokenize", json=tokenize_args
) as response:
assert response.status == 200
tokenize_json = await response.json()
context.tokens_with_pieces = tokenize_json["tokens"]
@step("tokens are given with pieces")
@async_run_until_complete
async def step_tokenize_with_pieces(context):
# Verify that the response contains both token IDs and pieces
assert all(
"id" in token and "piece" in token for token in context.tokens_with_pieces
)
@step('tokenizing')
@async_run_until_complete
async def step_tokenize(context):