merged, added ability to render special tokens

This commit is contained in:
Concedo 2024-04-22 18:19:58 +08:00
commit b4d2031215
37 changed files with 335 additions and 7328 deletions

View file

@ -109,7 +109,7 @@ int32_t get_num_physical_cores() {
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
#if defined(__x86_64__) && defined(__linux__)
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
#include <pthread.h>
static void cpuid(unsigned leaf, unsigned subleaf,
@ -163,7 +163,7 @@ static int count_math_cpus(int cpu_count) {
* Returns number of CPUs on system that are useful for math.
*/
int get_math_cpu_count() {
#if defined(__x86_64__) && defined(__linux__)
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
int cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
if (cpu_count < 1) {
return get_num_physical_cores();
@ -2329,10 +2329,10 @@ std::vector<llama_token> llama_tokenize(
std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
std::vector<char> result(8, 0);
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
if (n_tokens < 0) {
result.resize(-n_tokens);
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
GGML_ASSERT(check == -n_tokens);
} else {
result.resize(n_tokens);