mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 01:54:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # examples/llava/clip-impl.h # examples/llava/clip.cpp # tests/test-arg-parser.cpp # tests/test-json-schema-to-grammar.cpp
This commit is contained in:
commit
36c8db1248
12 changed files with 464 additions and 657 deletions
103
common/arg.cpp
103
common/arg.cpp
|
@ -163,6 +163,10 @@ struct common_hf_file_res {
|
|||
|
||||
#ifdef LLAMA_USE_CURL
|
||||
|
||||
bool common_has_curl() {
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
#include <linux/limits.h>
|
||||
#elif defined(_WIN32)
|
||||
|
@ -528,6 +532,50 @@ static bool common_download_model(
|
|||
return true;
|
||||
}
|
||||
|
||||
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
||||
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
||||
curl_slist_ptr http_headers;
|
||||
std::vector<char> res_buffer;
|
||||
|
||||
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
||||
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
||||
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
||||
auto data_vec = static_cast<std::vector<char> *>(data);
|
||||
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
||||
return size * nmemb;
|
||||
};
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
||||
#if defined(_WIN32)
|
||||
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
||||
#endif
|
||||
if (params.timeout > 0) {
|
||||
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
||||
}
|
||||
if (params.max_size > 0) {
|
||||
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
||||
}
|
||||
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
||||
for (const auto & header : params.headers) {
|
||||
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
|
||||
}
|
||||
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
std::string error_msg = curl_easy_strerror(res);
|
||||
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
||||
}
|
||||
|
||||
long res_code;
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
||||
|
||||
return { res_code, std::move(res_buffer) };
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
|
||||
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
|
||||
|
@ -547,45 +595,26 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
|||
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
||||
}
|
||||
|
||||
// fetch model info from Hugging Face Hub API
|
||||
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
||||
curl_slist_ptr http_headers;
|
||||
std::string res_str;
|
||||
std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
|
||||
|
||||
std::string model_endpoint = get_model_endpoint();
|
||||
|
||||
std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
|
||||
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
||||
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
||||
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
||||
static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
|
||||
return size * nmemb;
|
||||
};
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
||||
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
|
||||
#if defined(_WIN32)
|
||||
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
||||
#endif
|
||||
// headers
|
||||
std::vector<std::string> headers;
|
||||
headers.push_back("Accept: application/json");
|
||||
if (!bearer_token.empty()) {
|
||||
std::string auth_header = "Authorization: Bearer " + bearer_token;
|
||||
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
||||
headers.push_back("Authorization: Bearer " + bearer_token);
|
||||
}
|
||||
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
|
||||
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
||||
http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
|
||||
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
||||
// User-Agent header is already set in common_remote_get_content, no need to set it here
|
||||
|
||||
CURLcode res = curl_easy_perform(curl.get());
|
||||
// make the request
|
||||
common_remote_params params;
|
||||
params.headers = headers;
|
||||
auto res = common_remote_get_content(url, params);
|
||||
long res_code = res.first;
|
||||
std::string res_str(res.second.data(), res.second.size());
|
||||
std::string ggufFile;
|
||||
std::string mmprojFile;
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
throw std::runtime_error("error: cannot make GET request to HF API");
|
||||
}
|
||||
|
||||
long res_code;
|
||||
std::string ggufFile = "";
|
||||
std::string mmprojFile = "";
|
||||
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
||||
if (res_code == 200) {
|
||||
// extract ggufFile.rfilename in json, using regex
|
||||
{
|
||||
|
@ -619,6 +648,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
|||
|
||||
#else
|
||||
|
||||
bool common_has_curl() {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
|
||||
LOG_ERR("error: built without CURL, cannot download model from internet\n");
|
||||
return false;
|
||||
|
@ -641,6 +674,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
|
|||
return {};
|
||||
}
|
||||
|
||||
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
||||
throw std::runtime_error("error: built without CURL, cannot download model from the internet");
|
||||
}
|
||||
|
||||
#endif // LLAMA_USE_CURL
|
||||
|
||||
//
|
||||
|
|
|
@ -78,3 +78,12 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
|
|||
|
||||
// function to be used by test-arg-parser
|
||||
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
||||
bool common_has_curl();
|
||||
|
||||
struct common_remote_params {
|
||||
std::vector<std::string> headers;
|
||||
long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
|
||||
long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
|
||||
};
|
||||
// get remote file content, returns <http_code, raw_response_body>
|
||||
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
|
||||
|
|
|
@ -16,6 +16,9 @@ using json = nlohmann::ordered_json;
|
|||
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
|
||||
auto has_max = max_items != std::numeric_limits<int>::max();
|
||||
|
||||
if (max_items == 0) {
|
||||
return "";
|
||||
}
|
||||
if (min_items == 0 && max_items == 1) {
|
||||
return item_rule + "?";
|
||||
}
|
||||
|
|
|
@ -17,24 +17,15 @@
|
|||
#define KEY_FTYPE "general.file_type"
|
||||
#define KEY_NAME "general.name"
|
||||
#define KEY_DESCRIPTION "general.description"
|
||||
#define KEY_HAS_TEXT_ENC "clip.has_text_encoder"
|
||||
#define KEY_HAS_VIS_ENC "clip.has_vision_encoder"
|
||||
#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector"
|
||||
#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector"
|
||||
#define KEY_HAS_GLM_PROJ "clip.has_glm_projector"
|
||||
#define KEY_MINICPMV_VERSION "clip.minicpmv_version"
|
||||
#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger"
|
||||
#define KEY_USE_GELU "clip.use_gelu"
|
||||
#define KEY_USE_SILU "clip.use_silu"
|
||||
#define KEY_USE_GLU_MLP "clip.use_glu_mlp"
|
||||
#define KEY_USE_RMS_NORM "clip.use_rms_norm"
|
||||
#define KEY_N_EMBD "clip.%s.embedding_length"
|
||||
#define KEY_N_FF "clip.%s.feed_forward_length"
|
||||
#define KEY_N_BLOCK "clip.%s.block_count"
|
||||
#define KEY_N_HEAD "clip.%s.attention.head_count"
|
||||
#define KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon"
|
||||
#define KEY_PROJ_DIM "clip.%s.projection_dim"
|
||||
#define KEY_TOKENS "tokenizer.ggml.tokens"
|
||||
#define KEY_N_EMBD "clip.vision.embedding_length"
|
||||
#define KEY_N_FF "clip.vision.feed_forward_length"
|
||||
#define KEY_N_BLOCK "clip.vision.block_count"
|
||||
#define KEY_N_HEAD "clip.vision.attention.head_count"
|
||||
#define KEY_LAYER_NORM_EPS "clip.vision.attention.layer_norm_epsilon"
|
||||
#define KEY_PROJ_DIM "clip.vision.projection_dim"
|
||||
#define KEY_IMAGE_SIZE "clip.vision.image_size"
|
||||
#define KEY_PATCH_SIZE "clip.vision.patch_size"
|
||||
#define KEY_IMAGE_MEAN "clip.vision.image_mean"
|
||||
|
@ -46,8 +37,6 @@
|
|||
#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
|
||||
#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
|
||||
#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution"
|
||||
#define KEY_FULLATTN_BLK_IDX "clip.vision.fullatt_block_indexes"
|
||||
#define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
|
||||
|
||||
|
||||
//
|
||||
|
@ -66,7 +55,6 @@
|
|||
#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s"
|
||||
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
|
||||
#define TN_FFN_UP "%s.blk.%d.ffn_up.%s"
|
||||
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
|
||||
#define TN_LN_1 "%s.blk.%d.ln1.%s"
|
||||
#define TN_LN_2 "%s.blk.%d.ln2.%s"
|
||||
#define TN_LN_PRE "%s.pre_ln.%s"
|
||||
|
@ -101,9 +89,9 @@ enum projector_type {
|
|||
PROJECTOR_TYPE_MLP_NORM,
|
||||
PROJECTOR_TYPE_LDP,
|
||||
PROJECTOR_TYPE_LDPV2,
|
||||
PROJECTOR_TYPE_RESAMPLER,
|
||||
PROJECTOR_TYPE_MINICPMV,
|
||||
PROJECTOR_TYPE_GLM_EDGE,
|
||||
PROJECTOR_TYPE_MERGER,
|
||||
PROJECTOR_TYPE_QWEN2VL,
|
||||
PROJECTOR_TYPE_GEMMA3,
|
||||
PROJECTOR_TYPE_IDEFICS3,
|
||||
PROJECTOR_TYPE_PIXTRAL,
|
||||
|
@ -114,9 +102,9 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
|||
{ PROJECTOR_TYPE_MLP, "mlp" },
|
||||
{ PROJECTOR_TYPE_LDP, "ldp" },
|
||||
{ PROJECTOR_TYPE_LDPV2, "ldpv2"},
|
||||
{ PROJECTOR_TYPE_RESAMPLER, "resampler"},
|
||||
{ PROJECTOR_TYPE_MINICPMV, "resampler"},
|
||||
{ PROJECTOR_TYPE_GLM_EDGE, "adapter"},
|
||||
{ PROJECTOR_TYPE_MERGER, "qwen2vl_merger"},
|
||||
{ PROJECTOR_TYPE_QWEN2VL, "qwen2vl_merger"},
|
||||
{ PROJECTOR_TYPE_GEMMA3, "gemma3"},
|
||||
{ PROJECTOR_TYPE_IDEFICS3, "idefics3"},
|
||||
{ PROJECTOR_TYPE_PIXTRAL, "pixtral"},
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -115,8 +115,6 @@ CLIP_API bool clip_is_llava(const struct clip_ctx * ctx);
|
|||
CLIP_API bool clip_is_gemma3(const struct clip_ctx * ctx);
|
||||
CLIP_API bool clip_is_pixtral(const struct clip_ctx * ctx);
|
||||
|
||||
CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);
|
||||
|
||||
CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
|
||||
|
||||
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}';
|
||||
|
||||
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
|
||||
if (maxItems == 0) {
|
||||
return '';
|
||||
}
|
||||
if (minItems === 0 && maxItems === 1) {
|
||||
return `${itemRule}?`;
|
||||
}
|
||||
|
|
|
@ -133,6 +133,11 @@ extern "C" {
|
|||
|
||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
|
||||
|
||||
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
|
||||
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
|
||||
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
|
||||
GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t *, float *, int64_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -219,7 +219,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|||
.nrows = 1,
|
||||
},
|
||||
[GGML_TYPE_F16] = {
|
||||
.from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
||||
.from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp16,
|
||||
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
|
||||
.vec_dot_type = GGML_TYPE_F16,
|
||||
.nrows = 1,
|
||||
|
@ -360,7 +360,7 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
|
|||
.from_float = quantize_row_q8_K,
|
||||
},
|
||||
[GGML_TYPE_BF16] = {
|
||||
.from_float = (ggml_from_float_t) ggml_fp32_to_bf16_row,
|
||||
.from_float = (ggml_from_float_t) ggml_cpu_fp32_to_bf16,
|
||||
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_bf16,
|
||||
.vec_dot_type = GGML_TYPE_BF16,
|
||||
.nrows = 1,
|
||||
|
@ -3186,6 +3186,93 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
|
|||
return ggml_graph_compute(cgraph, &cplan);
|
||||
}
|
||||
|
||||
void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
|
||||
int64_t i = 0;
|
||||
#if defined(__F16C__)
|
||||
#if defined(__AVX512F__)
|
||||
for (; i + 15 < n; i += 16) {
|
||||
__m512 x_vec = _mm512_loadu_ps(x + i);
|
||||
__m256i y_vec = _mm512_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||
_mm256_storeu_si256((__m256i *)(y + i), y_vec);
|
||||
}
|
||||
#endif
|
||||
for (; i + 7 < n; i += 8) {
|
||||
__m256 x_vec = _mm256_loadu_ps(x + i);
|
||||
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||
_mm_storeu_si128((__m128i *)(y + i), y_vec);
|
||||
}
|
||||
for (; i + 3 < n; i += 4) {
|
||||
__m128 x_vec = _mm_loadu_ps(x + i);
|
||||
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||
_mm_storel_epi64((__m128i *)(y + i), y_vec);
|
||||
}
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
y[i] = GGML_FP32_TO_FP16(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
|
||||
int64_t i = 0;
|
||||
#if defined(__F16C__)
|
||||
#if defined(__AVX512F__)
|
||||
for (; i + 15 < n; i += 16) {
|
||||
__m256i x_vec = _mm256_loadu_si256((const __m256i *)(x + i));
|
||||
__m512 y_vec = _mm512_cvtph_ps(x_vec);
|
||||
_mm512_storeu_ps(y + i, y_vec);
|
||||
}
|
||||
#endif
|
||||
for (; i + 7 < n; i += 8) {
|
||||
__m128i x_vec = _mm_loadu_si128((const __m128i *)(x + i));
|
||||
__m256 y_vec = _mm256_cvtph_ps(x_vec);
|
||||
_mm256_storeu_ps(y + i, y_vec);
|
||||
}
|
||||
for (; i + 3 < n; i += 4) {
|
||||
__m128i x_vec = _mm_loadl_epi64((const __m128i *)(x + i));
|
||||
__m128 y_vec = _mm_cvtph_ps(x_vec);
|
||||
_mm_storeu_ps(y + i, y_vec);
|
||||
}
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
y[i] = GGML_FP16_TO_FP32(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_cpu_fp32_to_bf16(const float * x, ggml_bf16_t * y, int64_t n) {
|
||||
int64_t i = 0;
|
||||
for (; i < n; ++i) {
|
||||
y[i] = GGML_FP32_TO_BF16(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_cpu_bf16_to_fp32(const ggml_bf16_t * x, float * y, int64_t n) {
|
||||
int64_t i = 0;
|
||||
#if defined(__AVX2__)
|
||||
#if defined(__AVX512F__)
|
||||
for (; i + 15 < n; i += 16) {
|
||||
_mm512_storeu_ps(y + i,
|
||||
_mm512_castsi512_ps(
|
||||
_mm512_slli_epi32(
|
||||
_mm512_cvtepu16_epi32(
|
||||
_mm256_loadu_si256(
|
||||
(const __m256i *)(x + i))),
|
||||
16)));
|
||||
}
|
||||
#endif
|
||||
for (; i + 7 < n; i += 8) {
|
||||
_mm256_storeu_ps(y + i,
|
||||
_mm256_castsi256_ps(
|
||||
_mm256_slli_epi32(
|
||||
_mm256_cvtepu16_epi32(
|
||||
_mm_loadu_si128(
|
||||
(const __m128i *)(x + i))),
|
||||
16)));
|
||||
}
|
||||
#endif
|
||||
for (; i < n; i++) {
|
||||
y[i] = GGML_BF16_TO_FP32(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int ggml_cpu_has_avx(void) {
|
||||
#if defined(__AVX__)
|
||||
|
|
|
@ -4222,7 +4222,7 @@ static void ggml_compute_forward_get_rows_f16(
|
|||
|
||||
GGML_ASSERT(i01 >= 0 && i01 < ne01);
|
||||
|
||||
ggml_fp16_to_fp32_row(
|
||||
ggml_cpu_fp16_to_fp32(
|
||||
(const ggml_fp16_t*) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
|
||||
}
|
||||
|
@ -4263,7 +4263,7 @@ static void ggml_compute_forward_get_rows_bf16(
|
|||
|
||||
GGML_ASSERT(i01 >= 0 && i01 < ne01);
|
||||
|
||||
ggml_bf16_to_fp32_row(
|
||||
ggml_cpu_bf16_to_fp32(
|
||||
(const ggml_bf16_t *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "ggml-impl.h"
|
||||
#include "ggml-cpu-impl.h"
|
||||
#include "ggml-threading.h"
|
||||
#include "ggml-cpu.h"
|
||||
#include "ggml.h"
|
||||
|
||||
// FIXME: required here for quantization functions
|
||||
|
@ -383,58 +384,16 @@ void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
// FIXME: these functions must detect the instruction set at runtime, since they are part of the core ggml library
|
||||
// currently, the ggml_cpu_has_* functions are entirely compile-time
|
||||
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
|
||||
int64_t i = 0;
|
||||
#if defined(__F16C__)
|
||||
//if (ggml_cpu_has_f16c()) {
|
||||
for (; i + 7 < n; i += 8) {
|
||||
__m256 x_vec = _mm256_loadu_ps(x + i);
|
||||
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||
_mm_storeu_si128((__m128i *)(y + i), y_vec);
|
||||
}
|
||||
for(; i + 3 < n; i += 4) {
|
||||
__m128 x_vec = _mm_loadu_ps(x + i);
|
||||
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||
_mm_storel_epi64((__m128i *)(y + i), y_vec);
|
||||
}
|
||||
//}
|
||||
#endif
|
||||
for (; i < n; i++) {
|
||||
int i = 0;
|
||||
for (; i < n; ++i) {
|
||||
y[i] = GGML_FP32_TO_FP16(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
|
||||
int64_t i = 0;
|
||||
#if defined(__AVX512F__)
|
||||
//if (ggml_cpu_has_avx512()) {
|
||||
for (; i + 16 <= n; i += 16) {
|
||||
_mm512_storeu_ps(y + i,
|
||||
_mm512_castsi512_ps(
|
||||
_mm512_slli_epi32(
|
||||
_mm512_cvtepu16_epi32(
|
||||
_mm256_loadu_si256(
|
||||
(const __m256i *)(x + i))),
|
||||
16)));
|
||||
}
|
||||
//}
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
//if (ggml_cpu_has_avx2()) {
|
||||
for (; i + 8 <= n; i += 8) {
|
||||
_mm256_storeu_ps(y + i,
|
||||
_mm256_castsi256_ps(
|
||||
_mm256_slli_epi32(
|
||||
_mm256_cvtepu16_epi32(
|
||||
_mm_loadu_si128(
|
||||
(const __m128i *)(x + i))),
|
||||
16)));
|
||||
}
|
||||
//}
|
||||
#endif
|
||||
for (; i < n; i++) {
|
||||
int i = 0;
|
||||
for (; i < n; ++i) {
|
||||
y[i] = GGML_BF16_TO_FP32(x[i]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,9 @@ from typing import Any, List, Optional, Set, Tuple, Union
|
|||
|
||||
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
|
||||
|
||||
if max_items == 0:
|
||||
return ""
|
||||
|
||||
if min_items == 0 and max_items == 1:
|
||||
return f'{item_rule}?'
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue