Merge branch 'master' into concedo_experimental

# Conflicts:
#	.devops/tools.sh
#	README.md
2025-09-10 17:14:36 +00:00 · 2023-04-27 16:12:00 +08:00 · 2023-04-27 16:12:00 +08:00 · 95bbd46019
commit 95bbd46019
parent 5070815dcf 0b2da20538
8 changed files with 757 additions and 36 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -491,6 +491,8 @@ struct llama_file_loader {
                case GGML_TYPE_Q4_1:
                case GGML_TYPE_Q4_2:
                case GGML_TYPE_Q4_3:
+                case GGML_TYPE_Q5_0:
+                case GGML_TYPE_Q5_1:
                case GGML_TYPE_Q8_0:
                    break;
                default: {
@ -566,6 +568,8 @@ struct llama_file_saver {
            case GGML_TYPE_Q4_1:
            case GGML_TYPE_Q4_2:
            case GGML_TYPE_Q4_3:
+            case GGML_TYPE_Q5_0:
+            case GGML_TYPE_Q5_1:
            case GGML_TYPE_Q8_0:
                break;
            default: LLAMA_ASSERT(false);
@ -857,6 +861,8 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
                                      return "mostly Q4_1, some F16";
        case LLAMA_FTYPE_MOSTLY_Q4_2: return "mostly Q4_2";
        case LLAMA_FTYPE_MOSTLY_Q4_3: return "mostly Q4_3";
+        case LLAMA_FTYPE_MOSTLY_Q5_0: return "mostly Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1: return "mostly Q5_1";
        case LLAMA_FTYPE_MOSTLY_Q8_0: return "mostly Q8_0";
        default:                      return "unknown, may not work";
    }
@ -1595,6 +1601,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
        case LLAMA_FTYPE_MOSTLY_Q4_1: quantized_type = GGML_TYPE_Q4_1; break;
        case LLAMA_FTYPE_MOSTLY_Q4_2: quantized_type = GGML_TYPE_Q4_2; break;
        case LLAMA_FTYPE_MOSTLY_Q4_3: quantized_type = GGML_TYPE_Q4_3; break;
+        case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
+        case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
        case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
        default: throw format("invalid output file type %d\n", ftype);
    };
@ -2089,6 +2097,13 @@ int llama_get_kv_cache_token_count(struct llama_context * ctx) {

 #define LLAMA_MAX_RNG_STATE 64*1024

+void llama_set_rng_seed(struct llama_context * ctx, int seed) { // reseeds the random number generator stored in ctx->rng
+    if (seed <= 0) { // zero and negative values are never used as the seed themselves
+        seed = time(NULL); // substitute the current time; the time_t result is implicitly narrowed to int
+    }
+    ctx->rng.seed(seed); // NOTE(review): the type of ctx->rng is declared outside this hunk — presumably a <random> engine; confirm
+}
+
 // Returns the size of the state
 size_t llama_get_state_size(struct llama_context * ctx) {
    // we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.