Merge branch 'master' into concedo_experimental

# Conflicts:
#	README.md
#	scripts/sync-ggml.sh

commit ea3fd87f68: 14 changed files with 93 additions and 24 deletions
examples/llava/README.md
@@ -29,19 +29,25 @@ git clone https://huggingface.co/liuhaotian/llava-v1.5-7b
 git clone https://huggingface.co/openai/clip-vit-large-patch14-336
 ```

-2. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:
+2. Install the required Python packages:
+
+```sh
+pip install -r examples/llava/requirements.txt
+```
+
+3. Use `llava-surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents:

 ```sh
 python ./examples/llava/llava-surgery.py -m ../llava-v1.5-7b
 ```

-3. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF:
+4. Use `convert-image-encoder-to-gguf.py` to convert the LLaVA image encoder to GGUF:

 ```sh
 python ./examples/llava/convert-image-encoder-to-gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b
 ```

-4. Use `convert.py` to convert the LLaMA part of LLaVA to GGUF:
+5. Use `convert.py` to convert the LLaMA part of LLaVA to GGUF:

 ```sh
 python ./convert.py ../llava-v1.5-7b
examples/llava/llava-surgery.py
@@ -42,5 +42,5 @@ if len(clip_tensors) > 0:
     torch.save(checkpoint, path)

 print("Done!")
-print(f"Now you can convert {args.model} to a a regular LLaMA GGUF file.")
+print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.")
 print(f"Also, use {args.model}/llava.projector to prepare a llava-encoder.gguf file.")
examples/llava/requirements.txt (new file)
@@ -0,0 +1,3 @@
+-r ../../requirements/requirements-convert.txt
+pillow~=10.2.0
+torch~=2.1.1
examples/server/server.cpp
@@ -1593,10 +1593,6 @@ struct llama_server_context
 LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
 }

-LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past);
-
-llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1);
-
 slot.cache_tokens = prompt_tokens;

 if (slot.n_past == slot.num_prompt_tokens && slot.n_past > 0)
@@ -1610,6 +1606,10 @@ struct llama_server_context
 }
 }

+LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past);
+
+llama_kv_cache_seq_rm(ctx, slot.id, system_tokens.size() + slot.n_past, -1);
+
 LOG_VERBOSE("prompt ingested", {
 {"n_past", slot.n_past},
 {"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
ggml-backend.c
@@ -653,6 +653,9 @@ struct ggml_backend_cpu_context {
 int n_threads;
 void * work_data;
 size_t work_size;
+
+ggml_abort_callback abort_callback;
+void * abort_callback_data;
 };

 GGML_CALL static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
@@ -691,6 +694,9 @@ GGML_CALL static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(gg
 cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
 }

+cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
+cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+
 return cpu_plan;
 }

@@ -721,9 +727,11 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, str
 cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
 cpu_ctx->work_size = cplan.work_size;
 }

 cplan.work_data = cpu_ctx->work_data;

+cplan.abort_callback = cpu_ctx->abort_callback;
+cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+
 ggml_graph_compute(cgraph, &cplan);
 return true;
 }
@@ -762,6 +770,8 @@ ggml_backend_t ggml_backend_cpu_init(void) {
 ctx->n_threads = GGML_DEFAULT_N_THREADS;
 ctx->work_data = NULL;
 ctx->work_size = 0;
+ctx->abort_callback = NULL;
+ctx->abort_callback_data = NULL;

 ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend));

@@ -783,6 +793,14 @@ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
 ctx->n_threads = n_threads;
 }

+void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
+GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+ctx->abort_callback = abort_callback;
+ctx->abort_callback_data = abort_callback_data;
+}
+
 GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) {
 return ggml_backend_buffer_init(ggml_backend_cpu_buffer_type(), cpu_backend_buffer_i_from_ptr, ptr, size);
 }
ggml-backend.h
@@ -85,6 +85,7 @@ extern "C" {

 GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
 GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
+GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);

 // Create a backend buffer from an existing pointer
 GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
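For context, a minimal usage sketch of the new CPU-backend API (not part of the diff; the callback, its data pointer, and the main() scaffolding are illustrative). It registers an abort callback that the backend then copies into each compute plan, as the ggml-backend.c hunks above show:

```c
#include "ggml.h"
#include "ggml-backend.h"
#include <stdbool.h>

// Return true to make the CPU backend stop the current graph computation.
static bool should_abort(void * data) {
    const volatile bool * stop = (const volatile bool *) data;
    return *stop;
}

int main(void) {
    static volatile bool stop = false;

    ggml_backend_t backend = ggml_backend_cpu_init();

    // New API from this change: the backend copies the callback into every cplan
    // it builds (see ggml_backend_cpu_graph_plan_create / _graph_compute above).
    ggml_backend_cpu_set_abort_callback(backend, should_abort, (void *) &stop);

    // ... build a graph, allocate its tensors, and run it with
    // ggml_backend_graph_compute(backend, graph); setting `stop = true` from
    // another thread would then end the computation early ...

    ggml_backend_free(backend);
    return 0;
}
```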
ggml-metal.m
@@ -687,6 +687,7 @@ static bool ggml_metal_graph_compute(
 struct ggml_metal_context * ctx,
 struct ggml_cgraph * gf) {

+@autoreleasepool {
 MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
 edesc.dispatchType = MTLDispatchTypeSerial;

@@ -2272,6 +2273,7 @@ static bool ggml_metal_graph_compute(
 [[MTLCaptureManager sharedCaptureManager] stopCapture];
 }

+}
 return true;
 }

ggml-quants.c
@@ -270,6 +270,17 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128
 #endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)

 #if defined(__ARM_NEON)
+
+#ifdef _MSC_VER
+
+#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
+
+#else
+
+#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
+
+#endif
+
 #if !defined(__aarch64__)

 // 64-bit compatibility
@@ -8700,10 +8711,10 @@ void ggml_vec_dot_iq3_xxs_q8_K(const int n, float * restrict s, const void * res
 for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
 q8b = ggml_vld1q_s8_x4(q8); q8 += 64;
 memcpy(aux32, gas, 2*sizeof(uint32_t)); gas += 2*sizeof(uint32_t);
-const uint32x4_t aux32x4_0 = {iq3xxs_grid[q3[ 0]], iq3xxs_grid[q3[ 1]], iq3xxs_grid[q3[ 2]], iq3xxs_grid[q3[ 3]]};
-const uint32x4_t aux32x4_1 = {iq3xxs_grid[q3[ 4]], iq3xxs_grid[q3[ 5]], iq3xxs_grid[q3[ 6]], iq3xxs_grid[q3[ 7]]};
-const uint32x4_t aux32x4_2 = {iq3xxs_grid[q3[ 8]], iq3xxs_grid[q3[ 9]], iq3xxs_grid[q3[10]], iq3xxs_grid[q3[11]]};
-const uint32x4_t aux32x4_3 = {iq3xxs_grid[q3[12]], iq3xxs_grid[q3[13]], iq3xxs_grid[q3[14]], iq3xxs_grid[q3[15]]};
+const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3xxs_grid[q3[ 0]], iq3xxs_grid[q3[ 1]], iq3xxs_grid[q3[ 2]], iq3xxs_grid[q3[ 3]]);
+const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3xxs_grid[q3[ 4]], iq3xxs_grid[q3[ 5]], iq3xxs_grid[q3[ 6]], iq3xxs_grid[q3[ 7]]);
+const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3xxs_grid[q3[ 8]], iq3xxs_grid[q3[ 9]], iq3xxs_grid[q3[10]], iq3xxs_grid[q3[11]]);
+const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3xxs_grid[q3[12]], iq3xxs_grid[q3[13]], iq3xxs_grid[q3[14]], iq3xxs_grid[q3[15]]);
 q3 += 16;
 q3s.val[0] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 0) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 7) & 127))));
 q3s.val[1] = vcombine_s8(vld1_s8((const void *)(signs64 + ((aux32[0] >> 14) & 127))), vld1_s8((const void *)(signs64 + ((aux32[0] >> 21) & 127))));
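A note on the MSVC branch of `ggml_vld1q_u32` added above: it packs each pair of 32-bit lanes into one 64-bit half, presumably because MSVC's ARM NEON vector types are not brace-initializable from four 32-bit lanes (that rationale is an assumption; the diff itself only shows the two macro variants). The packing arithmetic can be sanity-checked with plain integers:

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t w = 0x11111111u, x = 0x22222222u;
    // Mirrors the MSVC expansion: lane w lands in the low 32 bits, lane x in the high 32 bits.
    uint64_t half0 = (uint64_t) w + ((uint64_t) x << 32);
    printf("%016llx\n", (unsigned long long) half0); // prints 2222222211111111
    return 0;
}
```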
ggml.c
@@ -16649,7 +16649,7 @@ struct ggml_compute_state_shared {
 atomic_int node_n; // active graph node
 atomic_int node_task; // active graph node task phase

-bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
+ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
 void * abort_callback_data;
 };

ggml.h
@@ -574,6 +574,11 @@ extern "C" {

 static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);

+// Abort callback
+// If not NULL, called before ggml computation
+// If it returns true, the computation is aborted
+typedef bool (*ggml_abort_callback)(void * data);
+
 // the compute plan that needs to be prepared for ggml_graph_compute()
 // since https://github.com/ggerganov/ggml/issues/287
 struct ggml_cplan {
@@ -583,7 +588,7 @@ extern "C" {
 int n_threads;

 // abort ggml_graph_compute when true
-bool (*abort_callback)(void * data);
+ggml_abort_callback abort_callback;
 void * abort_callback_data;
 };

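A second sketch (also not part of the diff) shows the lower-level path the ggml.h hunk above touches: `ggml_graph_plan()` followed by `ggml_graph_compute()`, with the `abort_callback` field of `ggml_cplan` now typed as `ggml_abort_callback`. The tiny add-graph, context size, and abort policy are illustrative assumptions:

```c
#include "ggml.h"
#include <stdbool.h>
#include <stdlib.h>

// Illustrative policy: allow a fixed number of callback polls, then abort.
static bool abort_after_n_checks(void * data) {
    int * remaining = (int *) data;
    return (*remaining)-- <= 0;
}

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, ggml_add(ctx, a, b));

    int budget = 1000;
    struct ggml_cplan cplan = ggml_graph_plan(gf, /*n_threads =*/ 1);
    cplan.abort_callback      = abort_after_n_checks; // field retyped to ggml_abort_callback by this change
    cplan.abort_callback_data = &budget;
    if (cplan.work_size > 0) {
        cplan.work_data = malloc(cplan.work_size);
    }

    ggml_graph_compute(gf, &cplan); // returns early if the callback reports an abort

    free(cplan.work_data);
    ggml_free(ctx);
    return 0;
}
```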
ggml_vk_generate_shaders.py
@@ -2067,6 +2067,8 @@ type_names = {

 K_QUANTS_PER_ITERATION = 2

+ASYNCIO_CONCURRENCY = 64
+
 output_dir = gettempdir()

 lock = asyncio.Lock()
@@ -2291,7 +2293,14 @@ async def main():
     tasks.append(string_to_spv("rope_neox_f32", rope_neox_src, {"A_TYPE": "float", "D_TYPE": "float"}))
     tasks.append(string_to_spv("rope_neox_f16", rope_neox_src, {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))

-    await asyncio.gather(*tasks)
+    # Helper to decorate tasks with semaphore acquisition.
+    async def withSemaphore(sem, task):
+        async with sem:
+            return await task
+
+    # Run tasks concurrently guarded by a concurrency limit.
+    sem = asyncio.Semaphore(ASYNCIO_CONCURRENCY)
+    await asyncio.gather(*(withSemaphore(sem, task) for task in tasks))

     with open("ggml-vulkan-shaders.hpp", "w") as f:
         f.write("#include <cstdint>\n\n")
llama.cpp
@@ -7360,7 +7360,9 @@ static int llama_decode_internal(
 // TODO: this is mostly important for Apple Silicon where CBLAS is still performing very well
 // we still need some threads to process all non-mul_mat ops, but not too much to avoid interfering
 // with the BLAS calls. need a better solution
-if (n_tokens >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) {
+// MoE Special Case: This logic applies when hparams.n_expert == 0, i.e. the model is NOT an MoE model. When an MoE is
+// being processed then Accelerate/BLAS will not be involved, so capping would limit performance.
+if (n_tokens >= 32 && hparams.n_expert == 0 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) {
 n_threads = std::min(4, n_threads);
 }

scripts/sync-ggml.sh
@@ -97,6 +97,8 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
 # src/ggml-cuda.cu -> ggml-cuda.cu
 # src/ggml-cuda.h -> ggml-cuda.h
 # src/ggml-impl.h -> ggml-impl.h
+# src/ggml-kompute.cpp -> ggml-kompute.cpp
+# src/ggml-kompute.h -> ggml-kompute.h
 # src/ggml-metal.h -> ggml-metal.h
 # src/ggml-metal.m -> ggml-metal.m
 # src/ggml-mpi.h -> ggml-mpi.h
@@ -105,6 +107,10 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
 # src/ggml-opencl.h -> ggml-opencl.h
 # src/ggml-quants.c -> ggml-quants.c
 # src/ggml-quants.h -> ggml-quants.h
+# src/ggml-sycl.cpp -> ggml-sycl.cpp
+# src/ggml-sycl.h -> ggml-sycl.h
+# src/ggml-vulkan.cpp -> ggml-vulkan.cpp
+# src/ggml-vulkan.h -> ggml-vulkan.h
 # include/ggml/ggml.h -> ggml.h
 # include/ggml/ggml-alloc.h -> ggml-alloc.h
 # include/ggml/ggml-backend.h -> ggml-backend.h
@@ -123,6 +129,8 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
 -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
 -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
 -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
+-e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
+-e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
 -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
 -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
 -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
@@ -131,6 +139,10 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
 -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
 -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
 -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
+-e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
+-e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
+-e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
+-e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
 -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
 -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
 -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
scripts/sync-ggml.last
@@ -1 +1 @@
-475cbad5c1c834e31e26a2283bc1413181644360
+2c7cf49810d523b9632da393a9e8270b60bf3b24