Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/docker.yml
#	CMakeLists.txt
#	CONTRIBUTING.md
#	docs/android.md
#	docs/docker.md
#	examples/embedding/embedding.cpp
#	examples/imatrix/imatrix.cpp
#	examples/infill/infill.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/main/README.md
#	examples/parallel/parallel.cpp
#	examples/perplexity/perplexity.cpp
#	examples/quantize-stats/quantize-stats.cpp
#	examples/save-load-state/save-load-state.cpp
#	examples/server/README.md
#	examples/simple/CMakeLists.txt
#	examples/speculative/speculative.cpp
#	flake.lock
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas.cpp
#	pocs/vdot/q8dot.cpp
#	pocs/vdot/vdot.cpp
#	scripts/debug-test.sh
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tests/test-quantize-fns.cpp
#	tests/test-quantize-perf.cpp
#	tests/test-tokenizer-0.cpp
#	tests/test-tokenizer-1-bpe.cpp
#	tests/test-tokenizer-1-spm.cpp
This commit is contained in:
Concedo 2024-10-11 11:59:59 +08:00
commit e692a79aab
61 changed files with 2579 additions and 1949 deletions

View file

@@ -1070,10 +1070,25 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, vk::Memor
try {
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
} catch (const vk::SystemError& e) {
// Out of Host/Device memory, clean up buffer
device->device.destroyBuffer(buf->buffer);
buf->size = 0;
throw e;
if (buf->memory_property_flags != fallback_flags) {
// Try again with fallback flags
memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags);
buf->memory_property_flags = fallback_flags;
try {
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index });
}
catch (const vk::SystemError& e) {
device->device.destroyBuffer(buf->buffer);
buf->size = 0;
throw e;
}
} else {
// Out of Host/Device memory, clean up buffer
device->device.destroyBuffer(buf->buffer);
buf->size = 0;
throw e;
}
}
buf->ptr = nullptr;
@@ -5272,9 +5287,9 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
return;
}
ggml_type_traits_t tt = ggml_internal_get_type_traits(quant);
const auto * tt = ggml_get_type_traits(quant);
ggml_to_float_t dequant_fn = tt.to_float;
ggml_to_float_t dequant_fn = tt->to_float;
dequant_fn(from, to, ne);
}