mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/bench.yml.disabled # Makefile # README.md # ggml/CMakeLists.txt # ggml/src/CMakeLists.txt # ggml/src/ggml-vulkan.cpp
This commit is contained in:
commit
1edf83761a
49 changed files with 110670 additions and 109313 deletions
|
@ -111,8 +111,34 @@ int32_t cpu_get_num_physical_cores() {
|
|||
if (result == 0) {
|
||||
return num_physical_cores;
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
//TODO: Implement
|
||||
#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
|
||||
// TODO: windows + arm64 + mingw64
|
||||
unsigned int n_threads_win = std::thread::hardware_concurrency();
|
||||
unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
|
||||
|
||||
DWORD buffer_size = 0;
|
||||
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
|
||||
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
||||
return default_threads;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<char> buffer(buffer_size);
|
||||
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
|
||||
return default_threads;
|
||||
}
|
||||
|
||||
int32_t num_physical_cores = 0;
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
|
||||
while (buffer_size > 0) {
|
||||
if (info->Relationship == RelationProcessorCore) {
|
||||
num_physical_cores += info->Processor.GroupCount;
|
||||
}
|
||||
buffer_size -= info->Size;
|
||||
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
|
||||
}
|
||||
|
||||
return num_physical_cores > 0 ? num_physical_cores : default_threads;
|
||||
#endif
|
||||
unsigned int n_threads = std::thread::hardware_concurrency();
|
||||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||
|
@ -1728,7 +1754,13 @@ std::string gpt_params_get_system_info(const gpt_params & params) {
|
|||
if (params.n_threads_batch != -1) {
|
||||
os << " (n_threads_batch = " << params.n_threads_batch << ")";
|
||||
}
|
||||
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
|
||||
// TODO: windows + arm64 + mingw64
|
||||
DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
|
||||
os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
|
||||
#else
|
||||
os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
|
||||
#endif
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
@ -2703,12 +2735,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
|
|||
return text;
|
||||
}
|
||||
|
||||
bool llama_should_add_bos_token(const llama_model * model) {
|
||||
const int add_bos = llama_add_bos_token(model);
|
||||
|
||||
return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
|
||||
}
|
||||
|
||||
//
|
||||
// Chat template utils
|
||||
//
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue