fix swappable mem in termux

This commit is contained in:
Lizonghang 2024-12-12 15:15:16 +04:00
parent fa05a482f8
commit b642d70188
3 changed files with 24 additions and 13 deletions

View file

@ -101,7 +101,7 @@ uint32_t device_cpu_cores() {
static float device_flops(struct llama_model * model, enum ggml_type src0t, enum ggml_type src1t, enum profiler_backend_type btype, int n_threads) { static float device_flops(struct llama_model * model, enum ggml_type src0t, enum ggml_type src1t, enum profiler_backend_type btype, int n_threads) {
int n_repeat = 1; int n_repeat = 1;
int n_embd = std::min(llama_n_embd(model), 4096); int n_embd = std::min(llama_n_embd(model), 4096);
if (btype == PROFILER_BACKEND_TYPE_CPU) n_embd /= 8; // simulate small tensor calculation on cpu // if (btype == PROFILER_BACKEND_TYPE_CPU) n_embd /= 8; // simulate small tensor calculation on cpu
std::vector<float> matrix_A(n_embd * n_embd, 1.0f); std::vector<float> matrix_A(n_embd * n_embd, 1.0f);
std::vector<float> matrix_B(n_embd * n_embd, 1.0f / n_embd); std::vector<float> matrix_B(n_embd * n_embd, 1.0f / n_embd);
@ -1381,6 +1381,13 @@ static uint64_t device_termux_swappable_memory() {
return total_swappable; return total_swappable;
} }
// Reports how much in-use memory could be swapped out on this device.
//
// Termux is detected by probing for its bin directory with access(); when the
// directory exists the result of device_termux_swappable_memory() is returned
// unchanged. On every other platform the function reports 0.
// NOTE(review): the value is presumably in bytes (the caller divides by 1 << 30
// before storing it in a GiB field) - confirm against the termux helper.
uint64_t device_swappable_memory() {
    const char * termux_bin_dir = "/data/data/com.termux/files/usr/bin";
    const bool   in_termux      = (access(termux_bin_dir, F_OK) == 0);

    // no termux prefix on disk -> nothing is treated as swappable
    return in_termux ? device_termux_swappable_memory() : 0;
}
static float device_disk_access_delay(struct device_info & dev_info, struct llama_model * model, const struct llama_context_params cparams) { static float device_disk_access_delay(struct device_info & dev_info, struct llama_model * model, const struct llama_context_params cparams) {
auto n_bytes = dev_info.model_bytes; auto n_bytes = dev_info.model_bytes;
int n_layers = llama_model_n_layers(model); int n_layers = llama_model_n_layers(model);
@ -1463,18 +1470,12 @@ static float device_disk_access_delay(struct device_info & dev_info, struct llam
if (getenv("TERMUX_VERSION") != NULL) { if (getenv("TERMUX_VERSION") != NULL) {
// termux on android: swap has higher priority than releasing mmap // termux on android: swap has higher priority than releasing mmap
// non-app memory that can be swapped to disk // non-app memory that can be swapped to disk
float used_mem_can_swap = (float)(static_cast<double>(device_termux_swappable_memory()) / 1024.0 / 1024.0 / 1024.0);
float swapout_gib = std::min( float swapout_gib = std::min(
std::min(0.0f, total_mem_needed - dev_info.memory.available_physical), std::max(0.0f, total_mem_needed - dev_info.memory.available_physical),
std::min(used_mem_can_swap, dev_info.memory.available_swap) std::min(dev_info.memory.used_can_swap, dev_info.memory.available_swap)
); );
float disk_write_bw = dev_info.disk.write_seq_bw * 1e9 / 1024.0 / 1024.0 / 1024.0;
float swapout_delay = swapout_gib / disk_write_bw * 1000; // ms
float mmapin_gib = total_mem_needed - (dev_info.memory.available_physical + swapout_gib); float mmapin_gib = total_mem_needed - (dev_info.memory.available_physical + swapout_gib);
float mmapin_delay = mmapin_gib / disk_read_bw * 1000; // ms return mmapin_gib / disk_read_bw * 1000; // ms
return swapout_delay + mmapin_delay;
} else { } else {
// if this linux not in termux env, use sequential read bandwidth // if this linux not in termux env, use sequential read bandwidth
// POSIX_FADV_SEQUENTIAL is set on linux // POSIX_FADV_SEQUENTIAL is set on linux
@ -1592,6 +1593,12 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
} }
LOG_INF("\n"); LOG_INF("\n");
LOG_INF("| Used Mem Swappable (GiB) ");
for (int i = 0; i < n; ++i) {
LOG_INF("| %-10.2f ", dev_info_set[i].memory.used_can_swap);
}
LOG_INF("\n");
LOG_INF("| Swap Mem Total (GiB) "); LOG_INF("| Swap Mem Total (GiB) ");
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
LOG_INF("| %-10.2f ", dev_info_set[i].memory.total_swap); LOG_INF("| %-10.2f ", dev_info_set[i].memory.total_swap);

View file

@ -37,6 +37,7 @@ struct cpu_props {
struct memory_info { struct memory_info {
float total_physical; // in GiB float total_physical; // in GiB
float available_physical; // in GiB float available_physical; // in GiB
float used_can_swap; // in GiB
float total_swap; // in GiB float total_swap; // in GiB
float available_swap; // in GiB float available_swap; // in GiB
float cpu_read_ram_bw; // in GB/s float cpu_read_ram_bw; // in GB/s
@ -44,6 +45,7 @@ struct memory_info {
memory_info() : memory_info() :
total_physical (0.0f), total_physical (0.0f),
available_physical(0.0f), available_physical(0.0f),
used_can_swap (0.0f),
total_swap (0.0f), total_swap (0.0f),
available_swap (0.0f), available_swap (0.0f),
cpu_read_ram_bw (0.0f) {} cpu_read_ram_bw (0.0f) {}
@ -251,6 +253,7 @@ float device_cuda_flops (struct llama_model * model, enum ggml_type sr
float device_inp_embd_delay (struct llama_model * model, enum ggml_type src0t, int n_tokens, int n_threads); float device_inp_embd_delay (struct llama_model * model, enum ggml_type src0t, int n_tokens, int n_threads);
uint64_t device_physical_memory (bool available); uint64_t device_physical_memory (bool available);
uint64_t device_swap_memory (bool available); uint64_t device_swap_memory (bool available);
uint64_t device_swappable_memory ();
void device_disk_seq_bw (float * read_seq_bw, float * write_seq_bw, int n_threads); void device_disk_seq_bw (float * read_seq_bw, float * write_seq_bw, int n_threads);
void device_disk_rnd_bw (float * read_rnd_bw, float * write_rnd_bw, int n_threads); void device_disk_rnd_bw (float * read_rnd_bw, float * write_rnd_bw, int n_threads);
float device_memory_bw (int n_thread); float device_memory_bw (int n_thread);

View file

@ -3583,6 +3583,7 @@ void llama_profile_device(
dev_info->memory.total_physical = round(device_physical_memory(false) / (double)(1 << 30) * 100) / 100; dev_info->memory.total_physical = round(device_physical_memory(false) / (double)(1 << 30) * 100) / 100;
dev_info->memory.available_physical = round(device_physical_memory(true) / (double)(1 << 30) * 100) / 100; dev_info->memory.available_physical = round(device_physical_memory(true) / (double)(1 << 30) * 100) / 100;
dev_info->memory.used_can_swap = round(device_swappable_memory() / (double)(1 << 30) * 100) / 100;
dev_info->memory.total_swap = round(device_swap_memory(false) / (double)(1 << 30) * 100) / 100; dev_info->memory.total_swap = round(device_swap_memory(false) / (double)(1 << 30) * 100) / 100;
dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100; dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100;
dev_info->memory.cpu_read_ram_bw = device_memory_bw(n_threads); dev_info->memory.cpu_read_ram_bw = device_memory_bw(n_threads);