prima.cpp

Mirror of https://github.com/Lizonghang/prima.cpp.git (last synced 2025-09-10 00:44:35 +00:00)

Author	SHA1	Message	Date
Lizonghang	585864b05a	add inactive_memory in Mac UMA	2024-12-04 11:56:09 +04:00
Lizonghang	99d48157a0	compatible with Mac ARM64 and x86_64 arch, and ignore memory access delay	2024-12-04 11:33:46 +04:00
Zonghang Li	6d7801de87	fix available_memory calculation	2024-12-04 10:36:43 +04:00
Lizonghang	75a86e4eb1	use internal_mem - purgeable_mem to calculate the available_mem	2024-12-04 10:24:54 +04:00
Lizonghang	bd5e034746	set fio engine to sync if posixaio not loadable	2024-12-04 00:55:50 +04:00
Lizonghang	508c8ae83c	set fio engine to sync if posixaio not loadable	2024-12-04 00:52:01 +04:00
Lizonghang	c3d708862c	use disk read_rnd_bw by default	2024-12-04 00:37:59 +04:00
Lizonghang	7cd4936d58	rename to CPU RAM Read BW	2024-12-04 00:04:22 +04:00
Lizonghang	dc03b6216f	set default page size to 4KB if not available from system	2024-12-03 23:25:47 +04:00
Lizonghang	74dbec5086	set default readahead size to 128KB if permission is needed	2024-12-03 22:55:16 +04:00
Zonghang Li	1a7237636e	add cgroup version detect	2024-11-30 09:48:19 +04:00
Zonghang Li	3074763ed4	fix memory detect in docker container	2024-11-30 09:36:01 +04:00
Zonghang Li	eb25858e87	remove cpus_allowed_policy in fio	2024-11-29 22:16:32 +04:00
Zonghang Li	7e4bb65eab	fix bugs on cpu	2024-11-29 22:06:02 +04:00
Zonghang Li	81fd77033e	add gpu support in device_memory_access_delay	2024-11-29 21:56:01 +04:00
Lizonghang	6f54a12c7d	add gpu support in llama_model_kvcache_size and llama_model_compute_buf_size	2024-11-29 21:06:32 +04:00
Lizonghang	f8e9dc2713	add GPU support in device_compute_delay and device_disk_access_delay	2024-11-29 20:21:22 +04:00
Lizonghang	f8bcf38bfe	fix var name overwrite	2024-11-29 20:00:17 +04:00
Lizonghang	1123c00e45	recover comments in llama_profile_device	2024-11-29 19:07:07 +04:00
Lizonghang	68ecabc8c3	add cpu_read_ram_bw, metal_read_vram_bw, cuda_read_vram_bw	2024-11-29 19:04:53 +04:00
Lizonghang	0a6ffe68e0	add kernel kernel_read_vram	2024-11-29 17:15:36 +04:00
Lizonghang	639a33dfa5	add op GGML_OP_READ and GGML_METAL_KERNEL_TYPE_READ_VRAM	2024-11-29 17:15:19 +04:00
Lizonghang	6934d52d8a	graph->nodes[i] => node	2024-11-29 17:14:25 +04:00
Lizonghang	ce45587ea9	correct GB to GiB	2024-11-29 11:20:19 +04:00
Lizonghang	0f73d12247	decrease compute buf from available memory	2024-11-29 11:15:54 +04:00
Lizonghang	329d084061	add illustration for memory allocation of activations	2024-11-29 10:34:21 +04:00
Lizonghang	45a1e55eec	reduce kv cache from available memory	2024-11-28 20:21:21 +04:00
Lizonghang	9858d90ce4	get system readahead size automatically	2024-11-28 16:18:41 +04:00
Lizonghang	9a7bbce7ad	fix t_load_us	2024-11-28 15:55:21 +04:00
Lizonghang	740f7f0b95	use multithread disk r/w test	2024-11-27 22:14:17 +04:00
Lizonghang	f7507ec20b	fix disk r/w test, add disk access latency, and correct units (GB, GiB)	2024-11-27 21:36:12 +04:00
Lizonghang	9cd22177d0	remove arg test_file	2024-11-27 21:34:45 +04:00
Lizonghang	0a91ad3edc	fix cuda compatibility errors	2024-11-26 22:35:58 +04:00
Zonghang Li	3f008f2ad9	add op GGML_OP_READ	2024-11-26 22:28:34 +04:00
Zonghang Li	f78c437172	add device_inp_embd_delay test, device_memory_bw test, device_cuda_memory_bw test,	2024-11-26 22:28:02 +04:00
Lizonghang	a7a95b53fe	add q80xf32 and count_n_params	2024-11-24 23:11:12 +04:00
Lizonghang	3fe00a16a0	count model flops for f32xf32, f16xf32, q4kxf32, q6kxf32	2024-11-24 13:13:32 +04:00
Lizonghang	a5ba34169a	add f32, f16, q4k_f32, q6k_f32 flops test and fix duplicate inp_embd in subgraphs	2024-11-23 21:36:34 +04:00
Zonghang Li	7ee1423006	add model_flops	2024-11-21 20:06:16 +04:00
Zonghang Li	80f6b72e71	remove device_flops from profiler api	2024-11-21 08:37:57 +04:00
Lizonghang	477ecf2084	add llama_model_n_flops	2024-11-20 19:40:27 +04:00
Lizonghang	10f6f92c7e	add f32, f16, q8, q4k speed test for cuda	2024-11-10 23:41:13 +04:00
Lizonghang	f4260bb346	add device_flops() for cpu, metal, and cuda	2024-11-10 23:11:05 +04:00
Lizonghang	5fae6ac36f	add cpu flops test	2024-11-09 20:53:42 +04:00
Lizonghang	2bd4d03aa8	add automatic layer window size assignment workflow	2024-11-08 18:21:03 +04:00
Lizonghang	53cb3a6069	synchronize device info	2024-11-07 22:02:01 +04:00
Lizonghang	ef7fdf70cc	add LLAMA_API llama_profile_device	2024-11-07 09:30:39 +04:00
Zonghang Li	b922418cca	convert MB to GB	2024-11-06 20:47:17 +04:00
Lizonghang	407c71ae52	add cpu and gpu profile	2024-11-06 20:42:28 +04:00
Lizonghang	4e1be1065d	add memory speed test	2024-11-06 10:57:30 +04:00

... 4 5 6 7 8 ...

4217 commits