prima.cpp

mirror of https://github.com/Lizonghang/prima.cpp.git synced 2025-09-07 17:09:03 +00:00

Author	SHA1	Message	Date
Lizonghang	8e9ab45458	fix model bytes counter	2024-12-10 14:57:48 +04:00
Zonghang Li	2d79554694	fix device_physical_memory	2024-12-09 14:42:49 +04:00
Zonghang Li	4104eb5b7a	fix std::max val type	2024-12-09 13:36:53 +04:00
Zonghang Li	a11e0dd0fa	fix device_cgroup_physical_memory in docker container	2024-12-09 13:34:01 +04:00
Lizonghang	d78fa427e7	add memory copy speed test	2024-12-09 10:07:42 +04:00
Lizonghang	1aee5bd6da	hide metal log if not debug	2024-12-09 09:17:49 +04:00
Lizonghang	cead0f2e5a	device_disk_access_delay: adapt for linux (on termux)	2024-12-08 22:58:08 +04:00
Lizonghang	2c2171cebf	fix display	2024-12-08 22:57:12 +04:00
Lizonghang	aad604ec8b	add disk access delay while swapping	2024-12-08 20:07:30 +04:00
Zonghang Li	f5243571d1	use sequential vram read test	2024-12-08 10:14:52 +04:00
Zonghang Li	d2dc8fd3b8	set CUDA_READ_BLOCK_SIZE to 512	2024-12-08 10:14:20 +04:00
Zonghang Li	df813675d0	fix flops count and ram/vram speed test	2024-12-08 10:14:05 +04:00
Lizonghang	26c2ffb5b7	set fio read-job startdelay 1.5s	2024-12-06 17:03:42 +04:00
Lizonghang	a79ab3dde3	device_disk_access_delay: fix delay calculation when physical memory is busy (all mapped tensors will be reloaded)	2024-12-06 16:41:55 +04:00
Lizonghang	cd823546dd	llama_profile_device: add arg n_predict	2024-12-06 16:37:25 +04:00
Lizonghang	a46d56cc60	llama_model_n_flops: remove ctxs	2024-12-06 11:31:53 +04:00
Lizonghang	f1c1d1b929	add support for Q5_K and fix byte count for Q6_K	2024-12-06 07:59:45 +04:00
Lizonghang	e6f4c009ab	device_disk_access_delay: add delay for loading one row from lookup table	2024-12-05 20:38:28 +04:00
Lizonghang	4fd3b6679e	fix n_gpu_layers	2024-12-05 20:28:19 +04:00
Lizonghang	3c8dbd11fc	device_compute_delay: compatible with the shared memory in Metal	2024-12-05 20:02:13 +04:00
Lizonghang	e74967c488	throw error when using async upload, will support later	2024-12-05 14:18:39 +04:00
Zonghang Li	5a99c5894d	update test model, enable warm-up and sched	2024-12-05 09:52:16 +04:00
Lizonghang	b9538f048a	add sched in device_flops but disabled by default	2024-12-04 23:54:57 +04:00
Zonghang Li	7521e532c4	device_memory_bw: simulate cache-friendly block access and multi-threading	2024-12-04 15:36:59 +04:00
Zonghang Li	44b4718c8b	recover device_memory_access_delay	2024-12-04 12:29:24 +04:00
Zonghang Li	89aad7880e	fix sysctlbyname on Linux and Win	2024-12-04 12:24:07 +04:00
Lizonghang	585864b05a	add inactive_memory in Mac UMA	2024-12-04 11:56:09 +04:00
Lizonghang	99d48157a0	compatible with Mac ARM64 and x86_64 arch, and ignore memory access delay	2024-12-04 11:33:46 +04:00
Zonghang Li	6d7801de87	fix available_memory calculation	2024-12-04 10:36:43 +04:00
Lizonghang	75a86e4eb1	use internal_mem - purgeable_mem to calculate the available_mem	2024-12-04 10:24:54 +04:00
Lizonghang	bd5e034746	set fio engine to sync if posixaio not loadable	2024-12-04 00:55:50 +04:00
Lizonghang	508c8ae83c	set fio engine to sync if posixaio not loadable	2024-12-04 00:52:01 +04:00
Lizonghang	c3d708862c	use disk read_rnd_bw by default	2024-12-04 00:37:59 +04:00
Lizonghang	7cd4936d58	rename to CPU RAM Read BW	2024-12-04 00:04:22 +04:00
Lizonghang	dc03b6216f	set default page size to 4KB if not available from system	2024-12-03 23:25:47 +04:00
Lizonghang	74dbec5086	set default readahead size to 128KB if permission is needed	2024-12-03 22:55:16 +04:00
Zonghang Li	1a7237636e	add cgroup version detect	2024-11-30 09:48:19 +04:00
Zonghang Li	3074763ed4	fix memory detect in docker container	2024-11-30 09:36:01 +04:00
Zonghang Li	eb25858e87	remove cpus_allowed_policy in fio	2024-11-29 22:16:32 +04:00
Zonghang Li	7e4bb65eab	fix bugs on cpu	2024-11-29 22:06:02 +04:00
Zonghang Li	81fd77033e	add gpu support in device_memory_access_delay	2024-11-29 21:56:01 +04:00
Lizonghang	6f54a12c7d	add gpu support in llama_model_kvcache_size and llama_model_compute_buf_size	2024-11-29 21:06:32 +04:00
Lizonghang	f8e9dc2713	add GPU support in device_compute_delay and device_disk_access_delay	2024-11-29 20:21:22 +04:00
Lizonghang	f8bcf38bfe	fix var name overwrite	2024-11-29 20:00:17 +04:00
Lizonghang	1123c00e45	recover comments in llama_profile_device	2024-11-29 19:07:07 +04:00
Lizonghang	68ecabc8c3	add cpu_read_ram_bw, metal_read_vram_bw, cuda_read_vram_bw	2024-11-29 19:04:53 +04:00
Lizonghang	0a6ffe68e0	add kernel kernel_read_vram	2024-11-29 17:15:36 +04:00
Lizonghang	639a33dfa5	add op GGML_OP_READ and GGML_METAL_KERNEL_TYPE_READ_VRAM	2024-11-29 17:15:19 +04:00
Lizonghang	6934d52d8a	graph->nodes[i] => node	2024-11-29 17:14:25 +04:00
Lizonghang	ce45587ea9	correct GB to GiB	2024-11-29 11:20:19 +04:00

1 2 3 4 5 ...

3993 commits