mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
clean and rename old clblast files in preparation for merge
This commit is contained in:
parent
a577015425
commit
1e07043a6e
6 changed files with 247 additions and 294 deletions
2
Makefile
2
Makefile
|
@ -578,7 +578,7 @@ ggml_v1_failsafe.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
|||
$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||
|
||||
#opencl
|
||||
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
|
||||
ggml-opencl.o: otherarch/ggml_v3b-opencl.cpp otherarch/ggml_v3b-opencl.h
|
||||
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||
ggml_v2-opencl.o: otherarch/ggml_v2-opencl.cpp otherarch/ggml_v2-opencl.h
|
||||
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||
|
|
|
@ -87,7 +87,7 @@
|
|||
#include <Accelerate/Accelerate.h>
|
||||
#endif
|
||||
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
|
||||
#include "ggml-opencl.h"
|
||||
#include "ggml_v3b-opencl.h"
|
||||
#endif
|
||||
|
||||
// floating point type used to accumulate sums
|
||||
|
|
283
klite.embd
283
klite.embd
|
@ -4125,7 +4125,9 @@ Current version indicated by LITEVER below.
|
|||
const xtts_gen_endpoint = "/tts_to_audio/";
|
||||
const xtts_voices_endpoint = "/speakers_list";
|
||||
const alltalk_gen_endpoint = "/api/tts-generate";
|
||||
const alltalk_stream_endpoint = "/api/tts-generate-streaming";
|
||||
const alltalk_voices_endpoint = "/api/voices";
|
||||
const alltalk_rvc_voices_endpoint = "/api/rvcvoices";
|
||||
|
||||
//support for quick news updates
|
||||
const horde_news_endpoint = "https://hordenews.concedo.workers.dev"
|
||||
|
@ -4181,7 +4183,7 @@ Current version indicated by LITEVER below.
|
|||
var current_wi = []; //each item stores a wi object.
|
||||
var wi_insertlocation = 0; //after memory
|
||||
var wi_searchdepth = 0; //search everything
|
||||
var generateimagesinterval = 700; //if generated images is enabled, it will trigger after every 700 new characters in context.
|
||||
var generateimagesinterval = 750; //if generated images is enabled, it will trigger after every 750 new characters in context.
|
||||
var nextgeneratedimagemilestone = generateimagesinterval; //used to keep track of when to generate the next image
|
||||
var image_db = {}; //stores a dictionary of pending images
|
||||
var interrogation_db = {};
|
||||
|
@ -5052,7 +5054,11 @@ Current version indicated by LITEVER below.
|
|||
const foundChub = urlParams.get('chub');
|
||||
const foundPyg = urlParams.get('pyg');
|
||||
const foundAicc = urlParams.get('aicc');
|
||||
const foundQuery = urlParams.get('query');
|
||||
let foundQuery = urlParams.get('query');
|
||||
if (!foundQuery || foundQuery == "")
|
||||
{
|
||||
foundQuery = urlParams.get('q');
|
||||
}
|
||||
|
||||
if (foundStory && foundStory != "") {
|
||||
if (localsettings.persist_session && !safe_to_overwrite()) {
|
||||
|
@ -8054,9 +8060,18 @@ Current version indicated by LITEVER below.
|
|||
let style = (elem.trusted ? "style=\"color:#dd77ff;\"" : "");
|
||||
let brokenstyle = (elem.maintenance_mode ? "style=\"color:#ee4444;\"" : "");
|
||||
let workerNameHtml = escapeHtml(elem.name.substring(0, 40));
|
||||
let clickinfo = "";
|
||||
if(elem.info && elem.info!="")
|
||||
{
|
||||
workerNameHtml = "<a class=\"color_blueurl\" href=\"#\" onclick=\"msgbox(\'"+escapeHtml(replaceAll(elem.info,"\'","\\\'"))+"\','Worker Info',false,false,hide_msgbox)\">"+workerNameHtml+"</a>";
|
||||
clickinfo += escapeHtml(replaceAll(elem.info,"\'","\\\'"));
|
||||
}
|
||||
if(elem.threads>1)
|
||||
{
|
||||
clickinfo += (clickinfo==""?"":"<br><br>") + "Threads: " + elem.threads;
|
||||
}
|
||||
if(clickinfo!="")
|
||||
{
|
||||
workerNameHtml = "<a class=\"color_blueurl\" href=\"#\" onclick=\"msgbox(\'"+clickinfo+"\','Worker Info',false,false,hide_msgbox)\">"+workerNameHtml+"</a>";
|
||||
}
|
||||
let allmdls = "";
|
||||
for (let n = 0; n < elem.models.length; ++n) {
|
||||
|
@ -11653,7 +11668,7 @@ Current version indicated by LITEVER below.
|
|||
function do_auto_gen_image(truncated_context)
|
||||
{
|
||||
var tclen = truncated_context.length;
|
||||
var sentence = truncated_context.substring(tclen - 380, tclen);
|
||||
var sentence = truncated_context.substring(tclen - 400, tclen);
|
||||
sentence = start_trim_to_sentence(sentence);
|
||||
sentence = end_trim_to_sentence(sentence,true);
|
||||
if (sentence.length > 0) {
|
||||
|
@ -11827,6 +11842,28 @@ Current version indicated by LITEVER below.
|
|||
//alltalk mode
|
||||
data = data.voices;
|
||||
}
|
||||
else if(data && !data.length && data.constructor == Object)
|
||||
{
|
||||
//hybrid new xtts mantella
|
||||
let newdata = [];
|
||||
for(key in data)
|
||||
{
|
||||
let lang = data[key];
|
||||
if(lang && lang.speakers && lang.speakers.length>0)
|
||||
{
|
||||
for(let i=0;i<lang.speakers.length;++i)
|
||||
{
|
||||
newdata.push(lang.speakers[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(newdata.length > 0)
|
||||
{
|
||||
data = newdata;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let dropdown = document.getElementById("xtts_voices");
|
||||
let selectionhtml = ``;
|
||||
for (var i = 0; i < data.length; ++i) {
|
||||
|
@ -11864,46 +11901,92 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
document.getElementById("xtts_container").classList.add("hidden");
|
||||
document.getElementById("oai_tts_container").classList.add("hidden");
|
||||
if(document.getElementById("ttsselect").value==XTTS_ID || document.getElementById("ttsselect").value==ALLTALK_ID)
|
||||
{
|
||||
document.getElementById("alltalk_specific_controls").classList.add("hidden");
|
||||
|
||||
const selectedTTS = document.getElementById("ttsselect").value;
|
||||
|
||||
if(selectedTTS == XTTS_ID || selectedTTS == ALLTALK_ID) {
|
||||
document.getElementById("xtts_container").classList.remove("hidden");
|
||||
fetch_xtts_voices(true, document.getElementById("ttsselect").value==XTTS_ID);
|
||||
|
||||
if(selectedTTS == ALLTALK_ID) {
|
||||
document.getElementById("alltalk_specific_controls").classList.remove("hidden");
|
||||
fetch_rvc_voices();
|
||||
adjust_alltalk_controls();
|
||||
}
|
||||
else if(document.getElementById("ttsselect").value==OAI_TTS_ID)
|
||||
{
|
||||
fetch_xtts_voices(true, selectedTTS == XTTS_ID);
|
||||
}
|
||||
else if(selectedTTS == OAI_TTS_ID) {
|
||||
document.getElementById("oai_tts_container").classList.remove("hidden");
|
||||
}
|
||||
}
|
||||
function set_xtts_url()
|
||||
|
||||
// Populate the AllTalk RVC voice dropdown from the server's rvcvoices endpoint.
// Only fires while not yet connected, so the list is fetched once before connecting.
function fetch_rvc_voices()
{
    if(xtts_is_connected)
    {
        return; //already connected - do not refetch the voice list
    }
    fetch(localsettings.saved_alltalk_url + alltalk_rvc_voices_endpoint)
    .then(response => response.json())
    .then(data => {
        console.log("RVC voices response:", data); // Debug log
        const rvcSelect = document.getElementById("alltalk_rvc_voice");
        //reset the dropdown, keeping the mandatory placeholder entry
        rvcSelect.innerHTML = '<option value="Disabled">Disabled</option>';
        //AllTalk returns the list under data.rvcvoices (not data.voices)
        const voicelist = (data.status === "success" && Array.isArray(data.rvcvoices)) ? data.rvcvoices : [];
        for (const voice of voicelist)
        {
            if (voice === "Disabled")
            {
                continue; //placeholder option already present
            }
            const option = document.createElement("option");
            option.value = voice;
            //display only the model file name, without path or .pth extension
            option.textContent = voice.split("\\").pop().replace(".pth", "");
            rvcSelect.appendChild(option);
        }
    })
    .catch(error => {
        console.log("Error fetching RVC voices:", error);
    });
}
|
||||
|
||||
//single callback to update alltalk controls on any alltalk UI event.
//Syncs the pitch readout with the slider, and disables the RVC voice/pitch
//controls while audio streaming is enabled (RVC is unavailable when streaming).
function adjust_alltalk_controls() {
    const pitchSlider = document.getElementById("alltalk_rvc_pitch");
    const pitchValue = document.getElementById("alltalk_rvc_pitch_value");
    pitchValue.textContent = pitchSlider.value;
    //.checked is already a boolean; no ternary needed
    const streamingMode = document.getElementById("alltalk_streaming").checked;
    const rvcSelect = document.getElementById("alltalk_rvc_voice");
    rvcSelect.disabled = streamingMode;
    //reuse pitchSlider instead of a second identical getElementById("alltalk_rvc_pitch") lookup
    pitchSlider.disabled = streamingMode;
}
|
||||
|
||||
// Update set_xtts_url to use the new fetch_rvc_voices function
|
||||
function set_xtts_url() {
|
||||
let is_xtts = (document.getElementById("ttsselect").value==XTTS_ID);
|
||||
let epname = (is_xtts?"XTTS":"AllTalk");
|
||||
inputBox("Enter "+epname+" API Server URL.",epname+" API Server URL",(is_xtts?localsettings.saved_xtts_url:localsettings.saved_alltalk_url),"Input "+epname+" API Server URL", ()=>{
|
||||
let userinput = getInputBoxValue();
|
||||
userinput = userinput.trim();
|
||||
if(userinput!="" && userinput.slice(-1)=="/")
|
||||
{
|
||||
if(userinput!="" && userinput.slice(-1)=="/") {
|
||||
userinput = userinput.slice(0, -1);
|
||||
}
|
||||
if(userinput=="")
|
||||
{
|
||||
if(userinput=="") {
|
||||
userinput = (is_xtts?default_xtts_base:default_alltalk_base);
|
||||
}
|
||||
if (userinput != null && userinput!="") {
|
||||
if(is_xtts)
|
||||
{
|
||||
localsettings.saved_xtts_url = userinput.trim();
|
||||
}
|
||||
else
|
||||
{
|
||||
localsettings.saved_alltalk_url = userinput.trim();
|
||||
}
|
||||
|
||||
xtts_is_connected = false;
|
||||
if(is_xtts) {
|
||||
localsettings.saved_xtts_url = userinput.trim();
|
||||
} else {
|
||||
localsettings.saved_alltalk_url = userinput.trim();
|
||||
// Fetch RVC voices with new URL
|
||||
fetch_rvc_voices();
|
||||
}
|
||||
fetch_xtts_voices(false, is_xtts);
|
||||
}
|
||||
},false);
|
||||
}
|
||||
|
||||
function tts_speak(text, speech_synth_override=null)
|
||||
{
|
||||
if(!text || text=="" || text.trim()=="")
|
||||
|
@ -12019,35 +12102,18 @@ Current version indicated by LITEVER below.
|
|||
},300);
|
||||
};
|
||||
}).catch((error) => {
|
||||
xtts_is_playing = false;
|
||||
update_submit_button(false);
|
||||
console.log("XTTS Speak Error: " + error);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
//alltalk
|
||||
const formData = new FormData();
|
||||
formData.append("text_input", text); // max 2000 chars
|
||||
formData.append("text_filtering", "none"); // (none|standard|html)
|
||||
formData.append("character_voice_gen", document.getElementById("xtts_voices").value);
|
||||
formData.append("narrator_enabled", false);
|
||||
formData.append("narrator_voice_gen", document.getElementById("xtts_voices").value);
|
||||
formData.append("text_not_inside", "character"); // character or narrator, determines which to use
|
||||
formData.append("language", document.getElementById("xtts_lang").value.trim().toLowerCase());
|
||||
formData.append("output_file_name", "audiofile"); // NOTE: file name only, with no extension and no dashes!
|
||||
formData.append("output_file_timestamp", true);
|
||||
formData.append("autoplay", false); //to play in browser
|
||||
formData.append("autoplay_volume", 1.0); // (0.1..2.0)
|
||||
formData.append("streaming", true); // unknown why
|
||||
const isStreaming = (document.getElementById("alltalk_streaming").checked ? true : false);
|
||||
|
||||
fetch(localsettings.saved_alltalk_url + alltalk_gen_endpoint, {
|
||||
method: 'POST',
|
||||
body: formData, // send payload as FormData
|
||||
})
|
||||
.then(response => response.arrayBuffer())
|
||||
.then(data => {
|
||||
return audioContext.decodeAudioData(data);
|
||||
})
|
||||
.then(decodedData => {
|
||||
let playDecodedAllTalkData = function(decodedData)
|
||||
{
|
||||
const playSound = audioContext.createBufferSource();
|
||||
playSound.buffer = decodedData;
|
||||
playSound.connect(audioContext.destination);
|
||||
|
@ -12061,9 +12127,109 @@ Current version indicated by LITEVER below.
|
|||
console.log("Audio finished playing");
|
||||
},300);
|
||||
};
|
||||
}).catch((error) => {
|
||||
console.log("AllTalk Speak Error: " + error);
|
||||
}
|
||||
|
||||
if (isStreaming) {
|
||||
// Create a URLSearchParams object for streaming
|
||||
const params = new URLSearchParams({
|
||||
text: text,
|
||||
voice: document.getElementById("xtts_voices").value,
|
||||
language: document.getElementById("xtts_lang").value.trim().toLowerCase(),
|
||||
output_file: "klite_stream_output.wav",
|
||||
});
|
||||
|
||||
// Create streaming URL, but right now it's as good as sync
const streamingUrl = `${localsettings.saved_alltalk_url}${alltalk_stream_endpoint}?${params.toString()}`;
fetch(streamingUrl)
.then(response => response.arrayBuffer())
.then(data => {
    return audioContext.decodeAudioData(data);
})
.then(decodedData => {
    playDecodedAllTalkData(decodedData);
})
.catch((error) => {
    //log the caught error itself; `data` (the .then parameter) is not in
    //scope here, and referencing it would throw and mask the real failure
    console.log("AllTalk v2 Speak Error:", error);
    xtts_is_playing = false;
    update_submit_button(false);
});
|
||||
|
||||
} else {
|
||||
// Standard mode using FormData
|
||||
const formData = new FormData();
|
||||
formData.append("text_input", text);
|
||||
formData.append("text_filtering", "none");
|
||||
formData.append("character_voice_gen", document.getElementById("xtts_voices").value);
|
||||
formData.append("narrator_enabled", false);
|
||||
formData.append("narrator_voice_gen", document.getElementById("xtts_voices").value);
|
||||
formData.append("text_not_inside", "character");
|
||||
formData.append("language", document.getElementById("xtts_lang").value.trim().toLowerCase());
|
||||
formData.append("output_file_name", "audiofile");
|
||||
formData.append("output_file_timestamp", true);
|
||||
formData.append("autoplay", false);
|
||||
formData.append("autoplay_volume", 1.0);
|
||||
formData.append("rvccharacter_voice_gen", document.getElementById("alltalk_rvc_voice").value);
|
||||
formData.append("rvccharacter_pitch", document.getElementById("alltalk_rvc_pitch").value);
|
||||
formData.append("rvcnarrator_voice_gen", document.getElementById("alltalk_rvc_voice").value);
|
||||
formData.append("rvcnarrator_pitch", document.getElementById("alltalk_rvc_pitch").value);
|
||||
|
||||
fetch(localsettings.saved_alltalk_url + alltalk_gen_endpoint, {
|
||||
method: 'POST',
|
||||
body: formData, // send payload as FormData
|
||||
}).then(response => {
|
||||
//content type can be JSON (alltalk v2) or raw audio (v1)
|
||||
const contentType = response.headers.get("Content-Type");
|
||||
//alltalk v2 json
|
||||
if (contentType && contentType.toLowerCase().includes("application/json"))
|
||||
{
|
||||
return response.json().then(data => {
|
||||
if (data && data.output_file_url && data.status === "generate-success")
|
||||
{
|
||||
const audioUrl = `${localsettings.saved_alltalk_url}${data.output_file_url}`;
|
||||
fetch(audioUrl)
|
||||
.then(response => response.arrayBuffer())
|
||||
.then(data => {
|
||||
return audioContext.decodeAudioData(data);
|
||||
})
|
||||
.then(decodedData => {
|
||||
playDecodedAllTalkData(decodedData);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log("AllTalk v2 Speak Error:", data);
|
||||
xtts_is_playing = false;
|
||||
update_submit_button(false);
|
||||
});
|
||||
} else {
|
||||
console.log("AllTalk Generation Error:", data);
|
||||
xtts_is_playing = false;
|
||||
update_submit_button(false);
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
console.log("AllTalk Request Error:", error);
|
||||
xtts_is_playing = false;
|
||||
update_submit_button(false);
|
||||
});
|
||||
}
|
||||
else //alltalk v1 audio
|
||||
{
|
||||
return response.arrayBuffer().then(data => {
|
||||
return audioContext.decodeAudioData(data);
|
||||
})
|
||||
.then(decodedData => {
|
||||
playDecodedAllTalkData(decodedData);
|
||||
}).catch((error) => {
|
||||
console.log("AllTalk v1 Speak Error: " + error);
|
||||
xtts_is_playing = false;
|
||||
update_submit_button(false);
|
||||
});
|
||||
}
|
||||
}).catch((error) => {
|
||||
console.log("AllTalk Non-Stream Req Error: " + error);
|
||||
xtts_is_playing = false;
|
||||
update_submit_button(false);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15880,7 +16046,7 @@ Current version indicated by LITEVER below.
|
|||
whorun = "<br>You're using the Cohere API";
|
||||
}
|
||||
else {
|
||||
whorun = `<br>There are <span class="color_orange">${selected_models.reduce((s, a) => s + a.count, 0)}</span> <a class="color_green mainnav" href="#" tabindex="${mainmenu_is_untab?`-1`:`0`}" onclick="get_and_show_workers()">volunteer(s)</a> running selected models with a total queue length of <span class="color_orange">${selected_models.reduce((s, a) => s + a.queued, 0)}</span> tokens`;
|
||||
whorun = `<br>Horde <a class="color_green mainnav" href="#" tabindex="${mainmenu_is_untab?`-1`:`0`}" onclick="get_and_show_workers()">Volunteer(s)</a> are running <span class="color_orange">${selected_models.reduce((s, a) => s + a.count, 0)} threads</span> for selected models with a total queue length of <span class="color_orange">${selected_models.reduce((s, a) => s + a.queued, 0)}</span> tokens`;
|
||||
}
|
||||
let nowmode = (localsettings.opmode==1?"Story Mode":(localsettings.opmode==2?"Adventure Mode":(localsettings.opmode==3?"Chat Mode":"Instruct Mode")));
|
||||
let selmodelstr = "";
|
||||
|
@ -19562,6 +19728,7 @@ Current version indicated by LITEVER below.
|
|||
</select>
|
||||
<button id="test_tts" type="button" class="bg_green btn btn-primary" style="height:20px; width:30px; padding:2px 3px;font-size:11px; margin-left: 2px;" onclick="test_tts()">Test</button>
|
||||
<div id="xtts_container" class="settinglabel hidden">
|
||||
<div>
|
||||
<table width="100%"><tr>
|
||||
<td><button id="xtts_url" type="button" class="btn btn-primary" style="width:100%; padding:2px 3px;margin-top:2px;font-size:11px;" onclick="set_xtts_url()">Set URL</button></td>
|
||||
<td><select class="form-control" id="xtts_voices" style="font-size:12px;height:20px;padding:0;margin:0px 0 0;">
|
||||
|
@ -19570,6 +19737,26 @@ Current version indicated by LITEVER below.
|
|||
</tr><tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>Language </td><td><input class="settinglabel miniinput" type="text" value="EN" id="xtts_lang" style="margin-left:3px; height:18px; width: 40px; padding: 2px;"></td></tr>
|
||||
</table>
|
||||
</div>
|
||||
<div id="alltalk_specific_controls" style="width:100%;font-size: 11px;" class="settinglabel hidden">
|
||||
<div>
|
||||
<div class="justifyleft" style="padding:2px" title="AllTalk Streaming">Audio Streaming </div>
|
||||
<input title="AllTalk Streaming" onchange="adjust_alltalk_controls();" type="checkbox" id="alltalk_streaming" style="margin:0px 0px 0px auto;">
|
||||
</div>
|
||||
<div>
|
||||
<div>RVC Voice</div>
|
||||
<select class="form-control" id="alltalk_rvc_voice" style="font-size:12px;height:20px;padding:0;margin:0px 0 0;width:100%;">
|
||||
<option value="Disabled">Disabled</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<div>RVC Pitch</div>
|
||||
<div style="display:flex;align-items:center;">
|
||||
<input oninput="adjust_alltalk_controls();" type="range" id="alltalk_rvc_pitch" min="-24" max="24" value="0" style="flex:1;height:20px;">
|
||||
<span id="alltalk_rvc_pitch_value" style="margin-left:5px;font-size:12px;">0</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="oai_tts_container" class="settinglabel hidden">
|
||||
<table width="100%"><tr>
|
||||
<td><button type="button" class="btn btn-primary" style="width:100%; padding:2px 3px;margin-top:2px;font-size:11px;" onclick="set_oai_tts_url()">Set URL</button></td>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include "ggml.h"
|
||||
#include "ggml-opencl.h"
|
||||
#include "ggml_v3b-opencl.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-cpu.h"
|
||||
|
||||
|
@ -2012,224 +2012,3 @@ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
|
|||
tensor->extra = dst;
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
}
|
||||
|
||||
// ggml-backend
|
||||
|
||||
// buffer
|
||||
|
||||
struct ggml_backend_opencl_buffer_context {
|
||||
~ggml_backend_opencl_buffer_context() {
|
||||
if (buffer) {
|
||||
clReleaseMemObject(buffer);
|
||||
}
|
||||
for (auto * sub_buffer : sub_buffers) {
|
||||
clReleaseMemObject(sub_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
cl_mem buffer;
|
||||
std::vector<cl_mem> sub_buffers;
|
||||
};
|
||||
|
||||
static void * const cl_ptr_base = (void *)(uintptr_t) 0x1000;
|
||||
|
||||
static const char * ggml_backend_opencl_buffer_get_name(ggml_backend_buffer_t buffer) {
|
||||
return "OpenCL";
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
|
||||
return cl_ptr_base;
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
|
||||
if (tensor->view_src != NULL && tensor->view_offs == 0) {
|
||||
tensor->extra = tensor->view_src->extra;
|
||||
} else {
|
||||
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
||||
cl_buffer_region region = {(size_t)((char *)tensor->data - (char *)cl_ptr_base), ggml_nbytes(tensor)};
|
||||
cl_int err;
|
||||
cl_mem sub_buffer = clCreateSubBuffer(ctx->buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err);
|
||||
CL_CHECK(err);
|
||||
ctx->sub_buffers.push_back(sub_buffer);
|
||||
tensor->extra = sub_buffer;
|
||||
}
|
||||
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
cl_mem tensor_buffer = (cl_mem) tensor->extra;
|
||||
CL_CHECK(clEnqueueWriteBuffer(queue, tensor_buffer, true, offset, size, data, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(queue));
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
cl_mem tensor_buffer = (cl_mem) tensor->extra;
|
||||
CL_CHECK(clEnqueueReadBuffer(queue, tensor_buffer, true, offset, size, data, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(queue));
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
||||
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
||||
CL_CHECK(clEnqueueFillBuffer(queue, ctx->buffer, &value, sizeof(value), 0, buffer->size, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(queue));
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_buffer_reset(ggml_backend_buffer_t buffer) {
|
||||
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
|
||||
for (auto * sub_buffer : ctx->sub_buffers) {
|
||||
clReleaseMemObject(sub_buffer);
|
||||
}
|
||||
ctx->sub_buffers.clear();
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_i ggml_backend_opencl_buffer_interface = {
|
||||
/* .free_buffer = */ ggml_backend_opencl_buffer_free_buffer,
|
||||
/* .get_base = */ ggml_backend_opencl_buffer_get_base,
|
||||
/* .init_tensor = */ ggml_backend_opencl_buffer_init_tensor,
|
||||
/* .memset_tensor = */ NULL,
|
||||
/* .set_tensor = */ ggml_backend_opencl_buffer_set_tensor,
|
||||
/* .get_tensor = */ ggml_backend_opencl_buffer_get_tensor,
|
||||
/* .cpy_tensor = */ NULL,
|
||||
/* .clear = */ ggml_backend_opencl_buffer_clear,
|
||||
/* .reset = */ ggml_backend_opencl_buffer_reset,
|
||||
};
|
||||
|
||||
// buffer type
|
||||
|
||||
static const char * ggml_backend_opencl_buffer_type_name(ggml_backend_buffer_type_t buffer_type) {
|
||||
return "OpenCL";
|
||||
|
||||
GGML_UNUSED(buffer_type);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_opencl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buffer_type, size_t size) {
|
||||
ggml_cl_init();
|
||||
|
||||
cl_int err;
|
||||
cl_mem mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &err);
|
||||
if (err != CL_SUCCESS) {
|
||||
fprintf(stderr, "%s: failed to allocate %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ggml_backend_opencl_buffer_context * ctx = new ggml_backend_opencl_buffer_context{mem, {}};
|
||||
|
||||
return ggml_backend_buffer_init(buffer_type, ggml_backend_opencl_buffer_interface, ctx, size);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_opencl_buffer_type_get_alignment(ggml_backend_buffer_type_t buffer_type) {
|
||||
// FIXME: not thread safe, device may not be initialized yet
|
||||
static cl_uint alignment = -1;
|
||||
if (alignment == (cl_uint)-1) {
|
||||
ggml_cl_init();
|
||||
clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &alignment, NULL);
|
||||
alignment /= 8; // bits to bytes
|
||||
}
|
||||
return alignment;
|
||||
|
||||
GGML_UNUSED(buffer_type);
|
||||
}
|
||||
|
||||
static size_t ggml_backend_opencl_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) {
|
||||
static size_t max_size = -1;
|
||||
if (max_size == (size_t)-1) {
|
||||
ggml_cl_init();
|
||||
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_size, NULL);
|
||||
}
|
||||
return max_size;
|
||||
}
|
||||
|
||||
static bool ggml_backend_opencl_buffer_type_supports_backend(ggml_backend_buffer_type_t buffer_type, ggml_backend_t backend) {
|
||||
//return ggml_backend_is_opencl(backend); // opencl must be used through the cpu backend
|
||||
return ggml_backend_is_cpu(backend);
|
||||
|
||||
GGML_UNUSED(buffer_type);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
|
||||
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
||||
/* .get_max_size = */ ggml_backend_opencl_buffer_type_get_max_size,
|
||||
/* .get_alloc_size = */ NULL,
|
||||
/* .is_host = */ NULL,
|
||||
};
|
||||
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type() {
|
||||
static ggml_backend_buffer_type buffer_type = {
|
||||
/* .iface = */ ggml_backend_opencl_buffer_type_interface,
|
||||
/* .context = */ nullptr,
|
||||
};
|
||||
|
||||
return &buffer_type;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// host buffer type
|
||||
|
||||
static const char * ggml_backend_opencl_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
|
||||
return "CL_Host";
|
||||
|
||||
GGML_UNUSED(buft);
|
||||
}
|
||||
|
||||
static const char * ggml_backend_opencl_host_buffer_name(ggml_backend_buffer_t buffer) {
|
||||
return "CL_Host";
|
||||
|
||||
GGML_UNUSED(buffer);
|
||||
}
|
||||
|
||||
static void ggml_backend_opencl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
ggml_cl_host_free(buffer->context);
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_t ggml_backend_opencl_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
||||
void * ptr = ggml_cl_host_malloc(size);
|
||||
|
||||
if (ptr == nullptr) {
|
||||
// fallback to cpu buffer
|
||||
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
|
||||
}
|
||||
|
||||
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
|
||||
buffer->buft = buft;
|
||||
buffer->iface.get_name = ggml_backend_opencl_host_buffer_name;
|
||||
buffer->iface.free_buffer = ggml_backend_opencl_host_buffer_free_buffer;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type() {
|
||||
static struct ggml_backend_buffer_type ggml_backend_opencl_buffer_type_host = {
|
||||
/* .iface = */ {
|
||||
/* .get_name = */ ggml_backend_opencl_host_buffer_type_name,
|
||||
/* .alloc_buffer = */ ggml_backend_opencl_host_buffer_type_alloc_buffer,
|
||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||
},
|
||||
/* .context = */ nullptr,
|
||||
};
|
||||
|
||||
return &ggml_backend_opencl_buffer_type_host;
|
||||
}
|
||||
|
||||
// backend
|
||||
|
||||
#endif
|
|
@ -16,22 +16,9 @@ GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struc
|
|||
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||
GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
||||
|
||||
// GGML_API void * ggml_cl_host_malloc(size_t size);
|
||||
// GGML_API void ggml_cl_host_free(void * ptr);
|
||||
|
||||
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
||||
|
||||
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
||||
|
||||
// backend API
|
||||
|
||||
// GGML_API ggml_backend_t ggml_backend_opencl_init(void);
|
||||
|
||||
// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
|
||||
// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -15,7 +15,7 @@
|
|||
#ifdef GGML_USE_CUDA
|
||||
# include "ggml-cuda.h"
|
||||
#elif defined(GGML_USE_CLBLAST)
|
||||
# include "ggml-opencl.h"
|
||||
# include "ggml_v3b-opencl.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue