diff --git a/expose.cpp b/expose.cpp
index ec2506c49..89089844f 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -277,11 +277,11 @@ extern "C"
     }
 
     static std::vector<int> toks; //just share a static object for token counting
-    token_count_outputs token_count(const char * input)
+    token_count_outputs token_count(const char * input, bool addbos)
     {
         std::string inputstr = input;
         token_count_outputs output;
-        toks = gpttype_get_token_arr(inputstr);
+        toks = gpttype_get_token_arr(inputstr,addbos);
         output.count = toks.size();
         output.ids = toks.data(); //this may be slightly unsafe
         return output;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 97aabe545..ba4a33563 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -798,10 +798,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     kcpp_params->n_threads_batch = inputs.blasthreads;
     bool isGguf = (file_format == FileFormat::GGUF_GENERIC);
     kcpp_params->n_batch = GetBatchSize(inputs.blasbatchsize, in_file_format);
-    if(kcpp_params->n_batch>512)
-    {
-        kcpp_params->n_ubatch = (kcpp_params->n_batch>1024?1024:kcpp_params->n_batch);
-    }
+    kcpp_params->n_ubatch = kcpp_params->n_batch;
     kcpp_params->flash_attn = inputs.flash_attention;
     modelname = kcpp_params->model = inputs.model_filename;
     useSmartContext = inputs.use_smartcontext;
@@ -1544,7 +1541,7 @@ bool gpttype_generate_abort()
     return true;
 }
 
-std::vector<int> gpttype_get_token_arr(const std::string & input)
+std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos)
 {
     std::vector<int> toks;
     if(kcpp_params==nullptr)
@@ -1556,7 +1553,7 @@ std::vector<int> gpttype_get_token_arr(const std::string & input)
     {
         printf("\nFileFormat: %d, Tokenizing: %s",file_format ,input.c_str());
     }
-    TokenizeString(input, toks, file_format);
+    TokenizeString(input, toks, file_format,addbos);
     int tokcount = toks.size();
     if(debugmode==1)
     {
diff --git a/klite.embd b/klite.embd
index 824d1cd53..93b69c72d 100644
--- a/klite.embd
+++ b/klite.embd
@@ -7022,7 +7022,7 @@ Current version: 145
             {
                 key = parseInt(tokarr[x]);
                 val = parseInt(val);
-                if (!isNaN(key) && key!=1) {
+                if (!isNaN(key)) {
                     dict[key] = parseInt(val);
                 }
             }
@@ -12839,7 +12839,8 @@ Current version: 145
     function kcpp_tokenize(prompt,onDone)
    {
         let payload = {
-            "prompt": prompt
+            "prompt": prompt,
+            "special": false,
         };
         fetch(apply_proxy_url(custom_kobold_endpoint + koboldcpp_tokenize_endpoint), {
             method: 'POST',
diff --git a/koboldcpp.py b/koboldcpp.py
index 2a5544af5..03763c550 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1352,7 +1352,8 @@ Enter Prompt:<br>
         try:
             genparams = json.loads(body)
             countprompt = genparams.get('prompt', "")
-            rawcountdata = handle.token_count(countprompt.encode("UTF-8"))
+            tcaddspecial = genparams.get('special', True)
+            rawcountdata = handle.token_count(countprompt.encode("UTF-8"),tcaddspecial)
             countlimit = rawcountdata.count if (rawcountdata.count>=0 and rawcountdata.count<50000) else 0
             # the above protects the server in case the count limit got corrupted
             countdata = [rawcountdata.ids[i] for i in range(countlimit)]
diff --git a/model_adapter.h b/model_adapter.h
index 7629a3f30..0ba7582bb 100644
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -77,7 +77,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 generation_outputs gpttype_generate(const generation_inputs inputs);
 bool gpttype_generate_abort();
 const std::string & gpttype_get_pending_output();
-std::vector<int> gpttype_get_token_arr(const std::string & input);
+std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos);
 
 bool sdtype_load_model(const sd_load_model_inputs inputs);
 sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs);
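For reference, a minimal client-side sketch of exercising the new flag end to end. Assumptions not shown in this diff: the server listens on the default port 5001, the tokenize route is /api/extra/tokencount (the value of koboldcpp_tokenize_endpoint), and the exact shape of the JSON response; only the "prompt" and "special" request fields are confirmed by the change above. Omitting "special" keeps the old behaviour, since the server defaults it to True (addbos).

    import json, urllib.request

    # "special": False asks the server to tokenize without prepending BOS/special
    # tokens; leaving it out (or sending True) preserves the previous behaviour.
    payload = json.dumps({"prompt": "Hello world", "special": False}).encode("utf-8")
    req = urllib.request.Request(
        "http://localhost:5001/api/extra/tokencount",  # assumed route and port
        data=payload,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        # Response carries the token count (and, per the handler above, the
        # token ids); the exact field names are an assumption here.
        print(json.load(resp))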