diff --git a/expose.cpp b/expose.cpp
index 0d42e2a57..526538ce5 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -362,14 +362,14 @@ extern "C"
     }
 
     static std::string detokenized_str = ""; //just share a static object for detokenizing
-    const char * detokenize(const token_count_outputs input)
+    const char * detokenize(const detokenize_inputs input) //copies input.count ids into a vector and detokenizes them; the returned pointer refers to the static detokenized_str, which the next call overwrites
     {
         std::vector<int> input_arr;
         for(int i=0;i<input.count;++i)
         {
             input_arr.push_back(input.ids[i]);
         }
-        detokenized_str = gpttype_detokenize(input_arr,false);
+        detokenized_str = gpttype_detokenize(input_arr,input.special); //caller-supplied flag replaces the previously hard-coded false; presumably controls special-token rendering - confirm against gpttype_detokenize
         return detokenized_str.c_str();
     }
 
diff --git a/expose.h b/expose.h
index 695771a9f..ff3d3d6d7 100644
--- a/expose.h
+++ b/expose.h
@@ -158,6 +158,12 @@ struct token_count_outputs
     int count = 0;
     int * ids; //we'll just use shared memory for this one, bit of a hack
 };
+struct detokenize_inputs //by-value argument of detokenize(); must stay field-for-field in sync with the ctypes detokenize_inputs in koboldcpp.py
+{
+    int count = 0; //number of entries detokenize() reads from ids
+    int * ids; //we'll just use shared memory for this one, bit of a hack (the array is allocated by the Python caller)
+    bool special = false; //forwarded as the second argument of gpttype_detokenize
+};
 
 struct logprob_item {
     int option_count;
diff --git a/koboldcpp.py b/koboldcpp.py
index 43d57409d..3ff430a35 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -221,6 +221,11 @@ class token_count_outputs(ctypes.Structure):
     _fields_ = [("count", ctypes.c_int),
                 ("ids", ctypes.POINTER(ctypes.c_int))]
 
+class detokenize_inputs(ctypes.Structure): # mirrors struct detokenize_inputs in expose.h; keep field order and types in sync
+    _fields_ = [("count", ctypes.c_int), # number of token ids in the array
+                ("ids", ctypes.POINTER(ctypes.c_int)), # pointer to the token id array filled by detokenize_ids
+                ("special", ctypes.c_bool)] # forwarded by the C++ detokenize() to gpttype_detokenize
+
 # returns top 5 logprobs per token
 class logprob_item(ctypes.Structure):
      _fields_ = [("option_count", ctypes.c_int),
@@ -969,7 +974,7 @@ def init_library():
     handle.music_generate.argtypes = [music_generation_inputs]
     handle.music_generate.restype = music_generation_outputs
     handle.last_logprobs.restype = last_logprobs_outputs
-    handle.detokenize.argtypes = [token_count_outputs]
+    handle.detokenize.argtypes = [detokenize_inputs]
     handle.detokenize.restype = ctypes.c_char_p
     handle.set_environment_variable.restype = ctypes.c_int
     handle.set_environment_variable.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
@@ -3037,12 +3042,13 @@ def tokenize_ids(countprompt,tcaddspecial):
     countdata = [rawcountdata.ids[i] for i in range(countlimit)]
     return countdata
 
-def detokenize_ids(tokids):
+def detokenize_ids(tokids,addspecial):
     tokidslen = len(tokids)
     detokstr = ""
     if tokidslen > 0 and tokidslen < 65536:
-        inputs = token_count_outputs()
+        inputs = detokenize_inputs()
         inputs.count = tokidslen
+        inputs.special = addspecial
         inputs.ids = (ctypes.c_int * tokidslen)()
         for i, cid in enumerate(tokids):
             inputs.ids[i] = cid
@@ -4235,7 +4241,7 @@ ws ::= | " " | "\n" [ \t]{0,20}
         assistant_message_start = adapter_obj.get("assistant_start", "\n\n### Response:\n")
         assistant_message_gen = adapter_obj.get("assistant_gen", assistant_message_start)
         try:
-            detokstr = detokenize_ids(tokids)
+            detokstr = detokenize_ids(tokids,True)
         except Exception as e:
             utfprint("Ollama Context Error: " + str(e))
         ollamasysprompt = genparams.get('system', "")
@@ -6026,7 +6032,8 @@ Change Mode<br>
             try:
                 genparams = json.loads(body)
                 tokids = genparams.get('ids', [])
-                detokstr = detokenize_ids(tokids)
+                addspecial = genparams.get('special', True)
+                detokstr = detokenize_ids(tokids,addspecial)
                 response_body = (json.dumps({"result": detokstr,"success":True}).encode())
             except Exception as e:
                 utfprint("Detokenize Error: " + str(e))