diff --git a/agent.py b/agent.py index 8acb509d1..f653a9be9 100644 --- a/agent.py +++ b/agent.py @@ -1,41 +1,57 @@ +import json import time, importlib, inspect -from typing import Optional, Dict -from tools.helpers import extract_tools, rate_limiter, files +import traceback +from typing import Optional, Dict, TypedDict +from tools.helpers import extract_tools, rate_limiter, files, errors from tools.helpers.print_style import PrintStyle from langchain.schema import AIMessage from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain_core.messages import HumanMessage +from langchain_core.messages import HumanMessage, SystemMessage from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.embeddings import Embeddings + +# rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly -rate_limit = rate_limiter.rate_limiter(30,160000) #TODO! implement properly class Agent: paused=False streaming_agent=None - - @staticmethod - def configure(model_chat, model_embedding, memory_subdir="", memory_results=3): - - #save configuration - Agent.model_chat = model_chat - - # initialize memory tool - from tools import memory_tool - memory_tool.initialize( - embeddings_model=model_embedding, - messages_returned=memory_results, - subdir=memory_subdir ) - def __init__(self, system_prompt:Optional[str]=None, tools_prompt:Optional[str]=None, number=0): - - self.number = number - self.name = f"Agent {self.number}" + def __init__(self, + agent_number: int, + chat_llm:BaseChatModel, + embeddings_model:Embeddings, + memory_subdir: str = "", + auto_memory_count: int = 3, + auto_memory_skip: int = 2, + rate_limit_seconds: int = 60, + rate_limit_input_tokens: int = 0, + rate_limit_output_tokens: int = 0, + msgs_keep_max: int =25, + msgs_keep_start: int =5, + msgs_keep_end: int =10, + **kwargs): - if system_prompt is None: system_prompt = files.read_file("./prompts/agent.system.md") - if tools_prompt is None: 
tools_prompt = files.read_file("./prompts/agent.tools.md") - self.system_prompt = system_prompt.replace("{", "{{").replace("}", "}}") - self.tools_prompt = tools_prompt.replace("{", "{{").replace("}", "}}") + # agent config + self.agent_number = agent_number + self.chat_model = chat_llm + self.embeddings_model = embeddings_model + self.memory_subdir = memory_subdir + self.auto_memory_count = auto_memory_count + self.auto_memory_skip = auto_memory_skip + self.rate_limit_seconds = rate_limit_seconds + self.rate_limit_input_tokens = rate_limit_input_tokens + self.rate_limit_output_tokens = rate_limit_output_tokens + self.msgs_keep_max = msgs_keep_max + self.msgs_keep_start = msgs_keep_start + self.msgs_keep_end = msgs_keep_end + + # non-config vars + self.agent_name = f"Agent {self.agent_number}" + + self.system_prompt = files.read_file("./prompts/agent.system.md").replace("{", "{{").replace("}", "}}") + self.tools_prompt = files.read_file("./prompts/agent.tools.md").replace("{", "{{").replace("}", "}}") self.history = [] self.last_message = "" @@ -43,64 +59,67 @@ class Agent: self.intervention_status = False self.data = {} # free data object all the tools can use - - self.prompt = ChatPromptTemplate.from_messages([ - ("system", self.system_prompt + "\n\n" + self.tools_prompt), - MessagesPlaceholder(variable_name="messages") ]) def message_loop(self, msg: str): try: printer = PrintStyle(italic=True, font_color="#b3ffd9", padding=False) user_message = files.read_file("./prompts/fw.user_message.md", message=msg) self.append_message(user_message, human=True) # Append the user's input to the history - - self.stop_loop = False - self.loop_result = [] - + memories = self.fetch_memories(True) + while True: # let the agent iterate on his thoughts until he stops by using a tool Agent.streaming_agent = self #mark self as current streamer agent_response = "" self.intervention_status = False # reset interventon status + try: + system = self.system_prompt + "\n\n" + 
self.tools_prompt + memories = self.fetch_memories() + if memories: system+= "\n\n"+memories + + prompt = ChatPromptTemplate.from_messages([ + SystemMessage(content=system), + MessagesPlaceholder(variable_name="messages") ]) + inputs = {"messages": self.history} - chain = self.prompt | Agent.model_chat - formatted_inputs = self.prompt.format(**inputs) + chain = prompt | self.chat_model + formatted_inputs = prompt.format(messages=self.history) - rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words). + # rate_limit(len(formatted_inputs)/4) #wait for rate limiter - A helpful rule of thumb is that one token generally corresponds to ~4 characters of text for common English text. This translates to roughly ¾ of a word (so 100 tokens ~= 75 words). # output that the agent is starting - PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.name}: Starting a message:") + PrintStyle(bold=True, font_color="green", padding=True, background_color="white").print(f"{self.agent_name}: Starting a message:") for chunk in chain.stream(inputs): - if self.handle_intervention(agent_response): break # wait for intervention and handle it, if paused - - if chunk.content is not None and chunk.content != '': - printer.stream(chunk.content) # output the agent response stream - agent_response += chunk.content # type: ignore | concatenate stream into the response + + if isinstance(chunk, str): content = chunk + elif hasattr(chunk, "content"): content = str(chunk.content) + else: content = str(chunk) + + if content: + printer.stream(content) # output the agent response stream + agent_response += content # concatenate stream into the response if not self.handle_intervention(agent_response): - #if assistant_response is the same as last message in history, let him know - if 
self.last_message == agent_response: - agent_response = files.read_file("./prompts/fw.msg_repeat.md") - PrintStyle(font_color="orange", padding=True).print(agent_response) - self.last_message = agent_response - - self.append_message(agent_response) # Append the assistant's response to the history + if self.last_message == agent_response: #if assistant_response is the same as last message in history, let him know + self.append_message(agent_response) # Append the assistant's response to the history + warning_msg = files.read_file("./prompts/fw.msg_repeat.md") + self.append_message(warning_msg, human=True) # Append warning message to the history + PrintStyle(font_color="orange", padding=True).print(warning_msg) - tools_result = self.process_tools(agent_response) # process tools requested in agent message - if tools_result: return tools_result #break the execution if the task is done + else: #otherwise proceed with tool + self.append_message(agent_response) # Append the assistant's response to the history + tools_result = self.process_tools(agent_response) # process tools requested in agent message + if tools_result: return tools_result #break the execution if the task is done # Forward errors to the LLM, maybe he can fix them except Exception as e: - msg_response = files.read_file("./prompts/fw.error.md", error=str(e)) # error message template + error_message = errors.format_error(e) + msg_response = files.read_file("./prompts/fw.error.md", error=error_message) # error message template self.append_message(msg_response, human=True) PrintStyle(font_color="red", padding=True).print(msg_response) - finally: - if self.get_last_message().type=="ai": #type: ignore - user_message = files.read_file("./prompts/fw.msg_continue.md") - PrintStyle(font_color="yellow", padding=False).print(user_message) finally: Agent.streaming_agent = None # unset current streamer @@ -118,25 +137,74 @@ class Agent: else: new_message = HumanMessage(content=msg) if human else AIMessage(content=msg) 
self.history.append(new_message) - self.cleanup_history(5, 10) + self.cleanup_history(self.msgs_keep_max, self.msgs_keep_start, self.msgs_keep_end) if message_type=="ai": self.last_message = msg + def concat_messages(self,messages): + return "\n".join([f"{msg.type}: {msg.content}" for msg in messages]) + + def send_adhoc_message(self, system: str, msg: str, output_label:str): + prompt = ChatPromptTemplate.from_messages([ + SystemMessage(content=system), + HumanMessage(content=msg)]) + + chain = prompt | self.chat_model + response = "" + printer = None + + if output_label: + PrintStyle(bold=True, font_color="orange", padding=True, background_color="white").print(f"{self.agent_name}: {output_label}:") + printer = PrintStyle(italic=True, font_color="orange", padding=False) + + for chunk in chain.stream({}): + if self.handle_intervention(response): break # wait for intervention and handle it, if paused + + if isinstance(chunk, str): content = chunk + elif hasattr(chunk, "content"): content = str(chunk.content) + else: content = str(chunk) + + if printer: printer.stream(content) + response+=content + + return response + def get_last_message(self): if self.history: return self.history[-1] - def cleanup_history(self,x, y): - if len(self.history) <= x + y: - return self.history - - first_x = self.history[:x] - last_y = self.history[-y:] - + def replace_middle_messages(self,middle_messages): cleanup_prompt = files.read_file("./prompts/fw.msg_cleanup.md") - middle_values = [AIMessage(content=cleanup_prompt)] - - self.history = first_x + middle_values + last_y + summary = self.send_adhoc_message(system=cleanup_prompt,msg=self.concat_messages(middle_messages), output_label="Mid messages cleanup summary") + new_human_message = HumanMessage(content=summary) + return [new_human_message] + + def cleanup_history(self, max:int, keep_start:int, keep_end:int): + if len(self.history) <= max: + return self.history + + first_x = self.history[:keep_start] + last_y = 
self.history[-keep_end:] + + # Identify the middle part + middle_part = self.history[keep_start:-keep_end] + + # Ensure the first message in the middle is "human", if not, move one message back + if middle_part and middle_part[0].type != "human": + if len(first_x) > 0: + middle_part.insert(0, first_x.pop()) + + # Ensure the middle part has an odd number of messages + if len(middle_part) % 2 == 0: + middle_part = middle_part[:-1] + + # Replace the middle part using the replacement function + new_middle_part = self.replace_middle_messages(middle_part) + + self.history = first_x + new_middle_part + last_y + + return self.history + def handle_intervention(self, progress:str="") -> bool: while self.paused: time.sleep(0.1) # wait if paused @@ -151,10 +219,12 @@ class Agent: def process_tools(self, msg: str): # search for tool usage requests in agent message tool_request = extract_tools.json_parse_dirty(msg) + tool_name = tool_request.get("tool_name", "") + tool_args = tool_request.get("tool_args", {}) tool = self.get_tool( - tool_request["tool_name"], - tool_request["tool_args"], + tool_name, + tool_args, msg) if self.handle_intervention(): return # wait if paused and handle intervention message if needed @@ -179,4 +249,23 @@ class Agent: tool_class = cls[1] break - return tool_class(agent=self, name=name, args=args, message=message, **kwargs) \ No newline at end of file + return tool_class(agent=self, name=name, args=args, message=message, **kwargs) + + def fetch_memories(self,reset_skip=False): + if reset_skip: self.memory_skip_counter = 0 + + if self.memory_skip_counter > 0: + self.memory_skip_counter-=1 + return "" + else: + self.memory_skip_counter = self.auto_memory_skip + from tools import memory_tool + messages = self.concat_messages(self.history) + memories = memory_tool.process_query(self,messages,"load") + input = { + "conversation_history" : messages, + "raw_memories": memories + } + cleanup_prompt = 
files.read_file("./prompts/msg.memory_cleanup.md").replace("{", "{{") + clean_memories = self.send_adhoc_message(cleanup_prompt,json.dumps(input), output_label="Memory cleanup summary") + return clean_memories \ No newline at end of file diff --git a/main.py b/main.py index 6e361f328..01e254bc0 100644 --- a/main.py +++ b/main.py @@ -23,23 +23,18 @@ def chat(): # chat_llm = models.get_openai_gpt4o(temperature=0) # chat_llm = models.get_anthropic_opus(temperature=0) # chat_llm = models.get_anthropic_sonnet(temperature=0) - chat_llm = models.get_anthropic_haiku(temperature=0) + chat_llm = models.get_anthropic_sonnet_35(temperature=0) + # chat_llm = models.get_anthropic_haiku(temperature=0) # chat_llm = models.get_ollama_dolphin() # embedding model used for memory # embedding_llm = models.get_embedding_openai() embedding_llm = models.get_embedding_hf() - - # initial configuration - Agent.configure( - model_chat = chat_llm, - model_embedding = embedding_llm, - #memory_subdir="" - #memory_results=3 - ) # create the first agent - agent0 = Agent() + agent0 = Agent(agent_number=0, + chat_llm=chat_llm, + embeddings_model=embedding_llm) # start the conversation loop while True: @@ -73,7 +68,7 @@ def chat(): assistant_response = agent0.message_loop(user_input) # print agent0 response - PrintStyle(font_color="white",background_color="#1D8348", bold=True, padding=True).print(f"{agent0.name}: reponse:") + PrintStyle(font_color="white",background_color="#1D8348", bold=True, padding=True).print(f"{agent0.agent_name}: reponse:") PrintStyle(font_color="white").print(f"{assistant_response}") diff --git a/models.py b/models.py index 14587282c..2174eaebd 100755 --- a/models.py +++ b/models.py @@ -22,6 +22,11 @@ def get_anthropic_haiku(api_key=None, temperature=DEFAULT_TEMPERATURE): api_key = api_key or get_api_key("anthropic") return ChatAnthropic(model_name="claude-3-haiku-20240307", temperature=temperature, api_key=api_key) # type: ignore +def get_anthropic_sonnet_35(api_key=None, 
temperature=DEFAULT_TEMPERATURE): + api_key = api_key or get_api_key("anthropic") + return ChatAnthropic(model_name="claude-3-5-sonnet-20240620", temperature=temperature, api_key=api_key) # type: ignore + + def get_anthropic_sonnet(api_key=None, temperature=DEFAULT_TEMPERATURE): api_key = api_key or get_api_key("anthropic") return ChatAnthropic(model_name="claude-3-sonnet-20240229", temperature=temperature, api_key=api_key) # type: ignore @@ -68,10 +73,10 @@ def get_groq_gemma(api_key=None, temperature=DEFAULT_TEMPERATURE): return ChatGroq(model_name="gemma-7b-it", temperature=temperature, api_key=api_key) # type: ignore def get_ollama_dolphin(api_key=None, temperature=DEFAULT_TEMPERATURE): - return Ollama(model="dolphin-llama3:8b-256k-v2.9-fp16") + return Ollama(model="dolphin-llama3:8b-256k-v2.9-fp16", temperature=temperature) def get_ollama_phi(api_key=None, temperature=DEFAULT_TEMPERATURE): - return Ollama(model="phi3:3.8b-mini-instruct-4k-fp16") + return Ollama(model="phi3:3.8b-mini-instruct-4k-fp16",temperature=temperature) def get_embedding_hf(model_name="sentence-transformers/all-MiniLM-L6-v2"): return HuggingFaceEmbeddings(model_name=model_name) diff --git a/prompts/agent.memory.md b/prompts/agent.memory.md new file mode 100644 index 000000000..c768be129 --- /dev/null +++ b/prompts/agent.memory.md @@ -0,0 +1,5 @@ +# Memories +- following are your memories on the current topic +- you may find some of them helpful to solve the current task + +{{memories}} \ No newline at end of file diff --git a/prompts/agent.system.md b/prompts/agent.system.md index 7c2ee5318..7dcfc7ab4 100644 --- a/prompts/agent.system.md +++ b/prompts/agent.system.md @@ -9,6 +9,7 @@ - Tools help you gather knowledge and execute actions 3. **tool_args**: Object of arguments that are passed to the tool - Each tool has specific arguments listed in Available tools section +- No text before or after the JSON object. End message there. 
## Response example that must be used every time ~~~json diff --git a/prompts/agent.tools.md b/prompts/agent.tools.md index e4db6ffc0..bbe059aed 100644 --- a/prompts/agent.tools.md +++ b/prompts/agent.tools.md @@ -41,28 +41,21 @@ Always verify memory by online. } ~~~ -### memory_tool: -Access your persistent memory to load or save memories. -Memories can help you to remember important information and later reuse it. -With this you are able to learn and improve. -Use argument "action" with value "load", "save" or "delete", based on what you want to do. -Use argument "memory" for content to load or save. -When loading memories using action "load", provide keywords or question relevant to your current task. -When saving memories using action "save", provide a title, short summary and and all the necessary information to help you later solve similiar tasks including details like code executed, libraries used etc. -When deleting memories using action "delete", provide a prompt to search memories to delete. -Be specific with your question, do not input vague queries. +### memorize: +Save information to persistent memory. +Memories can help you remember important details and later reuse them. +Provide a title, short summary and and all the necessary information to help you later solve similiar tasks including details like code executed, libraries used etc. **Example usages**: ~~~json { "thoughts": [ - "I need to do...", - "Maybe I have done it in the past...", - "Let me check the memory...", + "I have finished my...", + "Details of this process will be valuable...", + "Let's save tools and code used...", ], - "tool_name": "memory_tool", + "tool_name": "memorize", "tool_args": { - "action": "load", - "question": "How to...", + "memory": "# How to...", } } ~~~ diff --git a/prompts/fw.memorized.md b/prompts/fw.memorized.md new file mode 100644 index 000000000..f82e9344b --- /dev/null +++ b/prompts/fw.memorized.md @@ -0,0 +1 @@ +Information saved to memory. 
\ No newline at end of file diff --git a/prompts/fw.msg_cleanup.md b/prompts/fw.msg_cleanup.md index 64fa4bd70..581ee6686 100644 --- a/prompts/fw.msg_cleanup.md +++ b/prompts/fw.msg_cleanup.md @@ -1 +1,11 @@ -NOTICE: Some messages here have been removed to save memory. \ No newline at end of file +# Provide a JSON summary of given messages +- From the messages you are given, write a summary of key points in the conversation. +- Include important aspects and remove unnecessary details. + +# Expected output format +~~~json +{ + "system_info": "Messages have been summarized to save space.", + "messages_summary": ["Key point 1...", "Key point 2..."] +} +~~~ \ No newline at end of file diff --git a/prompts/fw.msg_continue.md b/prompts/fw.msg_continue.md deleted file mode 100644 index d86cbfcb6..000000000 --- a/prompts/fw.msg_continue.md +++ /dev/null @@ -1 +0,0 @@ -Continue with your thoughts and use tool or message when ready. \ No newline at end of file diff --git a/prompts/fw.msg_info_sent.md b/prompts/fw.msg_info_sent.md deleted file mode 100644 index eace3e6ec..000000000 --- a/prompts/fw.msg_info_sent.md +++ /dev/null @@ -1,2 +0,0 @@ -Information sent, the user will not respond to info messages. -If you required user interaction use response_required="true" instead. \ No newline at end of file diff --git a/prompts/fw.msg_repeat.md b/prompts/fw.msg_repeat.md index 8c12783ea..116031899 100644 --- a/prompts/fw.msg_repeat.md +++ b/prompts/fw.msg_repeat.md @@ -1 +1,5 @@ -I tried the same response twice. I have to do something else. \ No newline at end of file +~~~json +{ + "system_warning": "You have sent the same message again. You have to do something else!" +} +~~~ \ No newline at end of file diff --git a/prompts/fw.msg_sent.md b/prompts/fw.msg_sent.md deleted file mode 100644 index 0392f5716..000000000 --- a/prompts/fw.msg_sent.md +++ /dev/null @@ -1 +0,0 @@ -Message sent, wait for response. 
\ No newline at end of file diff --git a/prompts/fw.tool_not_found.md b/prompts/fw.tool_not_found.md index c9c8eeb5c..a7a69cac4 100644 --- a/prompts/fw.tool_not_found.md +++ b/prompts/fw.tool_not_found.md @@ -1,2 +1,5 @@ -Tool {{tool_name}} not found. Available tools: -{{tools_prompt}} \ No newline at end of file +~~~json +{ + "system_warning": "Tool {{tool_name}} not found. Available tools: \n{{tools_prompt}}" +} +~~~ \ No newline at end of file diff --git a/prompts/msg.memory_cleanup.md b/prompts/msg.memory_cleanup.md new file mode 100644 index 000000000..0dcf3fc8d --- /dev/null +++ b/prompts/msg.memory_cleanup.md @@ -0,0 +1,13 @@ +# Cleanup raw memories from database +- You will receive two data collections: + 1. Conversation history of AI agent. + 2. Raw memories from vector database based on similarity score. +- Your job is to remove all memories from the database that are not relevant to the topic of the conversation history and only return memories that are relevant and helpful for future of the conversation. +- Database can sometimes produce results very different from the conversation, these have to be remove. +- Focus on the end of the conversation history, that is where the most current topic is. 
+ +# Expected output format +- Return filtered list of bullet points of key elements in the memories +- Include every important detail relevant to conversation +- Include code snippets if relevant +- Omit any unrelevant information \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 000000000..65d08ba67 --- /dev/null +++ b/test.py @@ -0,0 +1,26 @@ +def extract_json_string(content): + start = content.find('{') + if start == -1: + print("No JSON content found.") + return "" + + # Find the first '{' + end = content.rfind('}') + if end == -1: + # If there's no closing '}', return from start to the end + return content[start:] + else: + # If there's a closing '}', return the substring from start to end + return content[start:end+1] + +# Test cases +test_cases = [ + 'Some text before {"key1": "value1", "key2": 123, "key3": true, "key4": null} some text after', + '{"key1": "value1", "key2": 123, "key3": true, "key4": null', # Incomplete JSON + '{"nested": {"key": "value"}, "list": [1, 2, 3], "bool": true}', + 'text without json', +] + +# Run the test cases +results = [extract_json_string(tc) for tc in test_cases] +print(results) diff --git a/tools/delegation.py b/tools/delegation.py index 80685c093..8e51bf5ee 100644 --- a/tools/delegation.py +++ b/tools/delegation.py @@ -5,10 +5,11 @@ from tools.helpers.print_style import PrintStyle class Delegation(Tool): - def execute(self): + def execute(self, **kwargs): # create subordinate agent using the data object on this agent and set superior agent to his data object if self.agent.get_data("subordinate") is None or self.args["reset"].lower().strip() == "true": - subordinate = Agent(system_prompt=self.agent.system_prompt, tools_prompt=self.agent.tools_prompt, number=self.agent.number+1) + # subordinate = Agent(system_prompt=self.agent.system_prompt, tools_prompt=self.agent.tools_prompt, number=self.agent.number+1) + subordinate = Agent(**self.agent.__dict__, 
agent_number=self.agent.agent_number+1) subordinate.set_data("superior", self.agent) self.agent.set_data("subordinate", subordinate) # run subordinate agent message loop diff --git a/tools/helpers/dirty_json.py b/tools/helpers/dirty_json.py new file mode 100644 index 000000000..cf75e431e --- /dev/null +++ b/tools/helpers/dirty_json.py @@ -0,0 +1,276 @@ + +# work in progress, but quite good already +# able to parse json like this, even when cut in half: + +# { +# name: John Doe, +# 'age': 30, +# 'some': undefined, +# other: tRue, +# city: "New York", +# "hobbies": ["reading", 'cycling'], +# married: false, +# children: null, +# "bio": """A multi-line +# biography that +# spans several lines""", +# 'quote': """Another +# multi-line quote +# using single quotes""" +# } + + +class DirtyJson: + def __init__(self): + self._reset() + + def _reset(self): + self.json_string = "" + self.index = 0 + self.current_char = None + self.result = None + self.stack = [] + + @staticmethod + def parse_string(json_string): + parser = DirtyJson() + return parser.parse(json_string) + + def parse(self, json_string): + self._reset() + self.json_string = json_string + self.current_char = self.json_string[0] + self._parse() + return self.result + + def feed(self, chunk): + self.json_string += chunk + if not self.current_char and self.json_string: + self.current_char = self.json_string[0] + self._parse() + return self.result + + def _advance(self,count=1): + self.index += count + if self.index < len(self.json_string): + self.current_char = self.json_string[self.index] + else: + self.current_char = None + + def _skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self._advance() + + def _parse(self): + if self.result is None: + self.result = self._parse_value() + else: + self._continue_parsing() + + def _continue_parsing(self): + while self.current_char is not None: + if isinstance(self.result, dict): + self._parse_object_content() + elif 
isinstance(self.result, list): + self._parse_array_content() + elif isinstance(self.result, str): + self.result = self._parse_string() + else: + break + + def _parse_value(self): + self._skip_whitespace() + if self.current_char == '{': + return self._parse_object() + elif self.current_char == '[': + return self._parse_array() + elif self.current_char in ['"', "'"]: + if self._peek(2) == self.current_char * 2: # type: ignore + return self._parse_multiline_string() + return self._parse_string() + elif self.current_char and (self.current_char.isdigit() or self.current_char in ['-', '+']): + return self._parse_number() + elif self._match("true"): + return True + elif self._match('false'): + return False + elif self._match('null') or self._match("undefined"): + return None + elif self.current_char: + return self._parse_unquoted_string() + return None + + def _match(self, text:str) -> bool: + cnt = len(text) + if self._peek(cnt).lower() == text.lower(): + self._advance(cnt) + return True + return False + + def _parse_object(self): + obj = {} + self._advance() # Skip opening brace + self.stack.append(obj) + self._parse_object_content() + return obj + + def _parse_object_content(self): + while self.current_char is not None: + self._skip_whitespace() + if self.current_char == '}': + self._advance() + self.stack.pop() + return + if self.current_char is None: + return # End of input reached while parsing object + + key = self._parse_key() + value = None + self._skip_whitespace() + + if self.current_char == ':': + self._advance() + value = self._parse_value() + elif self.current_char is None: + value = None # End of input reached after key + else: + value = self._parse_value() + + self.stack[-1][key] = value + + self._skip_whitespace() + if self.current_char == ',': + self._advance() + continue + elif self.current_char != '}': + if self.current_char is None: + return # End of input reached after value + # Allow missing comma between key-value pairs + continue + + def 
_parse_key(self): + self._skip_whitespace() + if self.current_char in ['"', "'"]: + return self._parse_string() + else: + return self._parse_unquoted_key() + + def _parse_unquoted_key(self): + result = "" + while self.current_char is not None and not self.current_char.isspace() and self.current_char not in [':', ',', '}', ']']: + result += self.current_char + self._advance() + return result + + def _parse_array(self): + arr = [] + self._advance() # Skip opening bracket + self.stack.append(arr) + self._parse_array_content() + return arr + + def _parse_array_content(self): + while self.current_char is not None: + self._skip_whitespace() + if self.current_char == ']': + self._advance() + self.stack.pop() + return + value = self._parse_value() + self.stack[-1].append(value) + self._skip_whitespace() + if self.current_char == ',': + self._advance() + elif self.current_char != ']': + self.stack.pop() + return + + def _parse_string(self): + result = "" + quote_char = self.current_char + self._advance() # Skip opening quote + while self.current_char is not None and self.current_char != quote_char: + if self.current_char == '\\': + self._advance() + if self.current_char in ['"', "'", '\\', '/', 'b', 'f', 'n', 'r', 't']: + result += {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}.get(self.current_char, self.current_char) + elif self.current_char == 'u': + unicode_char = "" + for _ in range(4): + if self.current_char is None: + return result + unicode_char += self.current_char + self._advance() + result += chr(int(unicode_char, 16)) + continue + else: + result += self.current_char + self._advance() + if self.current_char == quote_char: + self._advance() # Skip closing quote + return result + + def _parse_multiline_string(self): + result = "" + quote_char = self.current_char + self._advance(3) # Skip first quote + while self.current_char is not None: + if self.current_char == quote_char and self._peek(2) == quote_char * 2: # type: ignore + self._advance(3) # Skip first 
quote + break + result += self.current_char + self._advance() + return result.strip() + + def _parse_number(self): + number_str = "" + while self.current_char is not None and (self.current_char.isdigit() or self.current_char in ['-', '+', '.', 'e', 'E']): + number_str += self.current_char + self._advance() + try: + return int(number_str) + except ValueError: + return float(number_str) + + def _parse_true(self): + self._advance() + for char in 'rue': + if self.current_char != char: + return None + self._advance() + return True + + def _parse_false(self): + self._advance() + for char in 'alse': + if self.current_char != char: + return None + self._advance() + return False + + def _parse_null(self): + self._advance() + for char in 'ull': + if self.current_char != char: + return None + self._advance() + return None + + def _parse_unquoted_string(self): + result = "" + # while self.current_char is not None and not self.current_char.isspace() and self.current_char not in [':', ',', '}', ']']: + while self.current_char is not None and self.current_char not in [':', ',', '}', ']']: + result += self.current_char + self._advance() + return result.strip() + + def _peek(self, n): + peek_index = self.index + result = '' + for _ in range(n): + if peek_index < len(self.json_string): + result += self.json_string[peek_index] + peek_index += 1 + else: + break + return result \ No newline at end of file diff --git a/tools/helpers/errors.py b/tools/helpers/errors.py new file mode 100644 index 000000000..a8b7e08e9 --- /dev/null +++ b/tools/helpers/errors.py @@ -0,0 +1,30 @@ + +def format_error(e: Exception, max_entries=2): + traceback_text = str(e.with_traceback(None)) + # Split the traceback into lines + lines = traceback_text.split('\n') + + # Find all "File" lines + file_indices = [i for i, line in enumerate(lines) if line.strip().startswith("File ")] + + # If we found at least one "File" line, keep up to max_entries + if file_indices: + start_index = max(0, len(file_indices) - 
max_entries) + trimmed_lines = lines[file_indices[start_index]:] + else: + # If no "File" lines found, just return the original traceback + return traceback_text + + # Find the error message at the end + error_message = "" + for line in reversed(trimmed_lines): + if re.match(r'\w+Error:', line): + error_message = line + break + + # Combine the trimmed traceback with the error message + result = "Traceback (most recent call last):\n" + '\n'.join(trimmed_lines) + if error_message: + result += f"\n\n{error_message}" + + return result \ No newline at end of file diff --git a/tools/helpers/extract_tools.py b/tools/helpers/extract_tools.py index 8d3486a7d..16101b9be 100644 --- a/tools/helpers/extract_tools.py +++ b/tools/helpers/extract_tools.py @@ -1,15 +1,32 @@ import re, os from typing import Any from . import files -import dirtyjson +# import dirtyjson +from .dirty_json import DirtyJson import regex -def json_parse_dirty(json:str) -> Any: - ext_json = extract_json_string(json) - ext_json = fix_json_string(ext_json) - data = dirtyjson.loads(ext_json) - return data +def json_parse_dirty(json:str) -> dict[str,Any]: + ext_json = extract_json_object_string(json) + # ext_json = fix_json_string(ext_json) + data = DirtyJson.parse_string(ext_json) + if isinstance(data,dict): return data + return {} + +def extract_json_object_string(content): + start = content.find('{') + if start == -1: + print("No JSON content found.") + return "" + + # Find the first '{' + end = content.rfind('}') + if end == -1: + # If there's no closing '}', return from start to the end + return content[start:] + else: + # If there's a closing '}', return the substring from start to end + return content[start:end+1] def extract_json_string(content): # Regular expression pattern to match a JSON object diff --git a/tools/helpers/files.py b/tools/helpers/files.py index 27f79ae9d..278e064d3 100644 --- a/tools/helpers/files.py +++ b/tools/helpers/files.py @@ -14,7 +14,7 @@ def read_file(relative_path, 
**kwargs): # content = re.sub(re.escape(placeholder), strval, content) content = content.replace(placeholder, strval) - return content + return content def remove_code_fences(text): return re.sub(r'~~~\w*\n|~~~', '', text) diff --git a/tools/helpers/rate_limiter.py b/tools/helpers/rate_limiter.py index 9fcb10b1c..ccf1d8520 100644 --- a/tools/helpers/rate_limiter.py +++ b/tools/helpers/rate_limiter.py @@ -1,36 +1,48 @@ import time from collections import deque -from .print_style import PrintStyle +from dataclasses import dataclass +from typing import List, Tuple -def rate_limiter(max_requests_per_minute, max_tokens_per_minute): - execution_times = deque() - token_counts = deque() +@dataclass +class CallRecord: + timestamp: float + input_tokens: int + output_tokens: int - def limit(tokens): - if tokens > max_tokens_per_minute: - raise ValueError("Number of tokens exceeds the maximum allowed per minute.") +class RateLimiter: + def __init__(self, max_calls: int, max_input_tokens: int, max_output_tokens: int, window_seconds: int = 60): + self.max_calls = max_calls + self.max_input_tokens = max_input_tokens + self.max_output_tokens = max_output_tokens + self.window_seconds = window_seconds + self.call_records: deque = deque() - current_time = time.time() - - # Cleanup old execution times and token counts - while execution_times and current_time - execution_times[0] > 60: - execution_times.popleft() - token_counts.popleft() + def _clean_old_records(self, current_time: float): + while self.call_records and current_time - self.call_records[0].timestamp > self.window_seconds: + self.call_records.popleft() - total_tokens = sum(token_counts) - - if len(execution_times) < max_requests_per_minute and total_tokens + tokens <= max_tokens_per_minute: - execution_times.append(current_time) - token_counts.append(tokens) - else: - sleep_time = max( - 60 - (current_time - execution_times[0]), - 60 - (current_time - execution_times[0]) if total_tokens + tokens > max_tokens_per_minute 
else 0 - ) - PrintStyle(font_color="yellow", padding=True).print(f"Rate limiter: sleeping for {sleep_time} seconds...") - time.sleep(sleep_time) + def _get_counts(self) -> Tuple[int, int, int]: + calls = len(self.call_records) + input_tokens = sum(record.input_tokens for record in self.call_records) + output_tokens = sum(record.output_tokens for record in self.call_records) + return calls, input_tokens, output_tokens + + def _wait_if_needed(self, current_time: float): + while True: + self._clean_old_records(current_time) + calls, input_tokens, output_tokens = self._get_counts() + + if calls < self.max_calls and input_tokens < self.max_input_tokens and output_tokens < self.max_output_tokens: + break + + oldest_record = self.call_records[0] + wait_time = oldest_record.timestamp + self.window_seconds - current_time + if wait_time > 0: + time.sleep(wait_time) current_time = time.time() - execution_times.append(current_time) - token_counts.append(tokens) - return limit \ No newline at end of file + def limit(self, input_token_count: int, output_token_count: int): + current_time = time.time() + self._wait_if_needed(current_time) + self.call_records.append(CallRecord(current_time, input_token_count, output_token_count)) + diff --git a/tools/helpers/tool.py b/tools/helpers/tool.py index a28a329e5..53feca036 100644 --- a/tools/helpers/tool.py +++ b/tools/helpers/tool.py @@ -11,7 +11,7 @@ class Response: class Tool: - def __init__(self, agent: Agent, name: str, args: dict, message: str, **kwargs) -> None: + def __init__(self, agent: Agent, name: str, args: dict[str,str], message: str, **kwargs) -> None: self.agent = agent self.name = name self.args = args @@ -22,11 +22,21 @@ class Tool: pass def before_execution(self): - PrintStyle(font_color="#1B4F72", padding=True, background_color="white", bold=True).print(f"{self.agent.name}: Using tool {self.name}:") - PrintStyle(font_color="#85C1E9").print(self.args) - + PrintStyle(font_color="#1B4F72", padding=True, 
background_color="white", bold=True).print(f"{self.agent.agent_name}: Using tool '{self.name}':") + if self.args and isinstance(self.args, dict): + for key, value in self.args.items(): + PrintStyle(font_color="#85C1E9", bold=True).stream(self.nice_key(key)+": ") + PrintStyle(font_color="#85C1E9", padding="\n" in value).stream(value) + PrintStyle().print() + def after_execution(self, response: Response): msg_response = files.read_file("./prompts/fw.tool_response.md", tool_name=self.name, tool_response=response.message) self.agent.append_message(msg_response, human=True) - PrintStyle(font_color="#1B4F72", background_color="white", padding=True, bold=True).print(f"{self.agent.name}: Response from {self.name}:") - PrintStyle(font_color="#85C1E9").print(response.message) \ No newline at end of file + PrintStyle(font_color="#1B4F72", background_color="white", padding=True, bold=True).print(f"{self.agent.agent_name}: Response from tool '{self.name}':") + PrintStyle(font_color="#85C1E9").print(response.message) + + def nice_key(self, key:str): + words = key.split('_') + words = [words[0].capitalize()] + [word.lower() for word in words[1:]] + result = ' '.join(words) + return result \ No newline at end of file diff --git a/tools/memorize.py b/tools/memorize.py new file mode 100644 index 000000000..6eef65eb8 --- /dev/null +++ b/tools/memorize.py @@ -0,0 +1,14 @@ +from agent import Agent +from tools.helpers import files +from tools.helpers.tool import Tool, Response +from tools import memory_tool + +class Memorize(Tool): + def execute(self): + + memory_tool.process_query(self.agent, str(self.args), "save") + + return Response( + message=files.read_file("prompts/fw.memorized.md"), + break_loop=False, + ) \ No newline at end of file diff --git a/tools/memory_tool.py b/tools/memory_tool.py index fa1534599..c77b57e6d 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -5,38 +5,39 @@ import os, json from tools.helpers.tool import Tool, Response from 
tools.helpers.print_style import PrintStyle -db: VectorDB -result_count = 3 #TODO parametrize better - +db: VectorDB | None = None class Memory(Tool): def execute(self): - result = process_query(self.agent, self.args["memory"],self.args["action"]) - return Response(message=result, break_loop=False) + #TODO separate param for memory tool result count + result = process_query(self.agent, self.args["memory"],self.args["action"], result_count=self.agent.auto_memory_count) + return Response(message="\n\n".join(result), break_loop=False) -def initialize(embeddings_model,messages_returned=3, subdir=""): - global db, result_count +def initialize(embeddings_model, subdir=""): + global db dir = os.path.join("memory",subdir) db = VectorDB(embeddings_model=embeddings_model, in_memory=False, cache_dir=dir) - result_count = messages_returned -def process_query(agent:Agent, message: str, action: str = "load", **kwargs): +def process_query(agent:Agent, message: str, action: str = "load", result_count: int = 3, **kwargs): + if not db: initialize(agent.embeddings_model, subdir=agent.memory_subdir) + if action.strip().lower() == "save": - id = db.insert_document(str(message)) + id = db.insert_document(str(message)) # type: ignore return files.read_file("./prompts/fw.memory_saved.md") elif action.strip().lower() == "delete": - deleted = db.delete_documents(message) + deleted = db.delete_documents(message) # type: ignore return files.read_file("./prompts/fw.memories_deleted.md", count=deleted) else: results=[] - docs = db.search_max_rel(message,result_count) + docs = db.search_max_rel(message,result_count) # type: ignore if len(docs)==0: return files.read_file("./prompts/fw.memories_not_found.md", query=message) for doc in docs: results.append(doc.page_content) - return "\n\n".join(results) + return results + # return "\n\n".join(results) diff --git a/tools/response.py b/tools/response.py index 1b1a3e401..805037290 100644 --- a/tools/response.py +++ b/tools/response.py @@ -15,6 +15,9 
@@ class ResponseTool(Tool): return Response(message=self.args["text"], break_loop=True) # else: + def after_execution(self, response): + pass # don't add anything to the history or output + # def execute(agent:Agent, message: str, _tools, _tool_index, timeout=15, **kwargs):