mirror of
https://github.com/agent0ai/agent-zero.git
synced 2026-05-19 07:59:34 +00:00
fix: make memory cleanup update stale fragments
This commit is contained in:
parent
904a0f4a25
commit
6ba1f30dca
11 changed files with 233 additions and 12 deletions
|
|
@ -363,11 +363,18 @@ class Memory:
|
|||
)
|
||||
|
||||
async def delete_documents_by_query(
|
||||
self, query: str, threshold: float, filter: str = ""
|
||||
self,
|
||||
query: str,
|
||||
threshold: float,
|
||||
filter: str = "",
|
||||
*,
|
||||
include_exact: bool = False,
|
||||
cascade: bool = False,
|
||||
):
|
||||
k = 100
|
||||
tot = 0
|
||||
removed = []
|
||||
removed_ids: set[str] = set()
|
||||
|
||||
while True:
|
||||
# Perform similarity search with score
|
||||
|
|
@ -379,6 +386,7 @@ class Memory:
|
|||
# Extract document IDs and filter based on score
|
||||
# document_ids = [result[0].metadata["id"] for result in docs if result[1] < score_limit]
|
||||
document_ids = [result.metadata["id"] for result in docs]
|
||||
removed_ids.update(str(doc_id) for doc_id in document_ids)
|
||||
|
||||
# Delete documents with IDs over the threshold score
|
||||
if document_ids:
|
||||
|
|
@ -392,15 +400,45 @@ class Memory:
|
|||
if len(document_ids) < k:
|
||||
break
|
||||
|
||||
if include_exact:
|
||||
exact_docs = self._find_exact_query_docs(query, filter, removed_ids)
|
||||
if exact_docs:
|
||||
exact_ids = [doc.metadata["id"] for doc in exact_docs]
|
||||
await self.db.adelete(ids=exact_ids)
|
||||
removed += exact_docs
|
||||
removed_ids.update(str(doc_id) for doc_id in exact_ids)
|
||||
tot += len(exact_ids)
|
||||
|
||||
if cascade and removed_ids:
|
||||
related_docs = self._find_related_docs_by_ids(removed_ids)
|
||||
if related_docs:
|
||||
related_ids = [doc.metadata["id"] for doc in related_docs]
|
||||
await self.db.adelete(ids=related_ids)
|
||||
removed += related_docs
|
||||
removed_ids.update(str(doc_id) for doc_id in related_ids)
|
||||
tot += len(related_ids)
|
||||
|
||||
if tot:
|
||||
self._save_db() # persist
|
||||
return removed
|
||||
|
||||
async def delete_documents_by_ids(self, ids: list[str]):
|
||||
async def delete_documents_by_ids(
|
||||
self, ids: list[str], *, cascade: bool = False, filter: str = ""
|
||||
):
|
||||
# aget_by_ids is not yet implemented in faiss, need to do a workaround
|
||||
rem_docs = await self.db.aget_by_ids(
|
||||
ids
|
||||
) # existing docs to remove (prevents error)
|
||||
rem_ids = [doc.metadata["id"] for doc in rem_docs]
|
||||
|
||||
if cascade:
|
||||
related_docs = self._find_related_docs_by_ids(set(ids) | set(rem_ids))
|
||||
if related_docs:
|
||||
existing = {doc.metadata["id"] for doc in rem_docs}
|
||||
rem_docs.extend(
|
||||
doc for doc in related_docs if doc.metadata["id"] not in existing
|
||||
)
|
||||
|
||||
if rem_docs:
|
||||
rem_ids = [doc.metadata["id"] for doc in rem_docs] # ids to remove
|
||||
await self.db.adelete(ids=rem_ids)
|
||||
|
|
@ -445,6 +483,47 @@ class Memory:
|
|||
if not self.db.get_by_ids(doc_id): # check if exists
|
||||
return doc_id
|
||||
|
||||
def _find_exact_query_docs(
    self, query: str, filter: str, skip_ids: set[str]
) -> list[Document]:
    """Return stored documents that literally contain ``query``.

    The query and each candidate are normalized with
    ``_normalize_memory_match_text`` and compared by substring. A candidate's
    searchable text is its ``page_content`` followed by its metadata
    serialized as JSON.

    Args:
        query: Text to look for.
        filter: Optional metadata filter expression; when non-empty it is
            compiled with ``Memory._get_comparator`` and documents that do
            not satisfy it are ignored.
        skip_ids: Document ids (as strings) that must not be returned.

    Returns:
        Matching documents, in the iteration order of ``self.db.get_all_docs()``.
        Empty when the normalized query is shorter than three characters.
    """
    needle = _normalize_memory_match_text(query)
    # Needles shorter than three characters are rejected (presumably so a
    # stray character cannot match nearly every stored document).
    if len(needle) < 3:
        return []

    docs: list[Document] = []
    comparator = Memory._get_comparator(filter) if filter else None
    for doc in self.db.get_all_docs().values():
        doc_id = str(doc.metadata.get("id", ""))
        if not doc_id or doc_id in skip_ids:
            continue
        if comparator and not comparator(doc.metadata):
            continue
        # ensure_ascii=False keeps non-ASCII metadata as real characters;
        # with the default escaping ("\uXXXX") a non-ASCII query could never
        # match metadata values.
        metadata_text = json.dumps(
            doc.metadata, sort_keys=True, default=str, ensure_ascii=False
        )
        haystack = _normalize_memory_match_text(
            f"{doc.page_content}\n{metadata_text}"
        )
        if needle in haystack:
            docs.append(doc)
    return docs
|
||||
|
||||
def _find_related_docs_by_ids(
    self, ids: set[str], filter: str = ""
) -> list[Document]:
    """Collect stored documents whose metadata refers to any of ``ids``.

    Ids are compared as strings and empty ids are dropped. Documents whose
    own id is empty or is one of ``ids`` are never returned. When ``filter``
    is non-empty it is compiled with ``Memory._get_comparator`` and only
    documents satisfying it are considered.
    """
    wanted = {text for text in map(str, ids) if text}
    if not wanted:
        return []

    matches_filter = Memory._get_comparator(filter) if filter else None
    related: list[Document] = []
    for candidate in self.db.get_all_docs().values():
        own_id = str(candidate.metadata.get("id", ""))
        if own_id == "" or own_id in wanted:
            continue
        if matches_filter and not matches_filter(candidate.metadata):
            continue
        if not _metadata_references_any(candidate.metadata, wanted):
            continue
        related.append(candidate)
    return related
|
||||
|
||||
@staticmethod
|
||||
def _save_db_file(db: MyFaiss, memory_subdir: str):
|
||||
abs_dir = abs_db_dir(memory_subdir)
|
||||
|
|
@ -547,6 +626,23 @@ def reload():
|
|||
Memory.index = {}
|
||||
|
||||
|
||||
def _normalize_memory_match_text(value: str) -> str:
|
||||
return " ".join(str(value or "").casefold().split())
|
||||
|
||||
|
||||
def _metadata_references_any(value: Any, ids: set[str]) -> bool:
|
||||
if isinstance(value, dict):
|
||||
return any(_metadata_references_any(item, ids) for item in value.values())
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
return any(_metadata_references_any(item, ids) for item in value)
|
||||
text = str(value or "").strip()
|
||||
if not text:
|
||||
return False
|
||||
if text in ids:
|
||||
return True
|
||||
return any(doc_id in text.split(",") for doc_id in ids)
|
||||
|
||||
|
||||
def abs_db_dir(memory_subdir: str) -> str:
|
||||
# patch for projects, this way we don't need to re-work the structure of memory subdirs
|
||||
if memory_subdir.startswith("projects/"):
|
||||
|
|
|
|||
|
|
@ -728,9 +728,11 @@ class MemoryConsolidator:
|
|||
updated_count += 1
|
||||
updated_ids.append(new_id)
|
||||
|
||||
# Step 2: Insert additional new memory if provided
|
||||
# Step 2: Insert the new memory only when no existing memory was updated.
|
||||
# UPDATE means "repopulate the existing subject", not "append another
|
||||
# equally-important memory". This keeps mutable facts from piling up.
|
||||
new_memory_id = None
|
||||
if result.new_memory_content:
|
||||
if result.new_memory_content and not updated_ids:
|
||||
# LLM metadata takes precedence over original metadata when there are conflicts
|
||||
final_metadata = {
|
||||
'area': area,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# Memories on the topic
|
||||
- following are memories about current topic
|
||||
- do not overly rely on them; they might not be relevant
|
||||
- if memories conflict, prefer the newest/current fact and ignore superseded older fragments
|
||||
|
||||
{{memories}}
|
||||
{{memories}}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,10 @@ notes:
|
|||
- `threshold` is similarity from `0` to `1`
|
||||
- `filter` is a metadata expression (e.g. `area=='main'`)
|
||||
- confirm destructive changes when accuracy matters
|
||||
- when the user updates a durable fact/preference, load related memories first, forget/delete superseded versions, then save one complete current version
|
||||
- do not append a second memory for the same mutable subject when the new statement replaces the old one
|
||||
- `memory_forget` also cleans exact matches and derived fragment/solution records related to removed memories
|
||||
- use `memory_save` for stable current facts, not short-lived test markers, greetings, or one-off conversation events
|
||||
|
||||
example:
|
||||
~~~json
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ Analyze a new memory alongside existing similar memories and determine whether t
|
|||
- **keep_separate** if memories serve different purposes
|
||||
- **skip** consolidation if no action is beneficial
|
||||
|
||||
Default bias: for mutable user preferences, project state, configuration choices, names, locations, active tasks, or "current" facts about the same subject, prefer **update** or **replace** over appending another separate memory.
|
||||
|
||||
## Consolidation Analysis Guidelines
|
||||
|
||||
|
|
@ -22,14 +23,16 @@ Analyze a new memory alongside existing similar memories and determine whether t
|
|||
|
||||
### 1. Temporal Intelligence
|
||||
- **Newer information** generally supersedes older information
|
||||
- **Preserve historical context** when consolidating - don't lose important chronological details
|
||||
- **Consider recency** - more recent memories may be more relevant
|
||||
- **Preserve historical context** only when the user explicitly needs history or an audit trail
|
||||
- **Do not keep old preferences as equally important memories** when the new memory clearly gives the current state
|
||||
- **Consider recency** - more recent memories are usually more relevant for mutable facts
|
||||
|
||||
### 2. Content Relationships
|
||||
- **Complementary information** should be merged into comprehensive memories
|
||||
- **Contradictory information** requires careful analysis of which is more accurate/current
|
||||
- **Duplicate content** should be consolidated to eliminate redundancy
|
||||
- **Distinct but related topics** may be better kept separate
|
||||
- **Same subject, changed value** should usually be update or replace, not keep_separate
|
||||
|
||||
### 3. Quality Assessment
|
||||
- **More detailed/complete** information should be preserved
|
||||
|
|
@ -79,8 +82,8 @@ Provide your analysis as a JSON object with this exact structure:
|
|||
|
||||
- **merge**: Combine multiple memories into one comprehensive memory, removing originals
|
||||
- **replace**: Replace outdated, incorrect, or superseded memories with new version, preserving important metadata. Use when new information directly contradicts or makes old information obsolete.
|
||||
- **keep_separate**: New memory addresses different aspects, keep all memories separate
|
||||
- **update**: Enhance existing memory with additional details from new memory
|
||||
- **keep_separate**: New memory addresses a genuinely different subject or stable historical event, keep all memories separate
|
||||
- **update**: Repopulate an existing memory for the same subject with the latest complete current version; do not insert an additional memory when the updated memory is sufficient
|
||||
- **skip**: No consolidation needed, use simple insertion for new memory
|
||||
|
||||
## Example Consolidation Scenarios
|
||||
|
|
@ -95,6 +98,11 @@ Provide your analysis as a JSON object with this exact structure:
|
|||
**Existing**: "User API endpoint is /api/users for getting user data"
|
||||
**Action**: replace → Update with new endpoint, note the change in historical_notes
|
||||
|
||||
### Scenario 2b: Update Current Preference
|
||||
**New**: "User now prefers concise technical answers with examples"
|
||||
**Existing**: "User prefers long exploratory answers"
|
||||
**Action**: update -> Rewrite the existing user-preference memory to the new current preference. Do not keep both as equally relevant memories.
|
||||
|
||||
**REPLACE Criteria**: Use replace when:
|
||||
- **High similarity score** (>0.9) indicates very similar content
|
||||
- New information directly contradicts existing information
|
||||
|
|
@ -116,6 +124,7 @@ Provide your analysis as a JSON object with this exact structure:
|
|||
3. **Maintain Context**: Keep temporal and source information where relevant
|
||||
4. **Enhance Searchability**: Use consolidation to improve future memory retrieval
|
||||
5. **Reduce Redundancy**: Eliminate unnecessary duplication while preserving nuance
|
||||
6. **Keep Current Facts Current**: For mutable facts, the final memory should represent the latest usable state, not a human-like archive of every old version
|
||||
|
||||
## Instructions
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@
|
|||
- Focus on USER MESSAGE if provided, use HISTORY for context
|
||||
- Keep in mind that these memories should be helpful for continuing the conversation and solving problems by AI
|
||||
- Consider if each memory holds real information value for the context or not
|
||||
- If multiple memories conflict about the same mutable user/project fact, include only the newest/current one when it is identifiable
|
||||
- Exclude superseded, historical, duplicate, or low-detail fragments when a more complete current memory is available
|
||||
|
||||
# Include only when:
|
||||
- Memory is relevant to the current situation
|
||||
|
|
@ -22,6 +24,7 @@
|
|||
- Short vague texts like "Pet inquiry" or "Programming skills" with no more detail
|
||||
- Common conversation patterns like greetings
|
||||
- Memories that hold no information value
|
||||
- Older conflicting memories for the same preference or project state when a newer/current memory is available
|
||||
|
||||
# Example output
|
||||
```json
|
||||
|
|
@ -32,4 +35,4 @@
|
|||
> "User has greeted me" (no information value)
|
||||
> "Hello world program" (just title, no details, no context, irrelevant by itself)
|
||||
> "Today is Monday" (just date, information obsolete, not helpful)
|
||||
> "Memory search" (just title, irrelevant by itself)
|
||||
> "Memory search" (just title, irrelevant by itself)
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@
|
|||
- Do not break information related to the same subject into multiple memories, keep them as one text
|
||||
- If there are multiple facts related to the same subject, merge them into one more detailed memory instead
|
||||
- Example: Instead of three memories "User's dog is Max", "Max is 6 years old", "Max is white and brown", create one memory "User's dog is Max, 6 years old, white and brown."
|
||||
- If the history changes or corrects a previously stated fact, output only the new complete current fact; do not output both old and new versions
|
||||
- Prefer a single durable profile-style sentence for mutable user/project preferences, such as "User currently prefers..." or "Project currently uses..."
|
||||
- Do not memorize temporary test markers, temporary behavior checks, or cleanup-only facts
|
||||
|
||||
# Correct examples of data worth memorizing with (explanation)
|
||||
> User's name is John Doe (name is important)
|
||||
|
|
@ -45,6 +48,8 @@
|
|||
> Today is Monday (just date, no value in this information)
|
||||
> Market inquiry (just a topic without detail)
|
||||
> RAM Status (just a topic without detail)
|
||||
> User used to prefer X before changing to Y (historical preference is usually not useful; memorize the current preference only)
|
||||
> Temporary marker ABC123 was used in a memory test (test residue, not useful)
|
||||
|
||||
|
||||
# Further WRONG examples
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ class MemoryDelete(Tool):
|
|||
async def execute(self, ids="", **kwargs):
|
||||
db = await Memory.get(self.agent)
|
||||
ids = [id.strip() for id in ids.split(",") if id.strip()]
|
||||
dels = await db.delete_documents_by_ids(ids=ids)
|
||||
dels = await db.delete_documents_by_ids(ids=ids, cascade=True)
|
||||
|
||||
result = self.agent.read_prompt("fw.memories_deleted.md", memory_count=len(dels))
|
||||
return Response(message=result, break_loop=False)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,13 @@ class MemoryForget(Tool):
|
|||
|
||||
async def execute(self, query="", threshold=DEFAULT_THRESHOLD, filter="", **kwargs):
|
||||
db = await Memory.get(self.agent)
|
||||
dels = await db.delete_documents_by_query(query=query, threshold=threshold, filter=filter)
|
||||
dels = await db.delete_documents_by_query(
|
||||
query=query,
|
||||
threshold=threshold,
|
||||
filter=filter,
|
||||
include_exact=True,
|
||||
cascade=True,
|
||||
)
|
||||
|
||||
result = self.agent.read_prompt("fw.memories_deleted.md", memory_count=len(dels))
|
||||
return Response(message=result, break_loop=False)
|
||||
|
|
|
|||
93
tests/test_memory_cleanup.py
Normal file
93
tests/test_memory_cleanup.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from plugins._memory.helpers.memory import Memory
|
||||
|
||||
|
||||
class FakeFaiss:
    """In-memory stand-in for the vector store used by the cleanup tests.

    Documents are kept in a dict keyed by ``metadata["id"]``; every id passed
    to ``adelete`` is recorded in ``deleted`` in call order.
    """

    def __init__(self, docs: list[Document]):
        self.docs = {}
        for doc in docs:
            self.docs[doc.metadata["id"]] = doc
        self.deleted: list[str] = []

    async def asearch(self, *_args, **_kwargs):
        # Similarity search never finds anything in this fake.
        return []

    async def adelete(self, ids):
        for doc_id in ids:
            self.deleted.append(doc_id)
            # Unknown ids are recorded but otherwise ignored.
            self.docs.pop(doc_id, None)

    async def aget_by_ids(self, ids):
        return self.get_by_ids(ids)

    def get_all_docs(self):
        return self.docs

    def get_by_ids(self, ids):
        found = []
        for doc_id in ids:
            if doc_id in self.docs:
                found.append(self.docs[doc_id])
        return found
|
||||
|
||||
|
||||
def test_memory_forget_removes_exact_matches_and_derived_fragments():
    """Forgetting by query removes the literal match and the fragment derived from it."""
    documents = [
        Document(
            page_content="User currently prefers memory cleanup token banana-397.",
            metadata={"id": "main-1", "area": "main"},
        ),
        Document(
            page_content="Derived note from old preference.",
            metadata={
                "id": "fragment-1",
                "area": "fragments",
                "consolidated_from": ["main-1"],
            },
        ),
        Document(
            page_content="Unrelated memory about project setup.",
            metadata={"id": "other-1", "area": "main"},
        ),
    ]
    fake_db = FakeFaiss(documents)
    memory = Memory(fake_db, memory_subdir="test")
    # Persistence is irrelevant here; replace it with a no-op.
    memory._save_db = lambda: None

    pending = memory.delete_documents_by_query(
        query="banana-397",
        threshold=0.99,
        include_exact=True,
        cascade=True,
    )
    removed = asyncio.run(pending)

    removed_ids = {doc.metadata["id"] for doc in removed}
    assert removed_ids == {"main-1", "fragment-1"}
    assert fake_db.deleted == ["main-1", "fragment-1"]
    assert set(fake_db.docs) == {"other-1"}
|
||||
|
||||
|
||||
def test_memory_delete_cascades_even_when_original_id_is_already_missing():
    """Cascade delete still removes a dependent record when the requested id is absent."""
    replacement_metadata = {
        "id": "replacement-1",
        "area": "main",
        "updated_from": "old-pref-1",
    }
    replacement = Document(
        page_content="User currently prefers concise technical answers.",
        metadata=replacement_metadata,
    )
    fake_db = FakeFaiss([replacement])
    memory = Memory(fake_db, memory_subdir="test")
    # Persistence is irrelevant here; replace it with a no-op.
    memory._save_db = lambda: None

    pending = memory.delete_documents_by_ids(["old-pref-1"], cascade=True)
    removed = asyncio.run(pending)

    removed_ids = [doc.metadata["id"] for doc in removed]
    assert removed_ids == ["replacement-1"]
    assert fake_db.deleted == ["replacement-1"]
    assert fake_db.docs == {}
|
||||
|
|
@ -173,6 +173,8 @@ def test_memory_forget_tool_imports_plugin_memory_load(monkeypatch):
|
|||
"query": "codex memory forget token",
|
||||
"threshold": 0.99,
|
||||
"filter": "area=='codex_sweep'",
|
||||
"include_exact": True,
|
||||
"cascade": True,
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue