updated ai sdk object structure

2026-05-22 11:08:50 +00:00 · 2026-04-17 22:55:02 -07:00 · 2026-04-17 22:55:02 -07:00 · 1da2ae0529
commit 1da2ae0529
parent 7b99822ae3
10 changed files with 377 additions and 371 deletions
--- a/apps/docs/integrations/ai-sdk.mdx
+++ b/apps/docs/integrations/ai-sdk.mdx
@ -35,8 +35,7 @@ import { generateText } from "ai"
 import { withSupermemory } from "@supermemory/tools/vercel"
 import { openai } from "@ai-sdk/openai"

-const modelWithMemory = withSupermemory({
-  model: openai("gpt-4"),
+const modelWithMemory = withSupermemory(openai("gpt-4"), {
  containerTag: "user-123",
  customId: "conv-456"
 })
@ -51,8 +50,7 @@ const result = await generateText({
  **Memory saving is enabled by default.** The middleware automatically saves conversations to memory. To disable memory saving:

  ```typescript
-  const modelWithMemory = withSupermemory({
-    model: openai("gpt-4"),
+  const modelWithMemory = withSupermemory(openai("gpt-4"), {
    containerTag: "user-123",
    customId: "conv-456",
    addMemory: "never"
@ -65,8 +63,7 @@ const result = await generateText({
 **Profile Mode (Default)** - Retrieves the user's complete profile:

 ```typescript
-const model = withSupermemory({
-  model: openai("gpt-4"),
+const model = withSupermemory(openai("gpt-4"), {
  containerTag: "user-123",
  customId: "conv-456",
  mode: "profile"
@ -76,8 +73,7 @@ const model = withSupermemory({
 **Query Mode** - Searches memories based on the user's message:

 ```typescript
-const model = withSupermemory({
-  model: openai("gpt-4"),
+const model = withSupermemory(openai("gpt-4"), {
  containerTag: "user-123",
  customId: "conv-456",
  mode: "query"
@ -87,8 +83,7 @@ const model = withSupermemory({
 **Full Mode** - Combines profile AND query-based search:

 ```typescript
-const model = withSupermemory({
-  model: openai("gpt-4"),
+const model = withSupermemory(openai("gpt-4"), {
  containerTag: "user-123",
  customId: "conv-456",
  mode: "full"
@ -113,8 +108,7 @@ const claudePrompt = (data: MemoryPromptData) => `
 </context>
 `.trim()

-const model = withSupermemory({
-  model: anthropic("claude-3-sonnet"),
+const model = withSupermemory(anthropic("claude-3-sonnet"), {
  containerTag: "user-123",
  customId: "conv-456",
  mode: "full",
@ -125,8 +119,7 @@ const model = withSupermemory({
 ### Verbose Logging

 ```typescript
-const model = withSupermemory({
-  model: openai("gpt-4"),
+const model = withSupermemory(openai("gpt-4"), {
  containerTag: "user-123",
  customId: "conv-456",
  verbose: true
--- a/packages/tools/README.md
+++ b/packages/tools/README.md
@ -57,8 +57,8 @@ const addTool = addMemoryTool(process.env.SUPERMEMORY_API_KEY!, {

 #### AI SDK Middleware with Supermemory

- `withSupermemory` will take advantage supermemory profile v4 endpoint personalized based on container tag
- You can provide the Supermemory API key via the `apiKey` option to `withSupermemory` (recommended for browser usage), or fall back to `SUPERMEMORY_API_KEY` in the environment for server usage.
+- `withSupermemory` wraps any language model with supermemory capabilities using the v4 profile endpoint
+- You can provide the Supermemory API key via the `apiKey` option (recommended for browser usage), or fall back to `SUPERMEMORY_API_KEY` in the environment for server usage
 - **Per-turn caching**: Memory injection is cached for tool-call continuations within the same user turn. The middleware detects when the AI SDK is continuing a multi-step flow (e.g., after a tool call) and reuses the cached memories instead of making redundant API calls. A fresh fetch occurs on each new user message turn.

 ```typescript
@ -66,35 +66,36 @@ import { generateText } from "ai"
 import { withSupermemory } from "@supermemory/tools/ai-sdk"
 import { openai } from "@ai-sdk/openai"

-const modelWithMemory = withSupermemory(openai("gpt-5"), "user_id_life")
+const modelWithMemory = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conversation-456",
+})

 const result = await generateText({
-	model: modelWithMemory,
-	messages: [{ role: "user", content: "where do i live?" }],
+  model: modelWithMemory,
+  messages: [{ role: "user", content: "where do i live?" }],
 })

 console.log(result.text)
 ```

-#### Conversation Grouping
+#### Configuration Options

-Use the `conversationId` option to group messages into a single document for contextual memory generation:
+The `withSupermemory` function accepts a model and a configuration object:

 ```typescript
-import { generateText } from "ai"
-import { withSupermemory } from "@supermemory/tools/ai-sdk"
-import { openai } from "@ai-sdk/openai"
-
-const modelWithMemory = withSupermemory(openai("gpt-5"), "user_id_life", {
-	conversationId: "conversation-456"
+withSupermemory(model, {
+  containerTag: string,      // Required: User/container identifier for memory scoping
+  customId: string,          // Required: Conversation ID for grouping messages
+  mode?: "profile" | "query" | "full",  // Memory retrieval mode (default: "profile")
+  addMemory?: "always" | "never",       // Auto-save conversations (default: "always")
+  searchMode?: "memories" | "hybrid" | "documents",  // Search mode (default: "memories")
+  searchLimit?: number,      // Max search results for hybrid/documents mode (default: 10)
+  verbose?: boolean,         // Enable detailed logging (default: false)
+  apiKey?: string,           // Supermemory API key (falls back to env var)
+  baseUrl?: string,          // Custom API base URL
+  promptTemplate?: (data: MemoryPromptData) => string,  // Custom memory formatting
 })
-
-const result = await generateText({
-	model: modelWithMemory,
-	messages: [{ role: "user", content: "where do i live?" }],
-})
-
-console.log(result.text)
 ```

 #### Verbose Mode
@ -106,13 +107,15 @@ import { generateText } from "ai"
 import { withSupermemory } from "@supermemory/tools/ai-sdk"
 import { openai } from "@ai-sdk/openai"

-const modelWithMemory = withSupermemory(openai("gpt-5"), "user_id_life", {
-	verbose: true
+const modelWithMemory = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  verbose: true,
 })

 const result = await generateText({
-	model: modelWithMemory,
-	messages: [{ role: "user", content: "where do i live?" }],
+  model: modelWithMemory,
+  messages: [{ role: "user", content: "where do i live?" }],
 })

 console.log(result.text)
@ -120,7 +123,7 @@ console.log(result.text)

 When verbose mode is enabled, you'll see console output like:
 ```
-[supermemory] Searching memories for container: user_id_life
+[supermemory] Searching memories for container: user-123
 [supermemory] User message: where do i live?
 [supermemory] System prompt exists: false
 [supermemory] Found 3 memories
@ -139,11 +142,10 @@ import { withSupermemory } from "@supermemory/tools/ai-sdk"
 import { openai } from "@ai-sdk/openai"

 // Uses profile mode by default - gets all user profile memories
-const modelWithMemory = withSupermemory(openai("gpt-4"), "user-123")
-
-// Explicitly specify profile mode
-const modelWithProfile = withSupermemory(openai("gpt-4"), "user-123", { 
-  mode: "profile" 
+const modelWithMemory = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  mode: "profile",
 })

 const result = await generateText({
@ -158,8 +160,10 @@ import { generateText } from "ai"
 import { withSupermemory } from "@supermemory/tools/ai-sdk"
 import { openai } from "@ai-sdk/openai"

-const modelWithQuery = withSupermemory(openai("gpt-4"), "user-123", { 
-  mode: "query" 
+const modelWithQuery = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  mode: "query",
 })

 const result = await generateText({
@ -174,8 +178,10 @@ import { generateText } from "ai"
 import { withSupermemory } from "@supermemory/tools/ai-sdk"
 import { openai } from "@ai-sdk/openai"

-const modelWithFull = withSupermemory(openai("gpt-4"), "user-123", { 
-  mode: "full" 
+const modelWithFull = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  mode: "full",
 })

 const result = await generateText({
@ -184,38 +190,58 @@ const result = await generateText({
 })
 ```

-#### Automatic Memory Capture
+#### RAG with Hybrid Search

-The middleware can automatically save user messages as memories:
+Use `searchMode: "hybrid"` to search both memories AND document chunks (recommended for RAG applications):

-**Always Save Memories** - Automatically stores every user message as a memory:
 ```typescript
 import { generateText } from "ai"
 import { withSupermemory } from "@supermemory/tools/ai-sdk"
 import { openai } from "@ai-sdk/openai"

-const modelWithAutoSave = withSupermemory(openai("gpt-4"), "user-123", {
-  addMemory: "always"
+const ragModel = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  mode: "full",
+  searchMode: "hybrid",  // Search both memories and document chunks
+  searchLimit: 15,       // Return up to 15 results
+})
+
+const result = await generateText({
+  model: ragModel,
+  messages: [{ role: "user", content: "What's in my documents about quarterly goals?" }],
+})
+```
+
+#### Automatic Memory Capture
+
+The middleware can automatically save conversations as memories:
+
+**Always Save Memories (Default)** - Automatically stores conversations:
+```typescript
+import { generateText } from "ai"
+import { withSupermemory } from "@supermemory/tools/ai-sdk"
+import { openai } from "@ai-sdk/openai"
+
+const modelWithAutoSave = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  addMemory: "always",
 })

 const result = await generateText({
  model: modelWithAutoSave,
  messages: [{ role: "user", content: "I prefer React with TypeScript for my projects" }],
 })
-// This message will be automatically saved as a memory
+// This conversation will be automatically saved as a memory
 ```

-**Never Save Memories (Default)** - Only retrieves memories without storing new ones:
+**Never Save Memories** - Only retrieves memories without storing new ones:
 ```typescript
-const modelWithNoSave = withSupermemory(openai("gpt-4"), "user-123")
-```
-
-**Combined Options** - Use verbose logging with specific modes and memory storage:
-```typescript
-const modelWithOptions = withSupermemory(openai("gpt-4"), "user-123", {
-  mode: "profile",
-  addMemory: "always",
-  verbose: true
+const modelWithNoSave = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
+  addMemory: "never",
 })
 ```

@ -239,7 +265,9 @@ ${data.generalSearchMemories}
 </user_memories>
 `.trim()

-const modelWithCustomPrompt = withSupermemory(openai("gpt-4"), "user-123", {
+const modelWithCustomPrompt = withSupermemory(openai("gpt-4"), {
+  containerTag: "user-123",
+  customId: "conv-456",
  mode: "full",
  promptTemplate: customPrompt,
 })
@ -646,23 +674,30 @@ Without `strict: true`, optional fields like `includeFullDocs` and `limit` won't

 ### withSupermemory Middleware Options

-The `withSupermemory` middleware accepts additional configuration options:
+The `withSupermemory` middleware accepts a model and a configuration object:

 ```typescript
-interface WithSupermemoryOptions {
-  conversationId?: string
-  verbose?: boolean
-  mode?: "profile" | "query" | "full"
-  addMemory?: "always" | "never"
-  /** Optional Supermemory API key. Use this in browser environments. */
-  apiKey?: string
+interface WithSupermemoryConfig {
+  containerTag: string        // Required: User/container identifier for memory scoping
+  customId: string            // Required: Conversation ID for grouping messages
+  verbose?: boolean           // Enable detailed logging (default: false)
+  mode?: "profile" | "query" | "full"  // Memory retrieval mode (default: "profile")
+  searchMode?: "memories" | "hybrid" | "documents"  // Search mode (default: "memories")
+  searchLimit?: number        // Max search results for hybrid/documents mode (default: 10)
+  addMemory?: "always" | "never"  // Auto-save conversations (default: "always")
+  apiKey?: string             // Supermemory API key (falls back to SUPERMEMORY_API_KEY env var)
+  baseUrl?: string            // Custom API base URL
+  promptTemplate?: (data: MemoryPromptData) => string  // Custom memory formatting
 }
 ```

- **conversationId**: Optional conversation ID to group messages into a single document for contextual memory generation
+- **containerTag**: Required. The container tag/identifier for memory search (e.g., user ID, project ID)
+- **customId**: Required. Custom ID to group messages into a single document for contextual memory generation
 - **verbose**: Enable detailed logging of memory search and injection process (default: false)
 - **mode**: Memory search mode - "profile" (default), "query", or "full"
- **addMemory**: Automatic memory storage mode - "always" or "never" (default: "never")
+- **searchMode**: Search mode - "memories" (default), "hybrid" (memories + chunks), or "documents" (chunks only)
+- **searchLimit**: Maximum number of search results when using hybrid/documents mode (default: 10)
+- **addMemory**: Automatic memory storage mode - "always" (default) or "never"

 ## Available Tools

--- a/packages/tools/src/shared/cache.ts
+++ b/packages/tools/src/shared/cache.ts
@ -5,8 +5,12 @@ import type { MemoryMode } from "./types"
 * Generic memory cache for storing per-turn memories to avoid redundant API calls.
 * Used to cache memory retrieval results during tool-call loops within the same turn.
 */
-export class MemoryCache<T = string> {
-	private cache: LRUCache<string, T> = new LRUCache({ max: 100 })
+export class MemoryCache<T extends {} = string> {
+	private cache: LRUCache<string, T>
+
+	constructor() {
+		this.cache = new LRUCache<string, T>({ max: 100 })
+	}

 	/**
 	 * Generates a cache key for the current turn based on context parameters.
--- a/packages/tools/src/shared/index.ts
+++ b/packages/tools/src/shared/index.ts
@ -3,6 +3,7 @@ export type {
 	MemoryPromptData,
 	PromptTemplate,
 	MemoryMode,
+	SearchMode,
 	AddMemoryMode,
 	Logger,
 	ProfileStructure,
--- a/packages/tools/src/shared/memory-client.ts
+++ b/packages/tools/src/shared/memory-client.ts
@ -2,6 +2,7 @@ import { deduplicateMemories } from "../tools-shared"
 import type {
 	Logger,
 	MemoryMode,
+	SearchMode,
 	MemoryPromptData,
 	ProfileStructure,
 	PromptTemplate,
@ -72,6 +73,15 @@ export interface BuildMemoriesTextOptions {
 	apiKey: string
 	logger: Logger
 	promptTemplate?: PromptTemplate
+	/**
+	 * Search mode for memory retrieval:
+	 * - "memories": Search only memory entries (default)
+	 * - "hybrid": Search both memories AND document chunks
+	 * - "documents": Search only document chunks
+	 */
+	searchMode?: SearchMode
+	/** Maximum number of search results to return (default: 10) */
+	searchLimit?: number
 }

 /**
--- a/packages/tools/src/shared/types.ts
+++ b/packages/tools/src/shared/types.ts
@ -47,6 +47,14 @@ export type PromptTemplate = (data: MemoryPromptData) => string
 */
 export type MemoryMode = "profile" | "query" | "full"

+/**
+ * Search mode for memory retrieval:
+ * - "memories": Search only memory entries (default)
+ * - "hybrid": Search both memories AND document chunks (recommended for RAG)
+ * - "documents": Search only document chunks
+ */
+export type SearchMode = "memories" | "hybrid" | "documents"
+
 /**
 * Memory persistence mode:
 * - "always": Automatically save conversations as memories
--- a/packages/tools/src/vercel/index.ts
+++ b/packages/tools/src/vercel/index.ts
@ -12,9 +12,10 @@ import {
 } from "./middleware"
 import type { PromptTemplate, MemoryPromptData } from "./memory-prompt"

-interface WrapVercelLanguageModelOptions<T extends LanguageModel> {
-	/** The language model to wrap with supermemory capabilities */
-	model: T
+/**
+ * Configuration options for Supermemory integration
+ */
+interface WithSupermemoryConfig {
 	/** The container tag/identifier for memory search (e.g., user ID, project ID) */
 	containerTag: string
 	/** Custom ID to group messages into a single document. Required. */
@ -76,17 +77,17 @@ interface WrapVercelLanguageModelOptions<T extends LanguageModel> {
 * Supports both Vercel AI SDK 5 (LanguageModelV2) and SDK 6 (LanguageModelV3) via runtime
 * detection of `model.specificationVersion`.
 *
- * @param options - Configuration object containing model and Supermemory options
- * @param options.model - The language model to wrap with supermemory capabilities (V2 or V3)
- * @param options.containerTag - Required. The container tag/identifier for memory search (e.g., user ID, project ID)
- * @param options.customId - Required. Custom ID to group messages into a single document
- * @param options.verbose - Optional flag to enable detailed logging of memory search and injection process (default: false)
- * @param options.mode - Optional mode for memory search: "profile", "query", or "full" (default: "profile")
- * @param options.searchMode - Optional search mode: "memories" (default), "hybrid" (memories + chunks), or "documents" (chunks only)
- * @param options.searchLimit - Optional maximum number of search results when using hybrid/documents mode (default: 10)
- * @param options.addMemory - Optional mode for memory persistence: "always" (default - saves conversations), "never" (read-only mode)
- * @param options.apiKey - Optional Supermemory API key to use instead of the environment variable
- * @param options.baseUrl - Optional base URL for the Supermemory API (default: "https://api.supermemory.ai")
+ * @param model - The language model to wrap with supermemory capabilities (V2 or V3)
+ * @param config - Configuration object for Supermemory integration
+ * @param config.containerTag - Required. The container tag/identifier for memory search (e.g., user ID, project ID)
+ * @param config.customId - Required. Custom ID to group messages into a single document
+ * @param config.verbose - Optional flag to enable detailed logging of memory search and injection process (default: false)
+ * @param config.mode - Optional mode for memory search: "profile", "query", or "full" (default: "profile")
+ * @param config.searchMode - Optional search mode: "memories" (default), "hybrid" (memories + chunks), or "documents" (chunks only)
+ * @param config.searchLimit - Optional maximum number of search results when using hybrid/documents mode (default: 10)
+ * @param config.addMemory - Optional mode for memory persistence: "always" (default - saves conversations), "never" (read-only mode)
+ * @param config.apiKey - Optional Supermemory API key to use instead of the environment variable
+ * @param config.baseUrl - Optional base URL for the Supermemory API (default: "https://api.supermemory.ai")
 *
 * @returns A wrapped language model that automatically includes relevant memories in prompts
 *
@ -97,23 +98,27 @@ interface WrapVercelLanguageModelOptions<T extends LanguageModel> {
 * import { generateText } from "ai"
 *
 * // Basic usage with profile memories
- * const modelWithMemory = withSupermemory({
- *   model: openai("gpt-4"),
- *   containerTag: "user-123",
- *   customId: "conv-456",
- *   mode: "full",
- *   addMemory: "always"
- * })
+ * const modelWithMemory = withSupermemory(
+ *   openai("gpt-4"),
+ *   {
+ *     containerTag: "user-123",
+ *     customId: "conv-456",
+ *     mode: "full",
+ *     addMemory: "always"
+ *   }
+ * )
 *
 * // RAG usage with hybrid search (memories + document chunks)
- * const ragModel = withSupermemory({
- *   model: openai("gpt-4"),
- *   containerTag: "user-123",
- *   customId: "conv-789",
- *   mode: "full",
- *   searchMode: "hybrid",  // Search both memories and document chunks
- *   searchLimit: 15,
- * })
+ * const ragModel = withSupermemory(
+ *   openai("gpt-4"),
+ *   {
+ *     containerTag: "user-123",
+ *     customId: "conv-789",
+ *     mode: "full",
+ *     searchMode: "hybrid",  // Search both memories and document chunks
+ *     searchLimit: 15,
+ *   }
+ * )
 *
 * const result = await generateText({
 *   model: ragModel,
@ -121,13 +126,14 @@ interface WrapVercelLanguageModelOptions<T extends LanguageModel> {
 * })
 * ```
 *
- * @throws {Error} When neither `options.apiKey` nor `process.env.SUPERMEMORY_API_KEY` are set
+ * @throws {Error} When neither `config.apiKey` nor `process.env.SUPERMEMORY_API_KEY` is set
 * @throws {Error} When supermemory API request fails
 */
 const wrapVercelLanguageModel = <T extends LanguageModel>(
-	options: WrapVercelLanguageModelOptions<T>,
+	model: T,
+	config: WithSupermemoryConfig,
 ): T => {
-	const { model, containerTag, customId, ...restOptions } = options
+	const { containerTag, customId, ...restOptions } = config
 	const providedApiKey = restOptions.apiKey ?? process.env.SUPERMEMORY_API_KEY

 	if (!providedApiKey) {
@ -149,109 +155,113 @@ const wrapVercelLanguageModel = <T extends LanguageModel>(
 		promptTemplate: restOptions.promptTemplate,
 	})

-	const wrappedModel = {
-		...model,
+	// Use Object.create to preserve prototype chain, then copy own properties
+	const wrappedModel = Object.create(
+		Object.getPrototypeOf(model),
+		Object.getOwnPropertyDescriptors(model),
+	) as T

-		doGenerate: async (params: LanguageModelCallOptions) => {
-			try {
-				const transformedParams = await transformParamsWithMemory(params, ctx)
+	// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
+	wrappedModel.doGenerate = async (params: LanguageModelCallOptions): Promise<any> => {
+		try {
+			const transformedParams = await transformParamsWithMemory(params, ctx)

-				// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
-				const result = await model.doGenerate(transformedParams as any)
+			// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
+			const result = await model.doGenerate(transformedParams as any)

-				const userMessage = getLastUserMessage(params)
-				if (
-					ctx.addMemory === "always" &&
-					ctx.customId &&
-					userMessage &&
-					userMessage.trim()
-				) {
-					const assistantResponseText = extractAssistantResponseText(
-						result.content as unknown[],
-					)
-					saveMemoryAfterResponse(
-						ctx.client,
-						ctx.containerTag,
-						ctx.customId,
-						assistantResponseText,
-						params,
-						ctx.logger,
-						ctx.apiKey,
-						ctx.normalizedBaseUrl,
-					)
-				}
-
-				return result
-			} catch (error) {
-				ctx.logger.error("Error generating response", {
-					error: error instanceof Error ? error.message : "Unknown error",
-				})
-				throw error
-			}
-		},
-
-		doStream: async (params: LanguageModelCallOptions) => {
-			let generatedText = ""
-
-			try {
-				const transformedParams = await transformParamsWithMemory(params, ctx)
-
-				const { stream, ...rest } = await model.doStream(
-					// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
-					transformedParams as any,
+			const userMessage = getLastUserMessage(params)
+			if (
+				ctx.addMemory === "always" &&
+				ctx.customId &&
+				userMessage &&
+				userMessage.trim()
+			) {
+				const assistantResponseText = extractAssistantResponseText(
+					result.content as unknown[],
+				)
+				saveMemoryAfterResponse(
+					ctx.client,
+					ctx.containerTag,
+					ctx.customId,
+					assistantResponseText,
+					params,
+					ctx.logger,
+					ctx.apiKey,
+					ctx.normalizedBaseUrl,
 				)
-
-				const transformStream = new TransformStream<
-					LanguageModelStreamPart,
-					LanguageModelStreamPart
-				>({
-					transform(chunk, controller) {
-						if (chunk.type === "text-delta") {
-							generatedText += chunk.delta
-						}
-						controller.enqueue(chunk)
-					},
-					flush: async () => {
-						const userMessage = getLastUserMessage(params)
-						if (
-							ctx.addMemory === "always" &&
-							ctx.customId &&
-							userMessage &&
-							userMessage.trim()
-						) {
-							saveMemoryAfterResponse(
-								ctx.client,
-								ctx.containerTag,
-								ctx.customId,
-								generatedText,
-								params,
-								ctx.logger,
-								ctx.apiKey,
-								ctx.normalizedBaseUrl,
-							)
-						}
-					},
-				})
-
-				return {
-					stream: stream.pipeThrough(transformStream),
-					...rest,
-				}
-			} catch (error) {
-				ctx.logger.error("Error streaming response", {
-					error: error instanceof Error ? error.message : "Unknown error",
-				})
-				throw error
 			}
-		},
-	} as T
+
+			return result
+		} catch (error) {
+			ctx.logger.error("Error generating response", {
+				error: error instanceof Error ? error.message : "Unknown error",
+			})
+			throw error
+		}
+	}
+
+	// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
+	wrappedModel.doStream = async (params: LanguageModelCallOptions): Promise<any> => {
+		let generatedText = ""
+
+		try {
+			const transformedParams = await transformParamsWithMemory(params, ctx)
+
+			const { stream, ...rest } = await model.doStream(
+				// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
+				transformedParams as any,
+			)
+
+			const transformStream = new TransformStream<
+				LanguageModelStreamPart,
+				LanguageModelStreamPart
+			>({
+				transform(chunk, controller) {
+					if (chunk.type === "text-delta") {
+						generatedText += chunk.delta
+					}
+					controller.enqueue(chunk)
+				},
+				flush: async () => {
+					const userMessage = getLastUserMessage(params)
+					if (
+						ctx.addMemory === "always" &&
+						ctx.customId &&
+						userMessage &&
+						userMessage.trim()
+					) {
+						saveMemoryAfterResponse(
+							ctx.client,
+							ctx.containerTag,
+							ctx.customId,
+							generatedText,
+							params,
+							ctx.logger,
+							ctx.apiKey,
+							ctx.normalizedBaseUrl,
+						)
+					}
+				},
+			})
+
+			return {
+				stream: stream.pipeThrough(transformStream),
+				...rest,
+			}
+		} catch (error) {
+			ctx.logger.error("Error streaming response", {
+				error: error instanceof Error ? error.message : "Unknown error",
+			})
+			throw error
+		}
+	}

 	return wrappedModel
 }

 export {
 	wrapVercelLanguageModel as withSupermemory,
-	type WrapVercelLanguageModelOptions as WithSupermemoryOptions,
+	type WithSupermemoryConfig,
 	type PromptTemplate,
 	type MemoryPromptData,
 }
--- a/packages/tools/src/vercel/memory-prompt.ts
+++ b/packages/tools/src/vercel/memory-prompt.ts
@ -1,144 +1,70 @@
-import { deduplicateMemories } from "../shared"
-import type { Logger } from "./logger"
-import {
-	type LanguageModelCallOptions,
-	convertProfileToMarkdown,
-	type ProfileStructure,
-} from "./util"
+// Re-export shared types and functions
+export {
+	type MemoryPromptData,
+	type PromptTemplate,
+	defaultPromptTemplate,
+	normalizeBaseUrl,
+	buildMemoriesText,
+	type BuildMemoriesTextOptions,
+} from "../shared"

-export const normalizeBaseUrl = (url?: string): string => {
-	const defaultUrl = "https://api.supermemory.ai"
-	if (!url) return defaultUrl
-	return url.endsWith("/") ? url.slice(0, -1) : url
-}
+import type { Logger, MemoryPromptData } from "../shared"
+import type { LanguageModelCallOptions } from "./util"

-const supermemoryProfileSearch = async (
-	containerTag: string,
-	queryText: string,
-	baseUrl: string,
-): Promise<ProfileStructure> => {
-	const payload = queryText
-		? JSON.stringify({
-				q: queryText,
-				containerTag: containerTag,
-			})
-		: JSON.stringify({
-				containerTag: containerTag,
-			})
-
-	try {
-		const response = await fetch(`${baseUrl}/v4/profile`, {
-			method: "POST",
-			headers: {
-				"Content-Type": "application/json",
-				Authorization: `Bearer ${process.env.SUPERMEMORY_API_KEY}`,
-			},
-			body: payload,
-		})
-
-		if (!response.ok) {
-			const errorText = await response.text().catch(() => "Unknown error")
-			throw new Error(
-				`Supermemory profile search failed: ${response.status} ${response.statusText}. ${errorText}`,
-			)
-		}
-
-		return await response.json()
-	} catch (error) {
-		if (error instanceof Error) {
-			throw error
-		}
-		throw new Error(`Supermemory API request failed: ${error}`)
-	}
-}
-
-export const addSystemPrompt = async (
+/**
+ * Extracts the query text from params based on mode.
+ * For "profile" mode, returns empty string (no query needed).
+ * For "query" or "full" mode, extracts the last user message text.
+ *
+ * @param params - The language model call options
+ * @param mode - The memory retrieval mode
+ * @returns The query text for memory search
+ */
+export const extractQueryText = (
 	params: LanguageModelCallOptions,
-	containerTag: string,
-	logger: Logger,
 	mode: "profile" | "query" | "full",
-	baseUrl = "https://api.supermemory.ai",
-): Promise<LanguageModelCallOptions> => {
+): string => {
+	if (mode === "profile") {
+		return ""
+	}
+
+	const userMessage = params.prompt
+		.slice()
+		.reverse()
+		.find((prompt: { role: string }) => prompt.role === "user")
+
+	const content = userMessage?.content
+	if (!content) return ""
+
+	if (typeof content === "string") {
+		return content
+	}
+
+	// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3
+	return (content as any[])
+		.filter((part) => part.type === "text")
+		.map((part) => part.text || "")
+		.join(" ")
+}
+
+/**
+ * Injects memories string into params by appending to existing system prompt
+ * or creating a new one. Does not mutate the original params (it does emit debug logs).
+ *
+ * @param params - The language model call options
+ * @param memories - The formatted memories string to inject
+ * @param logger - Logger for debug output
+ * @returns New params with memories injected into the system prompt
+ */
+export const injectMemoriesIntoParams = (
+	params: LanguageModelCallOptions,
+	memories: string,
+	logger: Logger,
+): LanguageModelCallOptions => {
 	const systemPromptExists = params.prompt.some(
 		(prompt) => prompt.role === "system",
 	)

-	const queryText =
-		mode !== "profile"
-			? params.prompt
-					.slice()
-					.reverse()
-					.find((prompt) => prompt.role === "user")
-					?.content?.filter((content) => content.type === "text")
-					?.map((content) => (content.type === "text" ? content.text : ""))
-					?.join(" ") || ""
-			: ""
-
-	const memoriesResponse = await supermemoryProfileSearch(
-		containerTag,
-		queryText,
-		baseUrl,
-	)
-
-	const memoryCountStatic = memoriesResponse.profile.static?.length || 0
-	const memoryCountDynamic = memoriesResponse.profile.dynamic?.length || 0
-
-	logger.info("Memory search completed", {
-		containerTag,
-		memoryCountStatic,
-		memoryCountDynamic,
-		queryText:
-			queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""),
-		mode,
-	})
-
-	const deduplicated = deduplicateMemories({
-		static: memoriesResponse.profile.static,
-		dynamic: memoriesResponse.profile.dynamic,
-		searchResults: memoriesResponse.searchResults?.results,
-	})
-
-	logger.debug("Memory deduplication completed", {
-		static: {
-			original: memoryCountStatic,
-			deduplicated: deduplicated.static.length,
-		},
-		dynamic: {
-			original: memoryCountDynamic,
-			deduplicated: deduplicated.dynamic.length,
-		},
-		searchResults: {
-			original: memoriesResponse.searchResults.results.length,
-			deduplicated: deduplicated.searchResults?.length,
-		},
-	})
-
-	const profileData =
-		mode !== "query"
-			? convertProfileToMarkdown({
-					profile: {
-						static: deduplicated.static,
-						dynamic: deduplicated.dynamic,
-					},
-					searchResults: { results: [] },
-				})
-			: ""
-	const searchResultsMemories =
-		mode !== "profile"
-			? `Search results for user's recent message: \n${deduplicated.searchResults
-					.map((memory) => `- ${memory}`)
-					.join("\n")}`
-			: ""
-
-	const memories =
-		`User Supermemories: \n${profileData}\n${searchResultsMemories}`.trim()
-	if (memories) {
-		logger.debug("Memory content preview", {
-			content: memories,
-			fullLength: memories.length,
-		})
-	}
-
 	if (systemPromptExists) {
 		logger.debug("Added memories to existing system prompt")
 		// biome-ignore lint/suspicious/noExplicitAny: Union type compatibility between V2 and V3 prompt types
@ -160,3 +86,35 @@ export const addSystemPrompt = async (
 	] as any
 	return { ...params, prompt: newPrompt } as LanguageModelCallOptions
 }
+
+/**
+ * Adds memories to the system prompt: derives the query text with
+ * extractQueryText, builds the memory text with buildMemoriesText, and returns
+ * the result of injectMemoriesIntoParams. Kept as the original combined entry point.
+ * @deprecated Prefer using buildMemoriesText + injectMemoriesIntoParams for caching support
+ */
+export const addSystemPrompt = async (
+	params: LanguageModelCallOptions,
+	containerTag: string,
+	logger: Logger,
+	mode: "profile" | "query" | "full",
+	baseUrl: string,
+	apiKey: string,
+	promptTemplate?: (data: MemoryPromptData) => string,
+): Promise<LanguageModelCallOptions> => {
+	const { buildMemoriesText } = await import("../shared") // dynamic import, evaluated when the function is called
+
+	const queryText = extractQueryText(params, mode)
+
+	const memories = await buildMemoriesText({
+		containerTag,
+		queryText,
+		mode,
+		baseUrl,
+		apiKey,
+		logger,
+		promptTemplate,
+	})
+
+	return injectMemoriesIntoParams(params, memories, logger)
+}
--- a/packages/tools/test/with-supermemory/integration.test.ts
+++ b/packages/tools/test/with-supermemory/integration.test.ts
@ -82,8 +82,8 @@ const createIntegrationMockModel = () => {
 		reset: () => {
 			capturedGenerateParams = null
 			capturedStreamParams = null
-			vi.mocked(model.doGenerate).mockClear()
-			vi.mocked(model.doStream).mockClear()
+			;(model.doGenerate as any).mockClear()
+			;(model.doStream as any).mockClear()
 		},
 	}
 }
@ -96,8 +96,7 @@ describe.skipIf(!shouldRunIntegration)(
 				const { model, getCapturedGenerateParams } =
 					createIntegrationMockModel()

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -126,8 +125,7 @@ describe.skipIf(!shouldRunIntegration)(

 				const customId = `test-generate-${Date.now()}`

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -169,8 +167,7 @@ describe.skipIf(!shouldRunIntegration)(

 				const customId = `test-conversation-${Date.now()}`

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -198,8 +195,7 @@ describe.skipIf(!shouldRunIntegration)(
 			it("should fetch memories and stream response", async () => {
 				const { model, getCapturedStreamParams } = createIntegrationMockModel()

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-stream-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -236,8 +232,7 @@ describe.skipIf(!shouldRunIntegration)(

 				const customId = `test-stream-${Date.now()}`

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -278,8 +273,7 @@ describe.skipIf(!shouldRunIntegration)(
 			it("should handle text-delta chunks correctly", async () => {
 				const { model } = createIntegrationMockModel()

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-chunks-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -318,8 +312,7 @@ describe.skipIf(!shouldRunIntegration)(
 				const { model } = createIntegrationMockModel()
 				const fetchSpy = vi.spyOn(globalThis, "fetch")

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-profile-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -358,8 +351,7 @@ describe.skipIf(!shouldRunIntegration)(
 				const { model } = createIntegrationMockModel()
 				const fetchSpy = vi.spyOn(globalThis, "fetch")

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-query-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -398,8 +390,7 @@ describe.skipIf(!shouldRunIntegration)(
 				const { model } = createIntegrationMockModel()
 				const fetchSpy = vi.spyOn(globalThis, "fetch")

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-full-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -444,8 +435,7 @@ describe.skipIf(!shouldRunIntegration)(
 					generalSearchMemories: string
 				}) => `<custom-memories>${data.userMemories}</custom-memories>`

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-template-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -472,8 +462,7 @@ describe.skipIf(!shouldRunIntegration)(
 				const { model, getCapturedGenerateParams } =
 					createIntegrationMockModel()

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-verbose-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -500,8 +489,7 @@ describe.skipIf(!shouldRunIntegration)(
 				const fetchSpy = vi.spyOn(globalThis, "fetch")

 				// Use the configured base URL (or default)
-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-baseurl-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -537,12 +525,11 @@ describe.skipIf(!shouldRunIntegration)(
 				const { model } = createIntegrationMockModel()

 				// Override doGenerate to throw an error
-				vi.mocked(model.doGenerate).mockRejectedValueOnce(
+				;(model.doGenerate as any).mockRejectedValueOnce(
 					new Error("Model error"),
 				)

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-error-${Date.now()}`,
 					apiKey: INTEGRATION_CONFIG.apiKey,
@ -564,8 +551,7 @@ describe.skipIf(!shouldRunIntegration)(
 			it("should handle invalid API key gracefully", async () => {
 				const { model } = createIntegrationMockModel()

-				const wrapped = withSupermemory({
-					model,
+				const wrapped = withSupermemory(model, {
 					containerTag: INTEGRATION_CONFIG.containerTag,
 					customId: `test-invalid-key-${Date.now()}`,
 					apiKey: "invalid-api-key-12345",
--- a/packages/tools/test/with-supermemory/unit.test.ts
+++ b/packages/tools/test/with-supermemory/unit.test.ts
@ -73,8 +73,7 @@ describe("Unit: withSupermemory", () => {
 			const mockModel = createMockLanguageModel()

 			expect(() => {
-				withSupermemory({
-					model: mockModel,
+				withSupermemory(mockModel, {
 					containerTag: TEST_CONFIG.containerTag,
 					customId: "test-conv-123",
 				})
@ -85,8 +84,7 @@ describe("Unit: withSupermemory", () => {
 			process.env.SUPERMEMORY_API_KEY = "test-key"

 			const mockModel = createMockLanguageModel()
-			const wrappedModel = withSupermemory({
-				model: mockModel,
+			const wrappedModel = withSupermemory(mockModel, {
 				containerTag: TEST_CONFIG.containerTag,
 				customId: "test-conv-456",
 			})
@ -107,7 +105,10 @@ describe("Unit: withSupermemory", () => {
 				doStream: vi.fn(),
 			}
 			const inner = Object.create(proto) as LanguageModelV2
-			const wrappedModel = withSupermemory(inner, TEST_CONFIG.containerTag)
+			const wrappedModel = withSupermemory(inner, {
+				containerTag: TEST_CONFIG.containerTag,
+				customId: "test-proto",
+			})

 			expect(wrappedModel.specificationVersion).toBe("v2")
 			expect(wrappedModel.provider).toBe("gateway")
@ -149,7 +150,7 @@ describe("Unit: withSupermemory", () => {
 			await transformParamsWithMemory(params, ctx)

 			expect(ctx.memoryCache).toBeDefined()
-			const turnKey = `${TEST_CONFIG.containerTag}::profile:Hello`
+			const turnKey = `${TEST_CONFIG.containerTag}:test-cache-123:profile:Hello`
 			const cachedMemories = ctx.memoryCache.get(turnKey)
 			expect(cachedMemories).toBeDefined()
 			expect(cachedMemories).toContain("Cached memory")