Agent Memory Systems: Engineering Short-term, Long-term & Working Memory
记忆系统是 AI Agent 区别于无状态 API 调用的核心能力。一个优秀的记忆系统能让 Agent 在长期交互中保持上下文连贯性、学习用户偏好、积累领域知识。本文将深入探讨 Agent 记忆系统的架构设计与实现方法。
记忆系统架构总览
┌─────────────────────────────────────────────────────────────────┐
│ Memory Manager │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ Memory Controller │ │
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
│ │ │Consolid-│ │Retrieval│ │Forgetting│ │Conflict│ │ │
│ │ │ation │ │Strategy │ │Policy │ │Resolut.│ │ │
│ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ │
│ └──────────────────────────────────────────────────────────┘ │
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Short-term │ │ Working │ │ Long-term │ │
│ │ Memory │ │ Memory │ │ Memory │ │
│ │ │ │ │ │ │ │
│ │ • Buffer │ │ • Scratchpad │ │ • Vector DB │ │
│ │ • Summary │ │ • Context │ │ • Knowledge │ │
│ │ • Sliding │ │ Variables │ │ Graph │ │
│ │ Window │ │ • Task State │ │ • Episodic │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
└─────────────────────────────────────────────────────────────────┘
短期记忆(Short-term Memory)
短期记忆负责维护当前对话的上下文,是最基础的记忆形式。
对话缓冲区(Conversation Buffer)
最简单的方式是保存完整对话历史:
import asyncio
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Optional
@dataclass
class Message:
    """A single chat message with creation time and free-form metadata."""
    role: str  # "user", "assistant", "system", "tool"
    content: str
    # Naive UTC creation time. NOTE(review): datetime.utcnow is deprecated in
    # Python 3.12 — consider datetime.now(timezone.utc); kept as-is for
    # behavior compatibility with the rest of the module.
    timestamp: datetime = field(default_factory=datetime.utcnow)
    # Arbitrary per-message annotations (e.g. tool-call details).
    metadata: dict = field(default_factory=dict)
class ConversationBufferMemory:
    """Short-term memory that keeps the entire conversation history.

    Messages accumulate until ``clear()`` is called.  ``max_tokens`` is a
    budget the caller can compare against ``get_token_count()``; the class
    itself never evicts messages.
    """

    def __init__(self, max_tokens: int = 100000):
        self.max_tokens = max_tokens
        self.messages: list[Message] = []

    def add_message(self, role: str, content: str, metadata: dict = None):
        """Append one message to the buffer."""
        msg = Message(
            role=role,
            content=content,
            metadata={} if metadata is None else metadata,
        )
        self.messages.append(msg)

    def get_messages(self) -> list[dict]:
        """Return the history as ``{"role", "content"}`` dicts for an LLM API."""
        history = []
        for msg in self.messages:
            history.append({"role": msg.role, "content": msg.content})
        return history

    def get_token_count(self) -> int:
        """Rough token estimate: roughly four characters per token."""
        total = 0
        for msg in self.messages:
            total += len(msg.content) // 4
        return total

    def clear(self):
        """Drop all buffered messages."""
        self.messages = []
滑动窗口记忆(Sliding Window)
当对话变得很长时,滑动窗口可以控制上下文大小:
class SlidingWindowMemory:
"""滑动窗口记忆"""
def __init__(self, window_size: int = 20, preserve_system: bool = True):
self.messages: list[Message] = []
self.window_size = window_size
self.preserve_system = preserve_system
def add_message(self, role: str, content: str, metadata: dict = None):
self.messages.append(Message(role=role, content=content))
def get_messages(self) -> list[dict]:
if len(self.messages) <= self.window_size:
return [{"role": m.role, "content": m.content} for m in self.messages]
# 保留系统消息 + 最近的 N 条消息
system_msgs = []
other_msgs = []
for msg in self.messages:
if self.preserve_system and msg.role == "system":
system_msgs.append(msg)
else:
other_msgs.append(msg)
recent_msgs = other_msgs[-self.window_size:]
all_msgs = system_msgs + recent_msgs
return [{"role": m.role, "content": m.content} for m in all_msgs]
摘要记忆(Summary Memory)
摘要记忆通过压缩早期对话来节省 token:
class SummaryBufferMemory:
    """Buffer memory that compresses old messages into a rolling summary.

    Fixes over the previous version:
    - ``add_message`` used to call the *async* ``_summarize_old_messages()``
      without awaiting it, so the coroutine was never executed and the
      summary never happened.  It now schedules the summarization as a task
      when an event loop is running, and otherwise records that a summary is
      pending so ``maybe_summarize()`` can run it later.
    - Adds ``get_token_count`` so the class is interchangeable with
      ``ConversationBufferMemory`` for callers that probe buffer size.
    """

    def __init__(self, llm_client, max_recent_messages: int = 10):
        self.messages: list[Message] = []
        self.summary: str = ""
        self.llm = llm_client
        self.max_recent = max_recent_messages
        self._summary_pending = False  # set when no event loop was available
        self._summary_task = None      # keep a reference so the task isn't GC'd

    def add_message(self, role: str, content: str, metadata: dict = None):
        """Record a message; trigger summarization past 2x the recent window."""
        self.messages.append(
            Message(role=role, content=content, metadata=metadata or {})
        )
        if len(self.messages) > self.max_recent * 2:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No running loop — defer until maybe_summarize() is awaited.
                self._summary_pending = True
            else:
                # Schedule instead of calling: the old code invoked the async
                # method synchronously and silently dropped the coroutine.
                self._summary_task = loop.create_task(
                    self._summarize_old_messages()
                )

    async def maybe_summarize(self):
        """Run a deferred summarization if one is pending or overdue."""
        if self._summary_pending or len(self.messages) > self.max_recent * 2:
            await self._summarize_old_messages()

    async def _summarize_old_messages(self):
        """Compress everything but the most recent messages into the summary."""
        self._summary_pending = False
        to_summarize = self.messages[:-self.max_recent]
        if not to_summarize:
            return
        conversation_text = "\n".join(
            f"{m.role}: {m.content}" for m in to_summarize
        )
        prompt = f"""请将以下对话压缩为简洁的摘要,保留关键信息:
{conversation_text}
摘要:"""
        new_summary = await self.llm.generate(prompt)
        if self.summary:
            self.summary = f"{self.summary}\n\n[更新] {new_summary}"
        else:
            self.summary = new_summary
        # Keep only the most recent raw messages.
        self.messages = self.messages[-self.max_recent:]

    def get_token_count(self) -> int:
        """Rough token estimate (~4 chars/token) over buffer plus summary."""
        total = sum(len(m.content) // 4 for m in self.messages)
        return total + len(self.summary) // 4

    def get_messages(self) -> list[dict]:
        """Summary (as a system message) followed by the recent raw messages."""
        result = []
        if self.summary:
            result.append({
                "role": "system",
                "content": f"先前对话摘要:\n{self.summary}"
            })
        result.extend(
            {"role": m.role, "content": m.content}
            for m in self.messages
        )
        return result
工作记忆(Working Memory)
工作记忆是 Agent 在执行任务时的"便签本",存储中间状态和临时信息。
上下文变量管理
class WorkingMemory:
    """Agent working memory: the scratch state used while executing a task.

    Holds TTL-capable context variables, a timestamped reasoning scratchpad,
    a LIFO stack of sub-tasks and a list of confidence-scored hypotheses.

    Bug fix: TTL expiry previously used ``timedelta.seconds``, which wraps
    every 24 hours (a 2-day-old variable reported 0 elapsed seconds), so
    long-expired variables were never evicted.  ``total_seconds()`` is used
    instead, and a TTL of 0 now means "expires immediately" rather than
    being silently treated as "no TTL".
    """

    def __init__(self):
        self.variables: dict = {}         # name -> {"value", "created_at", "ttl"}
        self.scratchpad: list[str] = []   # timestamped reasoning steps
        self.task_stack: list[dict] = []  # pending sub-tasks (LIFO)
        self.hypotheses: list[dict] = []  # {"hypothesis", "confidence", "timestamp"}

    def set_variable(self, key: str, value, ttl: int = None):
        """Store a context variable; ``ttl`` is a lifetime in seconds (None = no expiry)."""
        self.variables[key] = {
            "value": value,
            "created_at": datetime.utcnow(),
            "ttl": ttl
        }

    def get_variable(self, key: str, default=None):
        """Return a variable's value, evicting it first if its TTL has expired."""
        var = self.variables.get(key)
        if var is None:
            return default
        if var["ttl"] is not None:
            # total_seconds(), not .seconds — the latter wraps at 24 hours.
            elapsed = (datetime.utcnow() - var["created_at"]).total_seconds()
            if elapsed > var["ttl"]:
                del self.variables[key]
                return default
        return var["value"]

    def add_scratchpad_entry(self, entry: str):
        """Append one timestamped reasoning step."""
        self.scratchpad.append(f"[{datetime.utcnow().isoformat()}] {entry}")

    def get_scratchpad(self) -> str:
        """Return the full reasoning trace, one step per line."""
        return "\n".join(self.scratchpad)

    def push_task(self, task: dict):
        """Push a sub-task onto the stack."""
        self.task_stack.append(task)

    def pop_task(self) -> Optional[dict]:
        """Pop the current task; None when the stack is empty."""
        return self.task_stack.pop() if self.task_stack else None

    def add_hypothesis(self, hypothesis: str, confidence: float):
        """Record a hypothesis with its confidence score."""
        self.hypotheses.append({
            "hypothesis": hypothesis,
            "confidence": confidence,
            "timestamp": datetime.utcnow()
        })

    def get_best_hypothesis(self) -> Optional[str]:
        """Return the highest-confidence hypothesis, or None if none exist."""
        if not self.hypotheses:
            return None
        best = max(self.hypotheses, key=lambda h: h["confidence"])
        return best["hypothesis"]

    def get_context_for_llm(self) -> str:
        """Render all non-empty working-memory sections as LLM-ready text."""
        parts = []
        if self.variables:
            vars_str = "\n".join(
                f" - {k}: {v['value']}" for k, v in self.variables.items()
            )
            parts.append(f"当前上下文变量:\n{vars_str}")
        if self.scratchpad:
            parts.append(f"推理过程:\n{self.get_scratchpad()}")
        if self.task_stack:
            task_str = " → ".join(t["name"] for t in self.task_stack)
            parts.append(f"任务栈: {task_str}")
        if self.hypotheses:
            parts.append(f"当前最佳假设: {self.get_best_hypothesis()}")
        return "\n\n".join(parts)
长期记忆(Long-term Memory)
长期记忆使用向量数据库存储和检索历史信息。
向量记忆实现
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
class LongTermMemory:
    """Vector-database-backed long-term memory (ChromaDB + sentence embeddings).

    Changes over the previous version:
    - ``store_memory`` now returns the generated memory id; previously there
      was no way for a caller to learn the id that
      ``update_memory_importance`` requires.
    - ``retrieve_memories`` combines multiple metadata conditions with an
      explicit ``$and`` — current Chroma versions reject a ``where`` dict
      containing several top-level fields.
    - Relies on ``timedelta`` being imported at module level (the original
      file used it without importing it).
    """

    def __init__(self, collection_name: str = "agent_memory"):
        # Embedding model shared by storage and queries.
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
        # NOTE(review): Settings(chroma_db_impl=...) is the legacy Chroma
        # configuration API; newer releases use chromadb.PersistentClient.
        self.client = chromadb.Client(Settings(
            chroma_db_impl="duckdb+parquet",
            persist_directory="./memory_db"
        ))
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"}
        )

    def store_memory(self, content: str, memory_type: str,
                     metadata: dict = None, importance: float = 1.0) -> str:
        """Embed and persist one memory; returns the new memory id."""
        embedding = self.embedder.encode(content).tolist()
        memory_id = f"{memory_type}_{datetime.utcnow().timestamp()}"
        self.collection.add(
            ids=[memory_id],
            embeddings=[embedding],
            documents=[content],
            metadatas=[{
                "type": memory_type,
                "importance": importance,
                "created_at": datetime.utcnow().isoformat(),
                **(metadata or {})
            }]
        )
        return memory_id

    def retrieve_memories(self, query: str, top_k: int = 5,
                          memory_type: str = None,
                          min_importance: float = 0.0) -> list[dict]:
        """Semantic search over stored memories, optionally filtered by
        type and minimum importance."""
        query_embedding = self.embedder.encode(query).tolist()
        # Build the metadata filter; several clauses need an explicit $and.
        clauses = []
        if memory_type:
            clauses.append({"type": memory_type})
        if min_importance > 0:
            clauses.append({"importance": {"$gte": min_importance}})
        if not clauses:
            where_filter = None
        elif len(clauses) == 1:
            where_filter = clauses[0]
        else:
            where_filter = {"$and": clauses}
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            where=where_filter
        )
        return [
            {
                "content": doc,
                "metadata": results["metadatas"][0][i],
                "distance": results["distances"][0][i],
            }
            for i, doc in enumerate(results["documents"][0])
        ]

    def update_memory_importance(self, memory_id: str, new_importance: float):
        """Overwrite the stored importance score of one memory."""
        result = self.collection.get(ids=[memory_id])
        if result["ids"]:
            metadata = result["metadatas"][0]
            metadata["importance"] = new_importance
            self.collection.update(
                ids=[memory_id],
                metadatas=[metadata]
            )

    def forget_old_memories(self, days_threshold: int = 90,
                            min_importance: float = 0.3):
        """Delete memories that are both older than the cutoff and below
        the importance floor."""
        cutoff_date = (
            datetime.utcnow() - timedelta(days=days_threshold)
        ).isoformat()
        results = self.collection.get(
            where={
                "$and": [
                    {"created_at": {"$lt": cutoff_date}},
                    {"importance": {"$lt": min_importance}}
                ]
            }
        )
        if results["ids"]:
            self.collection.delete(ids=results["ids"])
情节记忆(Episodic Memory)
情节记忆记录具体的交互经历,类似于人类的"事件回忆":
class EpisodicMemory:
    """Episodic memory: concrete interaction experiences, recallable by
    semantic similarity."""

    def __init__(self, long_term_memory: LongTermMemory):
        self.ltm = long_term_memory

    def store_episode(self, episode: dict):
        """Persist one episode as a natural-language long-term memory."""
        self.ltm.store_memory(
            content=self._episode_to_text(episode),
            memory_type="episodic",
            metadata={
                "session_id": episode["session_id"],
                "outcome": episode["outcome"],
                "emotion": episode.get("emotion", "neutral")
            },
            importance=self._calculate_importance(episode)
        )

    def recall_similar_episodes(self, current_situation: str,
                                top_k: int = 3) -> list[dict]:
        """Fetch past episodes most similar to the current situation."""
        return self.ltm.retrieve_memories(
            query=current_situation,
            top_k=top_k,
            memory_type="episodic"
        )

    def _episode_to_text(self, episode: dict) -> str:
        """Render the structured episode as one Chinese narrative sentence."""
        fragments = [
            f"在 {episode['timestamp']},用户说:{episode['user_input']}。",
            f"我采取了行动:{episode['action']}。",
            f"结果是:{episode['outcome']}。",
            f"学到了:{episode.get('lesson', '无')}",
        ]
        return "".join(fragments)

    def _calculate_importance(self, episode: dict) -> float:
        """Score the episode in [0, 1]: success, lessons and positive
        feedback each add weight to a 0.5 baseline."""
        score = 0.5
        signals = (
            (episode["outcome"] == "success", 0.2),
            (bool(episode.get("lesson")), 0.3),
            (episode.get("user_feedback") == "positive", 0.2),
        )
        for hit, bonus in signals:
            if hit:
                score += bonus
        return min(score, 1.0)
语义记忆(Semantic Memory)
语义记忆存储抽象的知识和概念:
import json
class SemanticMemory:
    """Semantic memory: abstract concept knowledge stored in the vector DB
    and mirrored in a simplified in-process knowledge graph."""

    def __init__(self, long_term_memory: LongTermMemory):
        self.ltm = long_term_memory
        self.knowledge_graph = {}  # concept -> {"facts": [...], "relations": [...]}

    def store_knowledge(self, concept: str, facts: list[str],
                        relations: list[dict] = None):
        """Persist the concept's facts and update the graph entry."""
        bullet_lines = "\n".join(f"- {fact}" for fact in facts)
        self.ltm.store_memory(
            content=f"关于 {concept}:\n{bullet_lines}",
            memory_type="semantic",
            metadata={"concept": concept},
            importance=0.8
        )
        # The graph entry is replaced wholesale for an existing concept.
        self.knowledge_graph[concept] = {
            "facts": facts,
            "relations": [] if relations is None else relations,
        }

    def query_knowledge(self, query: str, top_k: int = 5) -> list[dict]:
        """Semantic search restricted to reasonably important knowledge."""
        return self.ltm.retrieve_memories(
            query=query,
            top_k=top_k,
            memory_type="semantic",
            min_importance=0.5
        )

    def get_related_concepts(self, concept: str) -> list[str]:
        """Targets of the concept's outgoing relations ([] when unknown)."""
        entry = self.knowledge_graph.get(concept)
        if entry is None:
            return []
        return [relation["target"] for relation in entry["relations"]]
记忆整合与检索策略
记忆整合(Memory Consolidation)
class MemoryConsolidator:
    """Consolidation: distills a finished session into long-term memories.

    Changes over the previous version:
    - The LLM's reply is parsed through ``_parse_response``, which strips a
      surrounding Markdown code fence (LLMs frequently wrap JSON output in
      ```json ... ```), where a bare ``json.loads`` would raise.
    - The ``LongTermMemory`` annotation is a string, so this class does not
      depend on definition order within the module.
    """

    def __init__(self, llm_client, long_term_memory: "LongTermMemory"):
        self.llm = llm_client
        self.ltm = long_term_memory

    async def consolidate_session(self, session_messages: list[dict]):
        """Extract key information from a session and store it by category."""
        key_info = await self._extract_key_information(session_messages)
        for info in key_info:
            if info["type"] == "user_preference":
                # User preferences are the most valuable signal.
                self.ltm.store_memory(
                    content=info["content"],
                    memory_type="semantic",
                    metadata={"subcategory": "user_preference"},
                    importance=0.9
                )
            elif info["type"] == "fact":
                self.ltm.store_memory(
                    content=info["content"],
                    memory_type="semantic",
                    metadata={"subcategory": "learned_fact"},
                    importance=0.7
                )
            elif info["type"] == "episode":
                self.ltm.store_memory(
                    content=info["content"],
                    memory_type="episodic",
                    importance=info.get("importance", 0.5)
                )

    async def _extract_key_information(self, messages: list[dict]) -> list[dict]:
        """Ask the LLM to pull out long-term-worthy items as a JSON list."""
        conversation = "\n".join(
            f"{m['role']}: {m['content']}" for m in messages
        )
        prompt = f"""分析以下对话,提取需要长期记住的信息。返回 JSON 格式:
对话:
{conversation}
请提取:
用户偏好(如编程语言、代码风格)
学到的事实(如项目结构、技术选型)
重要的交互事件
返回格式:
[{{"type": "user_preference|fact|episode", "content": "...", "importance": 0.0-1.0}}]"""
        response = await self.llm.generate(prompt)
        return self._parse_response(response)

    @staticmethod
    def _parse_response(response: str) -> list[dict]:
        """Parse the LLM reply as JSON, tolerating a Markdown code fence."""
        text = response.strip()
        if text.startswith("```"):
            # Drop the opening fence line (``` or ```json).
            text = text.split("\n", 1)[1] if "\n" in text else text[3:]
            if text.rstrip().endswith("```"):
                text = text.rstrip()[:-3]
        return json.loads(text)
多策略检索
class MemoryRetriever:
    """Assembles retrieval context from the three memory tiers."""

    def __init__(self, short_term, working_memory, long_term_memory):
        self.short_term = short_term
        self.working = working_memory
        self.long_term = long_term_memory

    async def retrieve_relevant_context(self, query: str,
                                        strategy: str = "hybrid") -> str:
        """Build a context string; ``strategy`` selects contributing tiers.

        "hybrid" uses every tier; "recent", "working" and "semantic" each
        use exactly one.  Tiers with nothing to say are skipped.
        """
        def wants(tier: str) -> bool:
            return strategy in ("hybrid", tier)

        sections = []

        # Tier 1: the tail of the short-term conversation buffer.
        if wants("recent"):
            tail = self.short_term.get_messages()[-5:]
            if tail:
                lines = "\n".join(
                    f"{m['role']}: {m['content']}" for m in tail
                )
                sections.append("最近对话:\n" + lines)

        # Tier 2: the working-memory snapshot.
        if wants("working"):
            state = self.working.get_context_for_llm()
            if state:
                sections.append(f"当前工作状态:\n{state}")

        # Tier 3: semantically similar long-term memories.
        if wants("semantic"):
            hits = self.long_term.retrieve_memories(
                query=query,
                top_k=3,
                min_importance=0.5
            )
            if hits:
                sections.append(
                    "相关记忆:\n" + "\n---\n".join(h["content"] for h in hits)
                )

        return "\n\n==========\n\n".join(sections)
完整的记忆管理器
class MemoryManager:
    """Facade wiring the three memory tiers together for one session.

    Fix: the consolidation trigger previously called
    ``self.short_term.get_token_count()``, a method ``SummaryBufferMemory``
    does not define (AttributeError as soon as a conversation grew).  The
    token estimate is now computed locally from ``get_messages()``, which
    every short-term memory implementation provides.
    """

    # Rough consolidation threshold, in estimated tokens (~4 chars each).
    CONSOLIDATION_TOKEN_THRESHOLD = 50000

    def __init__(self, session_id: str, llm_client, redis_client):
        # NOTE(review): ``redis_client`` is accepted but never used — confirm
        # whether externalized short-term storage is still planned.
        self.session_id = session_id
        # The three memory tiers.
        self.short_term = SummaryBufferMemory(llm_client)
        self.working = WorkingMemory()
        self.long_term = LongTermMemory(collection_name=f"agent_{session_id}")
        # Helper components layered on top of long-term memory.
        self.episodic = EpisodicMemory(self.long_term)
        self.semantic = SemanticMemory(self.long_term)
        self.consolidator = MemoryConsolidator(llm_client, self.long_term)
        self.retriever = MemoryRetriever(
            self.short_term, self.working, self.long_term
        )
        self.llm = llm_client

    async def process_interaction(self, user_input: str, agent_response: str):
        """Record one user/assistant exchange; consolidate when the buffer grows."""
        self.short_term.add_message("user", user_input)
        self.short_term.add_message("assistant", agent_response)
        messages = self.short_term.get_messages()
        # ~4 characters per token, mirroring ConversationBufferMemory's estimate.
        token_estimate = sum(len(m["content"]) // 4 for m in messages)
        if token_estimate > self.CONSOLIDATION_TOKEN_THRESHOLD:
            await self.consolidator.consolidate_session(messages)

    async def build_context(self, query: str) -> str:
        """Build the full retrieval context for the given query."""
        return await self.retriever.retrieve_relevant_context(query)

    def update_working_memory(self, key: str, value):
        """Set a working-memory context variable."""
        self.working.set_variable(key, value)

    def record_reasoning_step(self, step: str):
        """Append one step to the working-memory scratchpad."""
        self.working.add_scratchpad_entry(step)
最佳实践
总结
Agent 记忆系统的设计直接影响 Agent 的智能程度和用户体验。通过合理的短期、工作、长期记忆分层,配合记忆整合和智能检索策略,可以构建出具有"经验积累"能力的智能 Agent。