Back to Blog
RAG · Agent · Architecture

Agentic RAG Explained: When RAG Meets Autonomous Agents

2026-04-05 · 11 min

检索增强生成(RAG)是将外部知识注入大语言模型的关键技术。从最初的朴素 RAG 到如今的 Agentic RAG,这一技术经历了巨大的演进。本文将深入剖析 Agentic RAG 的核心思想、技术架构和实现方法。

RAG 的演进历程

第一代:朴素 RAG(Naive RAG)

用户查询 → 向量检索 → 拼接上下文 → LLM 生成 → 输出

朴素 RAG 的问题:

  • 查询质量差时检索不到相关内容
  • 检索结果可能不相关或冗余
  • 无法处理需要多步推理的复杂问题
  • 无法自我验证答案质量
第二代:高级 RAG(Advanced RAG)

    引入查询重写、重排序等优化:

    用户查询 → 查询优化 → 向量检索 → 重排序 → LLM 生成 → 输出
    

    第三代:Agentic RAG

    将 Agent 的规划、工具使用和自我反思能力引入 RAG:

    用户查询 → [Agent 规划] → [自适应检索] → [自我验证] → [迭代改进] → 输出
    

    ↻ 循环直到满意

    Agentic RAG 的核心组件

    1. 查询分解(Query Decomposition)

    复杂查询被分解为多个子查询:

    from typing import List
    

    import json

    class QueryDecomposer:

    """查询分解器"""

    def __init__(self, llm_client):

    self.llm = llm_client

    async def decompose(self, query: str) -> List[dict]:

    """将复杂查询分解为子查询"""

    prompt = f"""你是一个查询分析专家。请将以下复杂查询分解为多个简单的子查询。

    原始查询: {query}

    分析要求:

  • 识别查询中包含的多个信息需求
  • 每个子查询应该独立可回答
  • 标注子查询之间的依赖关系
  • 为每个子查询指定最合适的检索策略
  • 返回 JSON 格式:

    {{

    "sub_queries": [

    {{

    "id": "q1",

    "query": "子查询内容",

    "strategy": "semantic|keyword|hybrid",

    "depends_on": [],

    "priority": 1

    }}

    ],

    "reasoning": "分解理由"

    }}"""

    response = await self.llm.generate(prompt)

    return json.loads(response)

    async def adapt_query(self, original_query: str,

    failed_context: str) -> str:

    """根据检索失败的情况调整查询"""

    prompt = f"""检索未找到相关信息,请重新表述查询。

    原始查询: {original_query}

    检索结果: {failed_context}

    请提供一个新的查询表述,可能更容易找到相关信息:

    """

    return await self.llm.generate(prompt)

    2. 自适应检索(Adaptive Retrieval)

    根据查询类型和上下文动态选择检索策略:

    from enum import Enum
    

    from dataclasses import dataclass

    class RetrievalStrategy(Enum):

    SEMANTIC = "semantic" # 语义相似度检索

    KEYWORD = "keyword" # 关键词检索

    HYBRID = "hybrid" # 混合检索

    GRAPH = "graph" # 知识图谱检索

    SQL = "sql" # 结构化查询

    @dataclass

    class RetrievalResult:

    content: str

    source: str

    score: float

    metadata: dict

    class AdaptiveRetriever:

    """自适应检索器"""

    def __init__(self, vector_store, keyword_index, graph_db, sql_engine):

    self.vector_store = vector_store

    self.keyword_index = keyword_index

    self.graph_db = graph_db

    self.sql_engine = sql_engine

    async def retrieve(self, query: str, strategy: RetrievalStrategy = None,

    top_k: int = 5) -> List[RetrievalResult]:

    """根据策略执行检索"""

    if strategy is None:

    strategy = await self._determine_strategy(query)

    match strategy:

    case RetrievalStrategy.SEMANTIC:

    return await self._semantic_search(query, top_k)

    case RetrievalStrategy.KEYWORD:

    return await self._keyword_search(query, top_k)

    case RetrievalStrategy.HYBRID:

    return await self._hybrid_search(query, top_k)

    case RetrievalStrategy.GRAPH:

    return await self._graph_search(query, top_k)

    case RetrievalStrategy.SQL:

    return await self._sql_search(query, top_k)

    async def _determine_strategy(self, query: str) -> RetrievalStrategy:

    """自动确定最佳检索策略"""

    # 简单规则:包含特定模式的查询使用不同策略

    if any(op in query.upper() for op in ["SELECT", "COUNT", "AVG", "SUM"]):

    return RetrievalStrategy.SQL

    if "关系" in query or "连接" in query or "关联" in query:

    return RetrievalStrategy.GRAPH

    # 默认使用混合检索

    return RetrievalStrategy.HYBRID

    async def _semantic_search(self, query: str, top_k: int) -> List[RetrievalResult]:

    """语义向量检索"""

    embedding = await self.get_embedding(query)

    results = self.vector_store.similarity_search_with_score(

    embedding, k=top_k

    )

    return [

    RetrievalResult(

    content=doc.page_content,

    source=doc.metadata.get("source", "unknown"),

    score=score,

    metadata=doc.metadata

    )

    for doc, score in results

    ]

    async def _keyword_search(self, query: str, top_k: int) -> List[RetrievalResult]:

    """关键词检索"""

    results = self.keyword_index.search(query, limit=top_k)

    return [

    RetrievalResult(

    content=hit.document.content,

    source=hit.document.source,

    score=hit.score,

    metadata=hit.document.metadata

    )

    for hit in results

    ]

    async def _hybrid_search(self, query: str, top_k: int) -> List[RetrievalResult]:

    """混合检索:结合语义和关键词"""

    semantic_results = await self._semantic_search(query, top_k)

    keyword_results = await self._keyword_search(query, top_k)

    # 融合排序

    combined = self._reciprocal_rank_fusion(

    semantic_results, keyword_results, k=top_k

    )

    return combined

    def _reciprocal_rank_fusion(self, *result_lists, k=60, top_k=5):

    """倒数排名融合"""

    scores = {}

    for results in result_lists:

    for rank, result in enumerate(results):

    key = result.content[:100] # 用内容前缀作为 key

    if key not in scores:

    scores[key] = {"result": result, "score": 0}

    scores[key]["score"] += 1 / (k + rank + 1)

    sorted_results = sorted(scores.values(), key=lambda x: x["score"], reverse=True)

    return [item["result"] for item in sorted_results[:top_k]]

    3. 自我反思与验证(Self-Reflection)

    Agentic RAG 的核心能力——评估自身输出质量并迭代改进:

class SelfReflectiveRAG:
    """RAG agent with a self-reflection loop.

    Generates an answer, has the LLM grade it, and either returns it
    (when satisfactory), retrieves more context (when information is
    missing), or prunes the context (when the answer needs correction),
    up to ``max_iterations`` rounds.
    """

    def __init__(self, llm_client, retriever: AdaptiveRetriever):
        self.llm = llm_client
        self.retriever = retriever
        # Hard cap on generate/evaluate rounds to bound cost and latency.
        self.max_iterations = 3

    async def answer(self, query: str) -> dict:
        """Answer *query* with self-verification; returns answer + trace.

        The returned dict carries ``answer``, ``iterations``,
        ``confidence`` and the full per-iteration ``history``; a
        ``warning`` key is added when the iteration cap is hit.
        """
        context = ""
        history = []
        for iteration in range(self.max_iterations):
            # 1. Generate an answer from the current context.
            answer = await self._generate_answer(query, context)
            # 2. Have the LLM grade that answer.
            evaluation = await self._evaluate_answer(query, answer, context)
            history.append({
                "iteration": iteration,
                "answer": answer,
                "evaluation": evaluation
            })
            # 3. Good enough? Return immediately with the trace.
            if evaluation["is_satisfactory"]:
                return {
                    "answer": answer,
                    "iterations": iteration + 1,
                    "confidence": evaluation["confidence"],
                    "history": history
                }
            # 4. Otherwise improve based on the evaluation verdict.
            if evaluation["needs_more_info"]:
                # Missing information: derive follow-up queries from the
                # identified gaps and append their results to the context.
                new_queries = await self._generate_search_queries(
                    query, answer, evaluation["gaps"]
                )
                for q in new_queries:
                    results = await self.retriever.retrieve(q)
                    context += "\n" + "\n".join(r.content for r in results)
            elif evaluation["needs_correction"]:
                # Answer went off the rails: prune the context instead.
                context = await self._refine_context(query, context, evaluation)
        # Iteration cap reached — return the last attempt with a warning.
        return {
            "answer": answer,
            "iterations": self.max_iterations,
            "confidence": evaluation["confidence"],
            "warning": "已达到最大迭代次数",
            "history": history
        }

    async def _evaluate_answer(self, query: str, answer: str,
                               context: str) -> dict:
        """Grade an answer on relevance/completeness/accuracy/groundedness.

        NOTE(review): assumes the LLM returns strict JSON; ``json.loads``
        raises on fenced or malformed output — confirm upstream guarantees.
        """
        prompt = f"""请评估以下 AI 回答的质量。

用户问题: {query}

检索到的上下文:
{context}

AI 回答:
{answer}

请从以下维度评估(1-10分):
- 相关性:回答是否切题
- 完整性:是否充分回答了问题
- 准确性:信息是否正确(基于上下文)
- 有据性:是否有上下文支持

返回 JSON:
{{
  "relevance": 0-10,
  "completeness": 0-10,
  "accuracy": 0-10,
  "groundedness": 0-10,
  "confidence": 0.0-1.0,
  "is_satisfactory": true/false,
  "needs_more_info": true/false,
  "needs_correction": true/false,
  "gaps": ["缺失信息列表"],
  "issues": ["问题列表"]
}}"""
        response = await self.llm.generate(prompt)
        return json.loads(response)

    async def _generate_search_queries(self, original_query: str,
                                       current_answer: str,
                                       gaps: list) -> list:
        """Produce follow-up search queries that target the listed gaps."""
        prompt = f"""基于以下信息生成补充搜索查询。

原始问题: {original_query}

当前回答: {current_answer}

识别到的信息缺口: {gaps}

请生成 2-3 个具体的搜索查询来填补这些缺口。返回 JSON 数组:["query1", "query2"]"""
        response = await self.llm.generate(prompt)
        # NOTE(review): same strict-JSON assumption as _evaluate_answer.
        return json.loads(response)

    async def _generate_answer(self, query: str, context: str) -> str:
        """Generate an answer grounded in *context* (may be empty)."""
        prompt = f"""基于以下上下文回答用户问题。如果上下文不足,请明确说明。

上下文:
{context}

问题: {query}

回答:"""
        return await self.llm.generate(prompt)

    async def _refine_context(self, query: str, context: str,
                              evaluation: dict) -> str:
        """Shrink the context to only what is relevant, guided by issues."""
        prompt = f"""以下上下文可能包含不相关信息,请筛选出与问题最相关的部分。

问题: {query}

评估问题: {evaluation.get('issues', [])}

上下文:
{context}

请返回精炼后的上下文:"""
        return await self.llm.generate(prompt)

    4. 纠正性 RAG(Corrective RAG)

    CRAG 在检索后加入验证步骤,纠正不准确的检索结果:

    class CorrectiveRAG:
    

    """纠正性 RAG"""

    def __init__(self, llm_client, retriever: AdaptiveRetriever,

    web_search_tool=None):

    self.llm = llm_client

    self.retriever = retriever

    self.web_search = web_search_tool

    async def answer(self, query: str) -> dict:

    """执行纠正性 RAG"""

    # 1. 初始检索

    initial_results = await self.retriever.retrieve(query, top_k=5)

    # 2. 评估检索结果

    evaluations = []

    for result in initial_results:

    eval_result = await self._evaluate_retrieval(query, result.content)

    evaluations.append({

    "content": result.content,

    "evaluation": eval_result

    })

    # 3. 分类处理

    correct_docs = []

    incorrect_docs = []

    ambiguous_docs = []

    for item in evaluations:

    verdict = item["evaluation"]["verdict"]

    if verdict == "correct":

    correct_docs.append(item["content"])

    elif verdict == "incorrect":

    incorrect_docs.append(item)

    else:

    ambiguous_docs.append(item)

    # 4. 根据评估结果采取行动

    final_context = list(correct_docs)

    if len(correct_docs) < 2:

    # 正确文档太少,需要补充检索

    if self.web_search:

    web_results = await self.web_search.search(query)

    final_context.extend(web_results[:3])

    # 尝试不同的检索策略

    alt_results = await self.retriever.retrieve(

    query, strategy=RetrievalStrategy.KEYWORD, top_k=3

    )

    final_context.extend([r.content for r in alt_results])

    # 5. 知识精炼

    if ambiguous_docs:

    refined = await self._refine_ambiguous(ambiguous_docs, query)

    final_context.extend(refined)

    # 6. 生成最终回答

    answer = await self._generate_answer(query, final_context)

    return {

    "answer": answer,

    "sources_used": len(final_context),

    "corrections_made": len(incorrect_docs),

    "supplementary_searches": len(correct_docs) < 2

    }

    async def _evaluate_retrieval(self, query: str, document: str) -> dict:

    """评估单个检索结果的相关性和正确性"""

    prompt = f"""评估以下检索到的文档是否能够帮助回答用户问题。

    用户问题: {query}

    检索到的文档:

    {document}

    请评估:

  • 相关性(与问题是否相关)
  • 正确性(信息是否准确)
  • 充分性(是否包含足够信息)
  • 返回 JSON:

    {{

    "relevance_score": 0-10,

    "correctness_score": 0-10,

    "sufficiency_score": 0-10,

    "verdict": "correct|incorrect|ambiguous",

    "reason": "评估理由"

    }}"""

    response = await self.llm.generate(prompt)

    return json.loads(response)

    async def _refine_ambiguous(self, ambiguous_docs: list,

    query: str) -> list:

    """精炼模糊的文档"""

    refined = []

    for doc in ambiguous_docs:

    prompt = f"""从以下文档中提取与问题相关的关键信息。

    问题: {query}

    文档: {doc["content"]}

    请只返回相关的事实和数据:"""

    extracted = await self.llm.generate(prompt)

    refined.append(extracted)

    return refined

    async def _generate_answer(self, query: str, context: list) -> str:

    prompt = f"""基于以下经过验证的上下文回答问题。

    上下文:

    {"\n---\n".join(context)}

    问题:{query}

    请提供准确、有据可查的回答:"""

    return await self.llm.generate(prompt)

    完整的 Agentic RAG 系统

    class AgenticRAGSystem:
    

    """完整的 Agentic RAG 系统"""

    def __init__(self, llm_client, vector_store, keyword_index,

    graph_db=None, web_search=None):

    self.llm = llm_client

    # 初始化各组件

    self.retriever = AdaptiveRetriever(

    vector_store, keyword_index, graph_db, None

    )

    self.decomposer = QueryDecomposer(llm_client)

    self.reflective_rag = SelfReflectiveRAG(llm_client, self.retriever)

    self.corrective_rag = CorrectiveRAG(llm_client, self.retriever, web_search)

    async def query(self, question: str, mode: str = "auto") -> dict:

    """处理用户查询"""

    # 1. 查询分析

    analysis = await self._analyze_query(question)

    # 2. 根据复杂度选择模式

    if mode == "auto":

    mode = "simple" if analysis["complexity"] < 0.5 else "agentic"

    match mode:

    case "simple":

    return await self._simple_rag(question)

    case "agentic":

    return await self._agentic_rag(question, analysis)

    async def _analyze_query(self, query: str) -> dict:

    """分析查询特征"""

    prompt = f"""分析以下查询的特征:

    查询: {query}

    返回 JSON:

    {{

    "complexity": 0.0-1.0,

    "type": "factual|analytical|comparative|procedural",

    "requires_decomposition": true/false,

    "requires_multiple_sources": true/false,

    "domain": "general|technical|scientific|business"

    }}"""

    response = await self.llm.generate(prompt)

    return json.loads(response)

    async def _simple_rag(self, query: str) -> dict:

    """简单 RAG 流程"""

    results = await self.retriever.retrieve(query)

    context = "\n".join(r.content for r in results)

    prompt = f"""基于以下上下文回答问题。

    上下文:

    {context}

    问题: {query}

    回答:"""

    answer = await self.llm.generate(prompt)

    return {"answer": answer, "mode": "simple", "sources": len(results)}

    async def _agentic_rag(self, query: str, analysis: dict) -> dict:

    """Agentic RAG 流程"""

    # 1. 如果需要,分解查询

    if analysis["requires_decomposition"]:

    decomp = await self.decomposer.decompose(query)

    sub_answers = []

    for sub_q in decomp["sub_queries"]:

    sub_result = await self.reflective_rag.answer(sub_q["query"])

    sub_answers.append({

    "query": sub_q["query"],

    "answer": sub_result["answer"]

    })

    # 综合子答案

    synthesis = await self._synthesize_answers(query, sub_answers)

    return {

    "answer": synthesis,

    "mode": "agentic_decomposed",

    "sub_queries": len(sub_answers)

    }

    else:

    # 2. 使用自我反思 RAG

    result = await self.reflective_rag.answer(query)

    return {

    "answer": result["answer"],

    "mode": "agentic_reflective",

    "iterations": result["iterations"],

    "confidence": result["confidence"]

    }

    async def _synthesize_answers(self, original_query: str,

    sub_answers: list) -> str:

    """综合多个子答案"""

    answers_text = "\n".join(

    f"问题: {sa['query']}\n答案: {sa['answer']}"

    for sa in sub_answers

    )

    prompt = f"""基于以下多个子问题的答案,综合回答原始问题。

    原始问题: {original_query}

    子问题及答案:

    {answers_text}

    请提供一个完整的、连贯的综合回答:"""

    return await self.llm.generate(prompt)

    使用示例

import asyncio


async def main():
    """End-to-end usage example.

    NOTE(review): AnthropicClient, ChromaDBStore and ElasticsearchIndex are
    not defined in this article — presumably project-provided wrappers;
    confirm their constructors before running.
    """
    # Initialize the LLM client and the two retrieval back-ends.
    llm = AnthropicClient(model="claude-sonnet-4-20250514")
    vector_store = ChromaDBStore("knowledge_base")
    keyword_index = ElasticsearchIndex("documents")

    # Build the Agentic RAG system (graph DB and web search left unset).
    rag = AgenticRAGSystem(llm, vector_store, keyword_index)

    # Simple query — auto mode routes this to the plain RAG path.
    result = await rag.query("公司的退货政策是什么?")
    print(result["answer"])

    # Complex query — auto mode triggers the agentic pipeline.
    result = await rag.query(
        "比较我们产品和竞品 A 的性能差异,并分析我们在市场上的优劣势"
    )
    print(result["answer"])


asyncio.run(main())

    性能优化建议

    | 优化策略 | 效果 | 实现复杂度 |
    |---------|------|-----------|
    | 查询缓存 | ⭐⭐⭐⭐ | 低 |
    | 异步并行检索 | ⭐⭐⭐ | 中 |
    | 检索结果缓存 | ⭐⭐⭐ | 低 |
    | 模型选择优化 | ⭐⭐⭐⭐ | 低 |
    | 批量评估 | ⭐⭐ | 中 |

    总结

    Agentic RAG 代表了检索增强技术的最新方向。通过查询分解、自适应检索、自我反思和纠正机制,Agentic RAG 能够处理更复杂的查询,提供更准确、更可靠的答案。在构建企业级知识问答系统时,Agentic RAG 是值得深入研究的技术方向。