04-从零手写 ReAct 循环：Agent 的心跳是怎么转起来的

直接看代码注释

"""练习 3：从零手写 ReAct 循环（对齐官方仓库写法）—— Agent 的“心跳”

对应 awesome-agentic-ai-zh / examples/stage-3/03-react-from-scratch。

这里的“从零”= 不用 LangGraph / CrewAI 这类框架，自己用一个 while 把
    Thought → Action → Observation → Thought → …
转起来；但【工具调用仍走 API 原生的 tool_calls】，而不是自己解析文本
（自己解析文本的经典写法见对照文件 04-react-text-protocol.py）。
自己写一遍这个循环，才会真正搞懂框架替你藏起来的 4 件事：
    1) messages 数组为什么会越滚越长（每轮都把 assistant + tool 结果追加进去）；
    2) tool_call.id 和工具结果（role="tool" 的 tool_call_id）怎么配对；
    3) finish_reason 为什么是 "tool_calls"（还要继续）或 "stop"（可以收尾）；
    4) max_iter 为什么是必须的 safety net（防止无限循环烧钱）。

模型用 DeepSeek 云服务（OpenAI 兼容），配置见根目录 .env。
"""

import json
import os

from dotenv import load_dotenv
from openai import OpenAI

# Load the .env file first so the three environment lookups below can see its values.
load_dotenv()

# Each setting is read with os.environ[...], so a missing one raises KeyError at import
# time instead of surfacing later as a confusing API error.
_env = os.environ
client = OpenAI(api_key=_env["DEEPSEEK_API_KEY"], base_url=_env["DEEPSEEK_BASE_URL"])
MODEL = _env["MODEL"]


# === 1. 工具定义（含实作）—— 我们的“手”，真正执行的代码 ===
def lookup_fact(query: str) -> str:
    """Return a canned fact for *query* (teaching stub, no external API involved).

    Surrounding whitespace in *query* is ignored for the lookup. When the
    keyword is not in the table, ``"unknown: <query>"`` is returned with the
    query exactly as it was passed in.
    """
    known_facts = {
        "台北人口": "2602000",
        "纽约人口": "8336000",
        "光速": "299792458",  # metres per second
    }
    try:
        return known_facts[query.strip()]
    except KeyError:
        return f"unknown: {query}"


def calculator(expression: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as text.

    Only digits, ``.``, ``+ - * /``, parentheses and spaces are accepted.
    The power operator ``**`` is rejected even though it is spelled with two
    whitelisted characters: a short input such as ``9**9**9**9`` would
    otherwise keep the process busy building an astronomically large integer.

    Args:
        expression: Arithmetic expression, e.g. ``"2602000 / 8336000"``.

    Returns:
        The result formatted with ``str()``, or a string starting with
        ``"error:"`` that says why the expression was refused or failed.
    """
    allowed = frozenset("0123456789.+-*/() ")
    if any(ch not in allowed for ch in expression):
        return f"error: 表达式含不允许字符（{expression}）"
    # "* *" with a space is a syntax error, so a substring test catches every power operator.
    if "**" in expression:
        return f"error: 不支持幂运算 **（{expression}）"
    try:
        # NOTE(security): this is eval() on model-supplied text. The character
        # whitelist above rules out names, attribute access and calls to builtins;
        # the empty __builtins__ mapping is a second line of defence. Keep both if
        # this function is ever extended.
        return str(eval(expression, {"__builtins__": {}}, {}))
    except Exception as e:
        # Syntax errors, division by zero, etc. are reported back as text so the
        # caller (the agent loop) can show them to the model instead of crashing.
        return f"error: {e}"


# OpenAI-compatible tools schema: every tool is wrapped as
# {"type": "function", "function": {...}}.
# The description should spell out *when* to use the tool; it is what the model
# relies on to pick the right one among several.
# Both tools take exactly one required string argument, so the entries are
# generated from a (name, description, argument name, argument description) table.
TOOLS_SPEC = [
    {
        "type": "function",
        "function": {
            "name": tool_name,
            "description": tool_description,
            "parameters": {
                "type": "object",
                "properties": {
                    arg_name: {"type": "string", "description": arg_description},
                },
                "required": [arg_name],
            },
        },
    }
    for tool_name, tool_description, arg_name, arg_description in (
        (
            "lookup_fact",
            "查询一个事实（人口 / 物理常数等）。需要外部事实时调用。",
            "query",
            "查询关键字",
        ),
        (
            "calculator",
            "做基本算术运算（加减乘除）。需要计算时调用。",
            "expression",
            "算术表达式",
        ),
    )
]

# Dispatch table: tool name -> callable that receives the decoded arguments dict
# and runs the matching Python function. Whatever name the model picks is looked
# up here.
TOOL_IMPL = dict(
    lookup_fact=lambda call_args: lookup_fact(call_args["query"]),
    calculator=lambda call_args: calculator(call_args["expression"]),
)


# === 2. ReAct 主循环 —— agent 的心跳 ===
def _run_tool(name: str, raw_arguments: str) -> tuple:
    """Decode one tool call's JSON arguments and execute the tool without raising.

    Returns ``(args, observation)``. Every failure (malformed JSON, unknown tool
    name, missing argument, exception inside the tool) is reported through the
    observation as an ``"error: ..."`` string so it can be handed back to the model.
    When the JSON cannot be decoded, ``args`` is the raw argument text.
    """
    try:
        args = json.loads(raw_arguments)
    except (TypeError, ValueError) as exc:  # json.JSONDecodeError is a ValueError
        return raw_arguments, f"error: invalid JSON arguments ({exc})"

    fn = TOOL_IMPL.get(name)
    if fn is None:
        return args, f"error: unknown tool {name}"
    try:
        return args, fn(args)
    except Exception as exc:
        # Tool boundary: a bad argument set (e.g. a missing key) must become an
        # Observation the model can react to, not an exception that kills the loop.
        return args, f"error: {name} failed ({type(exc).__name__}: {exc})"


def react_loop(question: str, max_iter: int = 6) -> dict:
    """Drive a hand-written ReAct loop on top of the API's native tool_calls.

    Every iteration sends the accumulated ``messages`` to the model. When the
    reply carries tool calls, each one is executed and its result is appended
    as a ``role="tool"`` message before the next iteration; a reply without
    tool calls is returned as the final answer.

    Args:
        question: User question that seeds the conversation.
        max_iter: Maximum number of model round-trips. This is the safety net
            that keeps a model stuck in a tool-calling cycle from running
            (and billing) forever.

    Returns:
        dict with the keys:
            ``final``: content of the closing assistant message, or ``None``
                when ``max_iter`` was exhausted.
            ``trace``: one entry per executed tool call plus one entry for the
                closing reply.
            ``steps``: number of round-trips used.
            ``truncated``: present (``True``) only when ``max_iter`` was
                exhausted without a final answer.
    """
    messages = [{"role": "user", "content": question}]
    trace = []  # per-step record so the caller can inspect how the loop progressed

    for step in range(max_iter):
        # (1) Think + decide: the model sees the whole history plus the tool schemas.
        resp = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=TOOLS_SPEC,
        )
        msg = resp.choices[0].message
        tool_calls = msg.tool_calls or []

        # Echo the assistant turn back into the history; this is why `messages`
        # grows on every round. The tool calls are echoed as well so that the
        # role="tool" messages appended below have an id to pair with.
        assistant_entry = {"role": "assistant", "content": msg.content or ""}
        if tool_calls:
            assistant_entry["tool_calls"] = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {"name": tc.function.name, "arguments": tc.function.arguments},
                }
                for tc in tool_calls
            ]
        messages.append(assistant_entry)

        # (2) Stop condition: a reply without tool calls is the final answer.
        # The test is keyed on the tool_calls list rather than on
        # finish_reason == "stop": returning on "stop" while calls are still
        # attached to the reply would silently skip them.
        if not tool_calls:
            trace.append({"step": step, "thought": msg.content, "tool": None})
            return {"final": msg.content, "trace": trace, "steps": step + 1}

        # (3) Act + observe: run every requested tool and feed the result back.
        for tc in tool_calls:
            args, obs = _run_tool(tc.function.name, tc.function.arguments)
            print(f"[step {step}] {tc.function.name}({args}) → {obs}")

            # Each tool result carries the tool_call_id of the call it answers;
            # that id is how the result is paired with the assistant's request.
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": tc.id,
                    "content": obs,
                }
            )
            trace.append(
                {"step": step, "thought": msg.content, "tool": tc.function.name, "obs": obs}
            )
        # Back to (1): the model continues with the new observations in its history.

    # Budget exhausted without a final answer: say so explicitly instead of
    # pretending the run succeeded.
    return {"final": None, "trace": trace, "steps": max_iter, "truncated": True}


# === 3. 自我验证 ===
if __name__ == "__main__":
    # Demo task that needs several turns of the loop: look up the Taipei
    # population, look up the New York population, divide, then answer.
    divider = "-" * 60
    question = "台北人口除以纽约人口是多少？保留 4 位小数。"

    print(f"❓ 问题：{question}（using DeepSeek {MODEL}）")
    print(divider)
    result = react_loop(question, max_iter=6)
    print(divider)

    print(f"✅ 最终答案：{result['final']}")
    print(f"   共 {result['steps']} 轮")

    # Lenient self-check: the loop must either have produced an answer or have
    # flagged itself as truncated; it must never fail silently.
    answered = result.get("final") is not None
    assert answered or result.get("truncated"), "loop 应收尾或显式 truncate"
    print("✅ 练习 3 通过 —— 你已用原生 tool_calls + 手动 while 循环跑通 ReAct")