附录 5A:ReAct 工程优化
这一节处理三个实际问题:每轮 token 怎么涨的、工具结果要不要缓存、trace 怎么存怎么查。
Token 增长分析
ReAct 循环的 context 是单调递增的:每轮追加一条 assistant message(模型的 action)和一条 tool message(工具返回)。
具体算一下杭州旅游助手 4 轮循环的 token 变化:
from agent_patterns_lab.runtime.types import Message
def estimate_tokens(text: str, chars_per_token: float = 2.5) -> int:
    """Roughly estimate how many tokens ``text`` occupies.

    For Chinese text, 1 token is about 2-3 characters, hence the default
    ratio of 2.5 characters per token.

    Args:
        text: The text to measure.
        chars_per_token: Average number of characters per token.

    Returns:
        The character count divided by ``chars_per_token``, truncated to int.
    """
    char_count = len(text)
    estimate = char_count / chars_per_token
    return int(estimate)
# Simulate how the message history grows over 4 ReAct rounds.
# The short literal is padded with 800 filler characters to stand in for the
# omitted body of the real system prompt.
system_prompt = "你是 ReAct 旅游助手...[省略完整 prompt,约 400 字]" + "x" * 800
user_task = "帮我安排明天的杭州一日游,我喜欢喝茶和吃小吃,告诉我带什么。"

# Each entry is (step name, action JSON appended for the model, observation
# appended for the tool). The final round has no observation.
rounds = [
    ("get_weather", '{"type":"tool","tool":"get_weather","args":{"city":"杭州"}}',
     '{"city":"杭州","forecast":"15点后小雨","temperature_c":"18-23","packing_hint":"带伞和薄外套"}'),
    ("search_places", '{"type":"tool","tool":"search_places","args":{"city":"杭州","interests":["茶","小吃"]}}',
     '{"places":[{"name":"西湖断桥","type":"户外","best_time":"上午"},{"name":"茶叶博物馆","type":"室内","best_time":"下午"},{"name":"河坊街","type":"半室内","best_time":"傍晚"}]}'),
    ("estimate_route", '{"type":"tool","tool":"estimate_route","args":{"places":["西湖断桥","茶叶博物馆","河坊街"]}}',
     '{"route":"西湖断桥 → 茶叶博物馆 → 河坊街","total_transit_minutes":55}'),
    ("final", '{"type":"final","answer":"上午西湖断桥..."}', ""),
]

# Running character count of the whole context; starts with system + user.
messages_chars = len(system_prompt) + len(user_task)

print(f"{'轮次':<6} {'新增 action':<14} {'新增 observation':<18} {'累计字符':<10} {'估算 token':<10}")
print("-" * 65)
for i, (_name, action, observation) in enumerate(rounds):
    action_chars = len(action)
    obs_chars = len(observation)
    messages_chars += action_chars + obs_chars
    # Rough estimate at 2.5 characters per token, applied to the cumulative
    # character count (not to any string representation of it).
    actual_tokens = int(messages_chars / 2.5)
    print(f"{i:<6} {action_chars:<14} {obs_chars:<18} {messages_chars:<10} {actual_tokens:<10}")
输出:
轮次 新增 action 新增 observation 累计字符 估算 token
-----------------------------------------------------------------
0 60 95 1215 486
1 76 182 1473 589
2 78 72 1623 649
3 42 0 1665 666
前三轮(工具调用)每轮增量大约 150-260 字符(60-105 token),最后一轮只追加约 40 字符的 final 消息。4 轮下来从 486 token 涨到 666 token,增长 37%。
如果跑 20 轮呢?
# Simulate 20 rounds, each adding 200 characters on average.
base_chars = 1060  # system + user
per_round_chars = 200  # average size of action + observation

header = f"{'轮次':<6} {'累计字符':<10} {'估算 token':<10} {'相对首轮':<10}"
print(header)
print("-" * 40)

# Token estimate for round 0; every later round is reported relative to it.
first_round_tokens = int(base_chars / 2.5)
for round_no in range(21):
    total = base_chars + per_round_chars * round_no
    tokens = int(total / 2.5)
    print(f"{round_no:<6} {total:<10} {tokens:<10} {tokens / first_round_tokens:.1f}x")
输出:
轮次 累计字符 估算 token 相对首轮
----------------------------------------
0 1060 424 1.0x
1 1260 504 1.2x
2 1460 584 1.4x
3 1660 664 1.6x
4 1860 744 1.8x
5 2060 824 1.9x
...
10 3060 1224 2.9x
15 4060 1624 3.8x
20 5060 2024 4.8x
20 轮后输入 token 是首轮的近 5 倍。这不仅是成本问题——也是质量问题。context 越长,模型对早期信息的关注度越低。
工具结果缓存
同一个 ReAct 循环内,模型可能重复查询相同的工具。最常见的场景:
- 模型第 2 轮查了天气,第 6 轮"忘记"之前查过,又查一次
- 停滞检测触发前,连续 2 次查同一个工具
用 InMemoryCache 对工具结果做缓存:
import json
from agent_patterns_lab.runtime.cache import InMemoryCache, cached
from agent_patterns_lab.runtime.tools import Tool, ToolRegistry
from agent_patterns_lab.runtime.tracing import Tracer
class CachedToolRegistry:
    """Wrap a ToolRegistry with a cache layer for tool results.

    Results are keyed by tool name plus the JSON-serialized arguments, and
    hit/miss counters are kept on the instance.
    """

    def __init__(self, registry: ToolRegistry, ttl_s: float = 300.0):
        """Store the wrapped registry, create an empty cache and zero the counters.

        Args:
            registry: The registry whose ``call`` results are cached.
            ttl_s: Time-to-live value forwarded to ``cached`` on every call.
        """
        self._registry = registry
        self._cache: InMemoryCache[str] = InMemoryCache()
        self._ttl_s = ttl_s
        self.cache_hits = 0
        self.cache_misses = 0

    def call(self, name: str, args: dict, *, tracer: Tracer | None = None) -> str:
        """Call a tool through the cache and record whether it was a hit or a miss.

        Args:
            name: Tool name, forwarded to the wrapped registry.
            args: Tool arguments; must be JSON-serializable to build the key.
            tracer: Optional tracer; receives a ``tool_cache.hit`` or
                ``tool_cache.miss`` event when provided.

        Returns:
            Whatever ``cached`` returns for this key.
        """
        # Key is tool name + arguments serialized with sorted keys, so the
        # ordering of keys in ``args`` does not affect the key.
        serialized_args = json.dumps(args, sort_keys=True, ensure_ascii=False)
        cache_key = f"{name}:{serialized_args}"

        # NOTE(review): hit detection assumes ``cached`` invokes ``compute``
        # only when the key is absent or expired - confirm against its docs.
        computed = False

        def run_tool():
            nonlocal computed
            computed = True
            return self._registry.call(name, args, tracer=tracer)

        result = cached(self._cache, key=cache_key, compute=run_tool, ttl_s=self._ttl_s, tracer=tracer)

        if computed:
            self.cache_misses += 1
            event_name = "tool_cache.miss"
        else:
            self.cache_hits += 1
            event_name = "tool_cache.hit"
        if tracer:
            tracer.emit(event_name, tool=name, key=cache_key)
        return result

    def list(self):
        """Delegate to the wrapped registry's ``list``."""
        return self._registry.list()
# Demo
def get_weather(args: dict) -> str:
    """Return a fixed forecast as a JSON string; ``args`` is not consulted."""
    forecast = {"forecast": "15点后小雨"}
    return json.dumps(forecast, ensure_ascii=False)
# Register a single tool and wrap the registry with the caching layer.
weather_tool = Tool(name="get_weather", description="查询天气", handler=get_weather)
registry = ToolRegistry([weather_tool])
cached_tools = CachedToolRegistry(registry, ttl_s=300.0)

hangzhou_query = {"city": "杭州", "date": "明天"}
shanghai_query = {"city": "上海", "date": "明天"}

# First call: miss (a fresh dict is passed each time, as separate literals would be)
r1 = cached_tools.call("get_weather", dict(hangzhou_query))
print(f"第1次: {r1} (hits={cached_tools.cache_hits}, misses={cached_tools.cache_misses})")

# Second call with the same arguments: hit
r2 = cached_tools.call("get_weather", dict(hangzhou_query))
print(f"第2次: {r2} (hits={cached_tools.cache_hits}, misses={cached_tools.cache_misses})")

# Different arguments: miss
r3 = cached_tools.call("get_weather", dict(shanghai_query))
print(f"第3次: {r3} (hits={cached_tools.cache_hits}, misses={cached_tools.cache_misses})")
输出:
第1次: {"forecast": "15点后小雨"} (hits=0, misses=1)
第2次: {"forecast": "15点后小雨"} (hits=1, misses=1)
第3次: {"forecast": "15点后小雨"} (hits=1, misses=2)
缓存 key 是 tool_name:sorted_args_json。同一个工具 + 同样的参数就命中。参数不同(杭州 vs 上海)就 miss。
什么时候不该缓存:
- 工具有副作用(创建订单、发邮件)——缓存会让你以为操作执行了,但实际没有
- 结果时效性很短(股票价格)——5 分钟 TTL 可能太长
- 工具返回有随机性(推荐引擎)——缓存会让推荐结果固化
JSONL Trace 格式
项目用 JSONL(每行一个 JSON 对象)保存 trace。这个格式有几个好处:
- 可追加——文件打开后直接 append,不需要维护 JSON 数组的闭合括号
- 可流式读取——不需要把整个文件加载到内存
- 工具友好——jq、grep、awk 都可以直接处理
一次完整 ReAct 运行的 trace 文件长这样:
{"name": "react.step", "ts": 1715425200.0, "data": {"step": 0}}
{"name": "llm.complete", "ts": 1715425200.1, "data": {"model": "mock", "output": "{\"type\":\"tool\",...}"}}
{"name": "react.tool_call", "ts": 1715425200.2, "data": {"step": 0, "tool": "get_weather", "args": {"city": "杭州"}}}
{"name": "tool.call", "ts": 1715425200.3, "data": {"tool_name": "get_weather", "args": {"city": "杭州"}}}
{"name": "tool.result", "ts": 1715425200.4, "data": {"tool_name": "get_weather", "output": "{\"forecast\":\"15点后小雨\"}"}}
{"name": "react.step", "ts": 1715425200.5, "data": {"step": 1}}
{"name": "react.tool_call", "ts": 1715425200.6, "data": {"step": 1, "tool": "search_places", "args": {}}}
{"name": "tool.call", "ts": 1715425200.7, "data": {"tool_name": "search_places", "args": {}}}
{"name": "tool.result", "ts": 1715425200.8, "data": {"tool_name": "search_places", "output": "{\"places\":[...]}"}}
{"name": "react.step", "ts": 1715425200.9, "data": {"step": 2}}
{"name": "react.tool_call", "ts": 1715425201.0, "data": {"step": 2, "tool": "estimate_route", "args": {}}}
{"name": "react.step", "ts": 1715425201.1, "data": {"step": 3}}
{"name": "react.final", "ts": 1715425201.2, "data": {"step": 3, "answer": "上午西湖断桥..."}}
{"name": "loop.done", "ts": 1715425201.3, "data": {"step_index": 3}}
常用调试命令
查看所有工具调用:
# Print one line per react.tool_call event found in the trace file.
cat .traces/react_run.jsonl | python -c "
import sys, json
for raw_line in sys.stdin:
    event = json.loads(raw_line)
    if event['name'] == 'react.tool_call':
        payload = event['data']
        shown_args = json.dumps(payload.get('args',{}), ensure_ascii=False)
        print(f\"Step {payload['step']}: {payload['tool']}({shown_args})\")"
输出:
Step 0: get_weather({"city": "杭州", "date": "明天"})
Step 1: search_places({"city": "杭州", "interests": ["茶", "小吃"]})
Step 2: estimate_route({"places": ["西湖断桥", "茶叶博物馆", "河坊街"]})
统计每步的时间消耗:
import json
def analyze_trace(events: list[dict]) -> None:
    """Print a per-step timing summary for a list of trace events.

    Each event is a dict with a ``name``, a ``ts`` timestamp and an optional
    ``data`` dict. Events are grouped by ``data["step"]``, falling back to
    ``data["step_index"]``; events carrying neither key are ignored. A step's
    duration is the span between its earliest and latest timestamp, printed
    in milliseconds (timestamps are treated as seconds).

    Args:
        events: Trace events, in any order.

    Raises:
        KeyError: If an event lacks ``name`` or ``ts``.
    """
    steps: dict = {}
    for event in events:
        name = event["name"]
        ts = event["ts"]
        # Treat an explicit ``"data": null`` the same as a missing payload.
        data = event.get("data") or {}
        step = data.get("step", data.get("step_index"))
        if step is None:
            continue
        info = steps.setdefault(step, {"start": ts, "end": ts, "events": []})
        # Track both bounds so out-of-order events still yield the full span.
        info["start"] = min(info["start"], ts)
        info["end"] = max(info["end"], ts)
        info["events"].append(name)

    print(f"{'步骤':<6} {'耗时(ms)':<12} {'事件数':<8} {'包含事件'}")
    print("-" * 60)
    for step in sorted(steps):
        info = steps[step]
        duration_ms = (info["end"] - info["start"]) * 1000
        # Only the first three event names are listed to keep rows short.
        print(f"{step:<6} {duration_ms:<12.1f} {len(info['events']):<8} {', '.join(info['events'][:3])}")
# Simulated trace data: four steps (0-3), with timestamps increasing from one
# event to the next; every event carries its step number in "data".
sample_trace = [
{"name": "react.step", "ts": 100.000, "data": {"step": 0}},
{"name": "react.tool_call", "ts": 100.050, "data": {"step": 0, "tool": "get_weather"}},
{"name": "tool.result", "ts": 100.120, "data": {"step": 0}},
{"name": "react.step", "ts": 100.130, "data": {"step": 1}},
{"name": "react.tool_call", "ts": 100.200, "data": {"step": 1, "tool": "search_places"}},
{"name": "tool.result", "ts": 100.350, "data": {"step": 1}},
{"name": "react.step", "ts": 100.360, "data": {"step": 2}},
{"name": "react.tool_call", "ts": 100.400, "data": {"step": 2, "tool": "estimate_route"}},
{"name": "tool.result", "ts": 100.450, "data": {"step": 2}},
{"name": "react.step", "ts": 100.460, "data": {"step": 3}},
{"name": "react.final", "ts": 100.510, "data": {"step": 3, "answer": "..."}},
]
analyze_trace(sample_trace)
输出:
步骤 耗时(ms) 事件数 包含事件
------------------------------------------------------------
0 120.0 3 react.step, react.tool_call, tool.result
1 220.0 3 react.step, react.tool_call, tool.result
2 90.0 3 react.step, react.tool_call, tool.result
3 50.0 2 react.step, react.final
Step 1(search_places)耗时最长——这在真实场景中很常见,搜索类工具通常比天气查询慢。如果某一步耗时异常(比如超过 5 秒),trace 能直接定位到是哪个工具拖慢了整个循环。
优化检查清单
| 问题 | 检查方式 | 解决方案 |
|---|---|---|
| Token 增长过快 | 第 N 轮 token 数 / 第 1 轮 token 数 > 3x | 对旧轮次的工具结果做摘要 |
| 重复调用工具 | trace 中同工具 + 同参数出现 >1 次 | CachedToolRegistry |
| 单步耗时过长 | trace 时间差 > 5s | 加工具超时;换更快的 API |
| 停滞但未检测到 | 连续 N 步无新信息 | 降低 stall_window |
| 最终答案缺信息 | 答案没用到某轮 observation | 检查 system prompt 的指令是否够具体 |
| 循环次数过多 | 超过 8 轮才出结果 | 工具描述是否太模糊,让模型选错了工具 |
返回 第五章。