第2072篇:Spring AI深度使用——ChatClient的设计哲学和高级模式
2026/4/30 · 大约 7 分钟
第2072篇:Spring AI深度使用——ChatClient的设计哲学和高级模式
适读人群:正在使用Spring AI构建AI应用的Java工程师 | 阅读时长:约19分钟 | 核心价值:深入理解ChatClient的Fluent API设计,掌握Advisor链、函数调用、多模型并发、流式响应等高级用法
Spring AI的ChatClient是我见过的Java AI框架里设计最优雅的API之一。
它的Fluent Builder模式让代码读起来几乎像自然语言——你可以清晰地看到一次AI交互的完整流程。但很多人用了一段时间后,发现有些场景不知道怎么处理。
这篇文章深入ChatClient的设计,讲那些进阶用法。
ChatClient的设计哲学
先看一个完整的使用示例,感受API的表达力:
/**
 * Showcase of the ChatClient fluent API: building a fully configured client and
 * issuing requests with per-call overrides.
 */
@Service
@RequiredArgsConstructor
public class AdvancedChatService {

    private final ChatClient.Builder chatClientBuilder;
    private final VectorStore vectorStore;
    private final OrderTools orderTools;

    /**
     * Builds a customer-service client with:
     * <ul>
     *   <li>a templated system prompt ({@code brand} and {@code agentName} are filled in per request),</li>
     *   <li>default tools available to every request,</li>
     *   <li>a default advisor chain: chat memory, RAG retrieval, request logging.</li>
     * </ul>
     *
     * <p>The injected builder is cloned before it is customised, so the defaults registered
     * here are not accumulated on the shared builder when this method is called repeatedly.
     *
     * @return a new, fully configured {@link ChatClient}
     */
    public ChatClient buildCustomerServiceClient() {
        return chatClientBuilder.clone()
                // Base configuration
                .defaultSystem("""
                        你是{brand}品牌的客服助手{agentName}。
                        请用友好、专业的语气回答用户问题。
                        如果遇到无法回答的问题,请建议用户联系人工客服。
                        """)
                // Default tools (available to every request)
                .defaultTools(orderTools)
                // Default advisor chain.
                // NOTE(review): execution order is presumably governed by each advisor's
                // getOrder() rather than by registration order alone - confirm.
                .defaultAdvisors(
                        new MessageChatMemoryAdvisor(new InMemoryChatMemory()), // chat memory
                        new QuestionAnswerAdvisor(vectorStore),                 // RAG retrieval
                        new SimpleLoggerAdvisor()                               // request logging
                )
                .build();
    }

    /**
     * Sends one user message through an already-built client.
     *
     * @param client  the client to use
     * @param userId  passed to the advisors as the chat-memory conversation id
     * @param message the user's message
     * @return the text content of the model's reply
     */
    public String chat(ChatClient client, String userId, String message) {
        return client.prompt()
                .system(s -> s.param("brand", "某某品牌")
                        .param("agentName", "小美"))
                .user(message)
                .advisors(a -> a.param(CHAT_MEMORY_CONVERSATION_ID_KEY, userId))
                .call()
                .content();
    }

    /**
     * Request-level configuration: an individual call can add to the client's defaults.
     *
     * @param client           the client to use
     * @param message          the user's message
     * @param useKnowledgeBase when {@code true}, a {@link QuestionAnswerAdvisor} over the
     *                         vector store is added for this request only
     * @return the text content of the model's reply
     */
    public String chatWithCustomConfig(
            ChatClient client,
            String message,
            boolean useKnowledgeBase) {
        var prompt = client.prompt().user(message);

        // Decide dynamically whether to use RAG. The returned spec is kept instead of
        // relying on the fluent call mutating the receiver in place.
        if (useKnowledgeBase) {
            prompt = prompt.advisors(new QuestionAnswerAdvisor(vectorStore));
        }
        return prompt.call().content();
    }
}
深度理解Advisor链
/**
 * Example of a custom around-advisor: a safety check before the call, request-context
 * enrichment for downstream advisors, and latency / token metrics after the call.
 */
@Component
@Slf4j
@RequiredArgsConstructor
public class ProductionGradeAdvisor implements CallAroundAdvisor {

    // Field order defines the generated constructor: (metricsService, safetyService).
    private final MetricsService metricsService;
    private final ContentSafetyService safetyService;

    // Advisor execution order (a smaller value runs earlier).
    // Suggested ordering: safety check < cache < logging < RAG < memory.
    private static final int ORDER = 5;

    @Override
    public int getOrder() {
        return ORDER;
    }

    @Override
    public String getName() {
        return "ProductionGradeAdvisor";
    }

    /**
     * Around advice: may intervene both before and after the downstream call.
     *
     * <p>Flow:
     * <ol>
     *   <li>before: reject the request if the safety service blocks the user text;</li>
     *   <li>{@code chain.nextAroundCall(...)}: run the remaining advisors and the model call;</li>
     *   <li>after: record latency and, when available, token usage.</li>
     * </ol>
     *
     * @param advisedRequest the incoming request
     * @param chain          the remaining advisor chain
     * @return the downstream response, or a canned refusal when the request is blocked
     */
    @Override
    public AdvisedResponse aroundCall(
            AdvisedRequest advisedRequest,
            CallAroundAdvisorChain chain) {
        long startTime = System.currentTimeMillis();
        String requestId = UUID.randomUUID().toString();

        // ===== BEFORE =====
        // 1. Safety check
        if (safetyService.isBlocked(advisedRequest.userText())) {
            log.warn("请求被安全策略拦截: requestId={}", requestId);
            return createBlockedResponse(advisedRequest, requestId);
        }

        // 2. Pass information to downstream advisors through the request context.
        //    A plain copy is used instead of double-brace initialisation, which would
        //    create an anonymous HashMap subclass holding a reference to this advisor.
        var context = new HashMap<>(advisedRequest.adviseContext());
        context.put("requestId", requestId);
        context.put("startTime", startTime);
        AdvisedRequest enrichedRequest = advisedRequest.toBuilder()
                .adviseContext(context)
                .build();

        // ===== Invoke the rest of the chain =====
        AdvisedResponse response;
        try {
            response = chain.nextAroundCall(enrichedRequest);
        } catch (Exception e) {
            metricsService.recordError("chat", e.getClass().getSimpleName());
            throw e;
        }

        // ===== AFTER =====
        long latency = System.currentTimeMillis() - startTime;
        metricsService.recordLatency("chat", latency);

        // Usage may be missing (for example on a short-circuited response), so it is
        // read once and guarded instead of being dereferenced blindly.
        ChatResponse chatResponse = response.response();
        var usage = (chatResponse != null && chatResponse.getMetadata() != null)
                ? chatResponse.getMetadata().getUsage()
                : null;
        if (usage == null) {
            log.info("AI请求完成: requestId={}, latency={}ms, tokens={}",
                    requestId, latency, "n/a");
            return response;
        }

        var totalTokens = usage.getTotalTokens();
        metricsService.recordTokenUsage(totalTokens);
        log.info("AI请求完成: requestId={}, latency={}ms, tokens={}",
                requestId, latency, totalTokens);
        return response;
    }

    /**
     * Builds the refusal returned when the safety check blocks a request.
     *
     * @param request   the blocked request; its advise context is carried into the response
     * @param requestId id of the blocked request (currently unused here)
     */
    private AdvisedResponse createBlockedResponse(AdvisedRequest request, String requestId) {
        ChatResponse blockedResponse = new ChatResponse(List.of(
                new Generation(
                        new AssistantMessage("很抱歉,您的请求不符合服务规范,无法处理。"),
                        ChatGenerationMetadata.builder().build()
                )
        ));
        return new AdvisedResponse(blockedResponse, request.adviseContext());
    }
}

/**
 * Advisor ordering demo: shows how several advisors are combined on one client.
 */
@Configuration
public class AdvisorOrderConfig {

    /**
     * Builds a client with the full advisor chain.
     *
     * <p>The custom advisors are injected as parameters instead of being constructed
     * from fields that this class does not declare. {@code ProductionGradeAdvisor} is a
     * {@code @Component}; {@code ResponseCachingAdvisor} is not shown in this snippet and
     * is assumed to be registered as a bean as well - TODO confirm.
     */
    @Bean
    public ChatClient chatClient(
            ChatClient.Builder builder,
            VectorStore vectorStore,
            ChatMemory chatMemory,
            ProductionGradeAdvisor productionGradeAdvisor,
            ResponseCachingAdvisor responseCachingAdvisor) {
        return builder
                .defaultAdvisors(
                        // Intended execution order (first to last).
                        // NOTE(review): only ProductionGradeAdvisor's order (5) is set in this
                        // file; the order values quoted for the other advisors are illustrative.
                        // Confirm their actual getOrder() values, or set them explicitly.
                        // 1. Safety check (order=5): runs first and rejects violating requests
                        productionGradeAdvisor,
                        // 2. Cache (order=10): on a hit, returns directly and skips the rest
                        responseCachingAdvisor,
                        // 3. RAG retrieval (order=20): retrieves related content into the context
                        QuestionAnswerAdvisor.builder(vectorStore)
                                .withSearchRequest(SearchRequest.defaults().withTopK(3))
                                .build(),
                        // 4. Chat memory (order=30): runs last and maintains the conversation history
                        new MessageChatMemoryAdvisor(chatMemory)
                        // A smaller number runs earlier in the "before" phase;
                        // the "after" phase (response handling) runs in reverse order.
                )
                .build();
    }
}
函数调用的进阶用法
/**
 * Advanced function-calling patterns for Spring AI.
 */
@Configuration
public class AdvancedFunctionConfig {

    /**
     * Registers the function callbacks exposed to the model.
     *
     * <p>Only the order-query function is registered here. {@code adminFunctions} is
     * accepted but not used by this method; no permission-dependent registration
     * happens in this bean.
     *
     * @return the callbacks available to all users
     */
    @Bean("permissionAwareFunctions")
    public FunctionCallback[] permissionAwareFunctions(
            OrderFunctions orderFunctions,
            AdminFunctions adminFunctions) {
        // Available to all users
        List<FunctionCallback> callbacks = new ArrayList<>();
        callbacks.add(FunctionCallbackWrapper.builder(orderFunctions::queryOrder)
                .withName("queryOrder")
                .withDescription("查询订单信息")
                .build());
        return callbacks.toArray(new FunctionCallback[0]);
    }

    /**
     * Inventory lookup function whose result is a JSON string handed back to the model.
     *
     * <p>Every string value is JSON-escaped before it is interpolated, so a product name
     * or id containing quotes, backslashes or control characters still yields valid JSON.
     *
     * @param inventoryService source of inventory data
     * @return a function mapping an {@link InventoryRequest} to a JSON description
     */
    @Bean
    @Description("查询库存,返回JSON格式的库存信息")
    public Function<InventoryRequest, String> checkInventory(
            InventoryService inventoryService) {
        return request -> {
            InventoryInfo info = inventoryService.getInventory(request.productId());

            // Explicit "not found" payload so the model can tell a miss from an error.
            if (info == null) {
                return String.format("{\"found\": false, \"productId\": \"%s\"}",
                        escapeJson(request.productId()));
            }

            return String.format("""
                    {
                    "found": true,
                    "productId": "%s",
                    "productName": "%s",
                    "currentStock": %d,
                    "status": "%s",
                    "estimatedRestockDate": "%s"
                    }
                    """,
                    escapeJson(info.getProductId()),
                    escapeJson(info.getProductName()),
                    info.getQuantity(),
                    info.getQuantity() > 0 ? "有货" : "缺货",
                    info.getRestockDate() != null ? escapeJson(info.getRestockDate().toString()) : "未知"
            );
        };
    }

    /**
     * Escapes a value for use inside a double-quoted JSON string.
     *
     * @param value the value to embed; {@code null} is rendered as the text {@code null},
     *              matching what {@code %s} would have produced
     * @return the escaped text, without surrounding quotes
     */
    private static String escapeJson(Object value) {
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"' -> escaped.append("\\\"");
                case '\\' -> escaped.append("\\\\");
                case '\n' -> escaped.append("\\n");
                case '\r' -> escaped.append("\\r");
                case '\t' -> escaped.append("\\t");
                default -> {
                    if (c < 0x20) {
                        // Remaining control characters must use the \\uXXXX form.
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
                }
            }
        }
        return escaped.toString();
    }
}
/** Input of the inventory lookup function: the id of the product to check. */
record InventoryRequest(String productId) {}

/**
 * Controls, per request, which functions the model is allowed to call,
 * based on the permissions of the calling user.
 */
@Service
@RequiredArgsConstructor
public class PermissionAwareChatService {

    private final ChatClient baseChatClient;
    private final UserPermissionService permissionService;

    /**
     * Sends a message with the function set matching the user's permissions.
     *
     * @param userId  user whose permissions are looked up
     * @param message the user's message
     * @return the text content of the model's reply
     */
    public String chat(String userId, String message) {
        UserPermissions permissions = permissionService.getPermissions(userId);

        // Functions every user gets
        var request = baseChatClient.prompt()
                .user(message)
                .functions("queryOrder", "checkInventory");

        // Additional functions for administrators
        if (permissions.isAdmin()) {
            request = request.functions("modifyOrder", "adjustInventory", "generateReport");
        }

        return request.call().content();
    }
}
多模型并发调用
/**
 * Calls several models concurrently and combines their answers.
 * Intended for cases that need higher confidence, using a vote across models.
 */
@Service
@RequiredArgsConstructor
public class MultiModelConsensusService {

    /** Minimum word-overlap ratio for two answers to count as agreeing. */
    private static final double SIMILARITY_THRESHOLD = 0.6;

    private final ChatClient gpt4oClient;
    private final ChatClient claudeClient;
    private final ChatClient geminiClient;

    /**
     * Asks all three models the same question and reports how many of them agree.
     *
     * <p>{@code consensusCount} is 3 when the GPT-4o answer matches both others, 2 when
     * exactly one pair matches, and 1 when no pair matches (the GPT-4o answer is then
     * returned as the primary answer). A model that returns no content contributes an
     * empty answer, which never matches anything.
     *
     * @param question the question sent to every model
     * @return the chosen answer, the agreement level and all raw answers
     */
    public ConsensusResult getConsensusAnswer(String question) {
        // Start all three calls before waiting on any of them.
        CompletableFuture<String> gpt4oFuture =
                CompletableFuture.supplyAsync(() -> ask(gpt4oClient, question));
        CompletableFuture<String> claudeFuture =
                CompletableFuture.supplyAsync(() -> ask(claudeClient, question));
        CompletableFuture<String> geminiFuture =
                CompletableFuture.supplyAsync(() -> ask(geminiClient, question));

        String gpt4oAnswer = gpt4oFuture.join();
        String claudeAnswer = claudeFuture.join();
        String geminiAnswer = geminiFuture.join();

        // Simple agreement check (a real system should use semantic similarity).
        boolean gptClaudeAgree = semanticallySimilar(gpt4oAnswer, claudeAnswer);
        boolean gptGeminiAgree = semanticallySimilar(gpt4oAnswer, geminiAnswer);
        boolean claudeGeminiAgree = semanticallySimilar(claudeAnswer, geminiAnswer);

        // Default: no pair agrees. This is reported as 1, as documented on ConsensusResult.
        int consensusCount = 1;
        String primaryAnswer = gpt4oAnswer;
        if (gptClaudeAgree && gptGeminiAgree) {
            consensusCount = 3;
        } else if (gptClaudeAgree) {
            consensusCount = 2;
        } else if (claudeGeminiAgree) {
            consensusCount = 2;
            primaryAnswer = claudeAnswer;
        } else if (gptGeminiAgree) {
            consensusCount = 2;
        }

        return new ConsensusResult(
                primaryAnswer,
                consensusCount,
                List.of(gpt4oAnswer, claudeAnswer, geminiAnswer)
        );
    }

    /** Sends the question to one model; a missing reply is normalised to an empty string. */
    private static String ask(ChatClient client, String question) {
        String content = client.prompt().user(question).call().content();
        return content != null ? content : "";
    }

    /**
     * Simplified similarity: Jaccard overlap of the whitespace-separated words of both
     * answers, compared against {@link #SIMILARITY_THRESHOLD}.
     *
     * <p>Blank or {@code null} answers are never similar to anything. Because tokens are
     * split on whitespace, text written without spaces forms a single token and only
     * matches an identical answer.
     */
    private boolean semanticallySimilar(String a, String b) {
        if (a == null || b == null || a.isBlank() || b.isBlank()) {
            return false;
        }
        // strip() avoids a spurious empty token caused by leading whitespace.
        Set<String> wordsA = new HashSet<>(Arrays.asList(a.strip().split("\\s+")));
        Set<String> wordsB = new HashSet<>(Arrays.asList(b.strip().split("\\s+")));

        Set<String> intersection = new HashSet<>(wordsA);
        intersection.retainAll(wordsB);

        Set<String> union = new HashSet<>(wordsA);
        union.addAll(wordsB);

        // Both inputs are non-blank here, so the union holds at least one word.
        return (double) intersection.size() / union.size() > SIMILARITY_THRESHOLD;
    }

    /**
     * Outcome of a multi-model vote.
     *
     * @param answer         the answer selected as primary
     * @param consensusCount 1 = no consensus, 2 = two models agree, 3 = all agree
     * @param allAnswers     the raw answers in the order GPT-4o, Claude, Gemini
     */
    public record ConsensusResult(
            String answer,
            int consensusCount,
            List<String> allAnswers
    ) {}
}
流式响应的正确处理
/**
 * Streaming chat endpoints built on Spring AI's reactive API,
 * with an end-of-stream marker and error handling.
 */
@Slf4j
@RestController
@RequiredArgsConstructor
@RequestMapping("/api/stream")
public class StreamingChatController {

    private final ChatClient chatClient;

    /**
     * Streams the reply as Server-Sent Events.
     *
     * <p>Each chunk is sent as a data event. A final {@code done} event carrying
     * {@code [DONE]} marks normal completion. On failure the error is logged and a single
     * {@code error} event is emitted instead.
     *
     * @param message the user's message
     * @param userId  passed to the advisors as the chat-memory conversation id
     */
    @GetMapping(value = "/chat", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
    public Flux<ServerSentEvent<String>> streamChat(
            @RequestParam String message,
            @RequestParam String userId) {
        return chatClient.prompt()
                .user(message)
                .advisors(a -> a.param(CHAT_MEMORY_CONVERSATION_ID_KEY, userId))
                .stream()
                .content()
                .map(chunk -> ServerSentEvent.<String>builder()
                        .data(chunk)
                        .build())
                // End-of-stream marker
                .concatWith(Mono.just(ServerSentEvent.<String>builder()
                        .event("done")
                        .data("[DONE]")
                        .build()))
                // Error handling
                .onErrorResume(e -> {
                    log.error("流式响应错误: userId={}", userId, e);
                    return Flux.just(ServerSentEvent.<String>builder()
                            .event("error")
                            .data("服务异常,请重试")
                            .build());
                });
    }

    /**
     * Streams the reply together with metadata (token usage).
     *
     * <p>Every chunk produces a data event with its text. When the chunk's metadata
     * carries usage information, an additional {@code usage} event is emitted whose JSON
     * payload contains the chunk text and the total token count.
     *
     * @param message the user's message
     */
    @GetMapping(value = "/chat-with-metadata", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
    public Flux<ServerSentEvent<String>> streamChatWithMetadata(
            @RequestParam String message) {
        return chatClient.prompt()
                .user(message)
                .stream()
                .chatResponse()
                .flatMap(response -> {
                    // A chunk may carry no generation or no text; treat that as empty
                    // text instead of failing the whole stream with an NPE.
                    var result = response.getResult();
                    String content = (result != null && result.getOutput() != null)
                            ? result.getOutput().getContent()
                            : null;
                    String text = content != null ? content : "";
                    var contentEvent = ServerSentEvent.<String>builder().data(text).build();

                    var usage = response.getMetadata() != null
                            ? response.getMetadata().getUsage()
                            : null;
                    if (usage == null) {
                        return Flux.just(contentEvent);
                    }

                    // The text is escaped so quotes or newlines in the model output
                    // cannot break the JSON payload.
                    String metaEvent = String.format(
                            "{\"content\":\"%s\",\"totalTokens\":%d}",
                            escapeJson(text),
                            usage.getTotalTokens()
                    );
                    return Flux.just(
                            contentEvent,
                            ServerSentEvent.<String>builder().event("usage").data(metaEvent).build()
                    );
                });
    }

    /** Escapes text for use inside a double-quoted JSON string (no surrounding quotes). */
    private static String escapeJson(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"' -> escaped.append("\\\"");
                case '\\' -> escaped.append("\\\\");
                case '\n' -> escaped.append("\\n");
                case '\r' -> escaped.append("\\r");
                case '\t' -> escaped.append("\\t");
                default -> {
                    if (c < 0x20) {
                        escaped.append(String.format("\\u%04x", (int) c));
                    } else {
                        escaped.append(c);
                    }
                }
            }
        }
        return escaped.toString();
    }
}
ChatClient的实例管理
/**
 * Declares one ChatClient bean per usage scenario (customer service, code review,
 * creative writing).
 *
 * <p>Each bean method clones the injected builder before applying its own defaults,
 * so the scenario-specific settings are applied to the clone and every scenario gets
 * its own client instance.
 */
@Configuration
public class ChatClientInstanceConfig {

    /**
     * Customer-service client: answers from the customer-service knowledge base and
     * keeps conversation history in an in-memory chat memory.
     */
    @Bean("customerServiceClient")
    public ChatClient customerServiceClient(
            ChatClient.Builder builder,
            VectorStore customerServiceKB) {
        ChatClient.Builder scoped = builder.clone();
        scoped = scoped.defaultSystem("你是一个专业的客服助手。");
        scoped = scoped.defaultAdvisors(
                new QuestionAnswerAdvisor(customerServiceKB),
                new MessageChatMemoryAdvisor(new InMemoryChatMemory())
        );
        return scoped.build();
    }

    /**
     * Code-review client: a low temperature (0.1) for more deterministic output and a
     * 4096-token limit to allow longer answers.
     */
    @Bean("codeReviewClient")
    public ChatClient codeReviewClient(
            ChatClient.Builder builder) {
        ChatClient.Builder scoped = builder.clone();
        scoped = scoped.defaultSystem("""
                你是一个经验丰富的高级工程师,专注于代码审查。
                重点关注:安全漏洞、性能问题、代码可读性。
                回答要具体,指出具体的行号和改进建议。
                """);
        var reviewOptions = OpenAiChatOptions.builder()
                .withTemperature(0.1f)  // code review should be more deterministic
                .withMaxTokens(4096)    // allow longer answers
                .build();
        return scoped.defaultOptions(reviewOptions).build();
    }

    /**
     * Creative-writing client: a high temperature (0.9) for more varied output.
     */
    @Bean("creativeWritingClient")
    public ChatClient creativeWritingClient(
            ChatClient.Builder builder) {
        ChatClient.Builder scoped = builder.clone();
        scoped = scoped.defaultSystem("你是一个富有创意的内容创作者。");
        var creativeOptions = OpenAiChatOptions.builder()
                .withTemperature(0.9f)  // higher randomness
                .build();
        return scoped.defaultOptions(creativeOptions).build();
    }
}
Spring AI的ChatClient设计的精髓是:用Fluent API描述AI交互流程,用Advisor链解决横切关注点,用函数调用连接外部系统。三者组合起来,几乎可以处理所有企业AI场景。
