Spring AI Advisor进阶:自定义拦截器的完整开发指南
2026/4/30大约 8 分钟
Spring AI Advisor进阶:自定义拦截器的完整开发指南
适读人群:已掌握Spring AI基础、想深度定制AI调用链路行为的Java工程师 阅读时长:约18分钟 文章价值:完整掌握Spring AI Advisor机制的内部原理,开发各类生产级自定义Advisor
从"能用"到"用好"的距离
有个在星球里的同学阿强,用Spring AI做了一个智能文档处理系统。功能都跑通了,但他说有几个需求不知道怎么加:
- 所有AI调用要记录到审计库,不能修改每个业务服务的代码
- 某些高敏感用户的请求,要在进LLM之前过一遍内容安全检测
- 对话记录要根据用户会话ID自动关联,不需要业务层感知
- 生产环境想加一个"AI调用速率限制",防止某个用户刷接口
他想把这些逻辑加到一个地方统一管理,而不是散在各个Service里。
我说:这正是Advisor的场景。
Advisor是Spring AI 1.0的核心扩展机制,相当于Spring MVC的HandlerInterceptor,但作用在AI调用链路上。理解了它,你就掌握了Spring AI最重要的扩展点。
Advisor机制原理
核心接口:
/**
 * Advisor that intercepts synchronous (blocking) calls.
 */
public interface CallAroundAdvisor extends Ordered {
/** Returns the advisor's name. */
String getName();
/**
 * Wraps a synchronous call. The implementations shown in this file either hand the
 * request on with {@code chain.nextAroundCall(request)} or return a response of their
 * own without calling the chain.
 */
AdvisedResponse aroundCall(AdvisedRequest advisedRequest,
CallAroundAdvisorChain chain);
}
// 流式调用拦截
public interface StreamAroundAdvisor extends Ordered {
/** Returns the advisor's name. */
String getName();
/**
 * Wraps a streaming call. The implementation shown in this file obtains the downstream
 * stream with {@code chain.nextAroundStream(request)} and decorates it.
 */
Flux<AdvisedResponse> aroundStream(AdvisedRequest advisedRequest,
StreamAroundAdvisorChain chain);
}
Order规则(记住这个,很多人搞反):
- Order数字越小,执行越靠外(最先执行前置,最后执行后置)
- Order数字越大,执行越靠里(最后执行前置,最先执行后置)
开发一:完整的审计Advisor
@Component
@Slf4j
public class FullAuditAdvisor implements CallAroundAdvisor, StreamAroundAdvisor {
private final AuditService auditService;
/** Name under which this advisor is identified. */
@Override
public String getName() {
    return "FullAuditAdvisor";
}

/**
 * Position in the advisor chain. 100 keeps this advisor towards the outside of the
 * chain so that it wraps all calls.
 */
@Override
public int getOrder() {
    return 100;
}
/**
 * Intercepts a synchronous call and writes one audit record for it, whether the rest
 * of the chain returns normally or throws.
 *
 * <p>Auditing is best-effort: a failure while completing or saving the record is
 * logged and swallowed, so it can neither replace the real response nor mask the
 * exception thrown by the chain.
 *
 * @param request the advised request being executed
 * @param chain   the remaining advisor chain
 * @return the response produced by the rest of the chain, unchanged
 */
@Override
public AdvisedResponse aroundCall(AdvisedRequest request,
                                  CallAroundAdvisorChain chain) {
    String auditId = UUID.randomUUID().toString();
    long startTime = System.currentTimeMillis();
    // Capture the request-side audit fields before the call is made.
    AuditRecord record = buildAuditRecord(auditId, request);

    AdvisedResponse response = null;
    Exception error = null;
    try {
        response = chain.nextAroundCall(request);
        return response;
    } catch (Exception e) {
        error = e;
        throw e;
    } finally {
        // Runs on success and on failure. Anything thrown from a finally block would
        // override the pending return value or exception, so audit errors stop here.
        try {
            finalizeAuditRecord(record, response, error,
                    System.currentTimeMillis() - startTime);
            auditService.saveAsync(record);
        } catch (RuntimeException auditFailure) {
            log.error("Failed to write audit record: auditId={}", auditId, auditFailure);
        }
    }
}
/**
 * Intercepts a streaming call and writes one audit record when the stream terminates.
 * Only side-effect operators are used, so the Flux is never blocked.
 *
 * <p>The audit record is created up front, in the thread that calls this method:
 * {@code buildAuditRecord} reads the security context and the MDC, which are typically
 * thread-bound, while the stream callbacks may run on a different thread. Creating it
 * here also makes the recorded request time the start of the call instead of its end.
 *
 * <p>Auditing is best-effort: a failure to save the record is logged and never turned
 * into a stream error.
 *
 * <p>NOTE(review): the state below is created once per invocation, so this assumes the
 * returned Flux is subscribed a single time. A cancelled stream writes no record.
 *
 * @param request the advised request being executed
 * @param chain   the remaining advisor chain
 * @return the downstream response stream with audit side effects attached
 */
@Override
public Flux<AdvisedResponse> aroundStream(AdvisedRequest request,
                                          StreamAroundAdvisorChain chain) {
    String auditId = UUID.randomUUID().toString();
    long startTime = System.currentTimeMillis();
    AuditRecord record = buildAuditRecord(auditId, request);
    AtomicBoolean firstToken = new AtomicBoolean(true);
    StringBuilder contentBuilder = new StringBuilder();

    return chain.nextAroundStream(request)
            .doOnNext(response -> {
                // Collect the streamed output. A chunk may carry no result or no
                // output, so every step is checked before it is dereferenced.
                var chatResponse = response.response();
                var result = chatResponse != null ? chatResponse.getResult() : null;
                if (result != null && result.getOutput() != null) {
                    String token = result.getOutput().getContent();
                    if (token != null) {
                        contentBuilder.append(token);
                    }
                }
                // Record the time to first token exactly once.
                if (firstToken.compareAndSet(true, false)) {
                    long ttft = System.currentTimeMillis() - startTime;
                    log.debug("TTFT: {}ms, auditId={}", ttft, auditId);
                }
            })
            .doOnComplete(() -> {
                // Stream finished normally: store the accumulated content.
                record.setResponseContent(contentBuilder.toString());
                record.setLatencyMs(System.currentTimeMillis() - startTime);
                record.setSuccess(true);
                saveStreamAudit(record, auditId);
            })
            .doOnError(e -> {
                record.setSuccess(false);
                record.setErrorMessage(e.getMessage());
                record.setLatencyMs(System.currentTimeMillis() - startTime);
                saveStreamAudit(record, auditId);
            });
}

/** Saves a streaming audit record; a failure is logged and swallowed (best-effort). */
private void saveStreamAudit(AuditRecord record, String auditId) {
    try {
        auditService.saveAsync(record);
    } catch (RuntimeException auditFailure) {
        log.error("Failed to write streaming audit record: auditId={}", auditId, auditFailure);
    }
}
/**
 * Creates the request-side part of an audit record: audit id, current user, trace id
 * from the MDC, system and user text of the request, and the current time.
 *
 * @param auditId identifier assigned to this audit entry
 * @param request the advised request being audited
 * @return a new record with the request fields filled in
 */
private AuditRecord buildAuditRecord(String auditId, AdvisedRequest request) {
    final String currentUser = extractUserId();
    final String traceId = MDC.get("traceId");
    final String systemPrompt = request.systemText();
    final String userInput = request.userText();
    final LocalDateTime now = LocalDateTime.now();

    return AuditRecord.builder()
            .auditId(auditId)
            .userId(currentUser)
            .traceId(traceId)
            .systemPrompt(systemPrompt)
            .userInput(userInput)
            .requestTime(now)
            .build();
}
/**
 * Completes an audit record with the outcome of a call.
 *
 * <p>This runs from a {@code finally} block, so it must not throw on incomplete
 * responses: a missing result, output, metadata or token count is skipped rather than
 * dereferenced.
 *
 * @param record    the record to complete
 * @param response  the response returned by the chain, or {@code null} if it threw
 * @param error     the exception thrown by the chain, or {@code null} on success
 * @param latencyMs elapsed time of the call in milliseconds
 */
private void finalizeAuditRecord(AuditRecord record,
                                 AdvisedResponse response,
                                 Exception error,
                                 long latencyMs) {
    record.setLatencyMs(latencyMs);

    var chatResponse = response != null ? response.response() : null;
    if (chatResponse != null) {
        record.setSuccess(true);

        var result = chatResponse.getResult();
        if (result != null && result.getOutput() != null) {
            record.setResponseContent(result.getOutput().getContent());
        }

        var metadata = chatResponse.getMetadata();
        var usage = metadata != null ? metadata.getUsage() : null;
        if (usage != null) {
            // Read the counts as Number so this works whether the getters return a
            // primitive or a (possibly null) boxed value.
            Number promptTokens = usage.getPromptTokens();
            Number completionTokens = usage.getGenerationTokens();
            if (promptTokens != null) {
                record.setPromptTokens(promptTokens.intValue());
            }
            if (completionTokens != null) {
                record.setCompletionTokens(completionTokens.intValue());
            }
        }
    } else if (error != null) {
        record.setSuccess(false);
        record.setErrorMessage(error.getMessage());
    }
}
/**
 * Returns the name of the currently authenticated user, or {@code "anonymous"} when
 * no authentication (or no name) is present in the security context.
 *
 * <p>The missing-authentication case is handled with an explicit check instead of
 * catching the resulting exception.
 */
private String extractUserId() {
    var authentication = SecurityContextHolder.getContext().getAuthentication();
    if (authentication == null || authentication.getName() == null) {
        return "anonymous";
    }
    return authentication.getName();
}
}
开发二:内容安全过滤Advisor
@Component
@Slf4j
public class ContentSafetyAdvisor implements CallAroundAdvisor {
private final ContentSafetyService contentSafetyService;
/** Name under which this advisor is identified. */
@Override
public String getName() {
    return "ContentSafetyAdvisor";
}

/**
 * Position in the advisor chain. 200 is intended to place this advisor after auditing
 * and before chat memory.
 * NOTE(review): where it sits relative to the chat-memory advisor depends on that
 * advisor's own order value — confirm.
 */
@Override
public int getOrder() {
    return 200;
}
/**
 * Checks the user input before the model is called and the model output afterwards.
 * A violation on either side is answered with a rejection response; a violating input
 * never reaches the rest of the chain.
 *
 * <p>Missing text is treated as "nothing to inspect": a request without user text, or
 * a response without a result or output content, is passed through instead of failing
 * with a {@code NullPointerException}.
 *
 * @param request the advised request being executed
 * @param chain   the remaining advisor chain
 * @return the chain's response, or a rejection response when a check fails
 */
@Override
public AdvisedResponse aroundCall(AdvisedRequest request,
                                  CallAroundAdvisorChain chain) {
    // Input check.
    String userInput = request.userText();
    if (userInput != null) {
        ContentCheckResult inputCheck = contentSafetyService.check(userInput);
        if (inputCheck.isViolation()) {
            log.warn("输入内容违规,拦截请求: category={}, content={}",
                    inputCheck.getCategory(),
                    userInput.substring(0, Math.min(50, userInput.length())));
            // Answer with a safe rejection; the model is not called.
            return buildRejectionResponse(request, inputCheck);
        }
    }

    // Input is acceptable: continue down the chain.
    AdvisedResponse response = chain.nextAroundCall(request);

    // Output check, in case the model was steered into producing violating content.
    String output = outputTextOf(response);
    if (output != null) {
        ContentCheckResult outputCheck = contentSafetyService.check(output);
        if (outputCheck.isViolation()) {
            log.warn("输出内容违规,替换响应: category={}", outputCheck.getCategory());
            return buildRejectionResponse(request, outputCheck);
        }
    }
    return response;
}

/** Returns the text of the first result, or {@code null} if any part of it is missing. */
private static String outputTextOf(AdvisedResponse response) {
    var chatResponse = response != null ? response.response() : null;
    var result = chatResponse != null ? chatResponse.getResult() : null;
    var message = result != null ? result.getOutput() : null;
    return message != null ? message.getContent() : null;
}
/**
 * Builds the response returned in place of a rejected request or reply.
 *
 * <p>The reply text is chosen by violation category. A {@code null} category is
 * treated like an unknown one and gets the generic reply, because a {@code switch}
 * on a null string would throw.
 *
 * @param request     the request being rejected; its advise context is carried over
 * @param checkResult the failed content check
 * @return a response containing only the rejection message
 */
private AdvisedResponse buildRejectionResponse(AdvisedRequest request,
                                               ContentCheckResult checkResult) {
    String category = checkResult.getCategory();
    String safeReply = switch (category == null ? "" : category) {
        case "HATE_SPEECH" -> "您的输入包含不当内容,请调整后重试。";
        case "VIOLENCE" -> "请避免涉及暴力相关的讨论。";
        case "PRIVACY" -> "请勿在对话中包含他人的个人隐私信息。";
        default -> "该内容不符合使用规范,请调整后重试。";
    };

    // Simplified construction; a real ChatResponse may need more than a bare generation.
    var generation = new Generation(new AssistantMessage(safeReply));
    var chatResponse = new ChatResponse(List.of(generation));
    return AdvisedResponse.builder()
            .response(chatResponse)
            .adviseContext(request.adviseContext())
            .build();
}
}
开发三:速率限制Advisor
@Component
@Slf4j
public class RateLimitAdvisor implements CallAroundAdvisor {
private final LoadingCache<String, RateLimiter> userRateLimiters;
/** Name under which this advisor is identified. */
@Override
public String getName() {
    return "RateLimitAdvisor";
}

/**
 * Position in the advisor chain. 50 is the smallest order among the custom advisors
 * in this file, so rate limiting is applied before them.
 */
@Override
public int getOrder() {
    return 50;
}
public RateLimitAdvisor() {
// 每个用户独立的速率限制器,最多缓存1000个用户
this.userRateLimiters = Caffeine.newBuilder()
.maximumSize(1000)
.expireAfterAccess(Duration.ofHours(1))
.build(userId -> RateLimiter.create(2.0)); // 每用户2 QPS
}
/**
 * Applies the per-user rate limit. The call waits up to 500 ms for a permit; if one is
 * obtained the request continues down the chain, otherwise a "too many requests"
 * style response is returned and the chain is not called.
 *
 * @param request the advised request being executed
 * @param chain   the remaining advisor chain
 * @return the chain's response, or the rate-limit notice
 */
@Override
public AdvisedResponse aroundCall(AdvisedRequest request,
                                  CallAroundAdvisorChain chain) {
    final String caller = getCurrentUserId();
    final RateLimiter limiter = userRateLimiters.get(caller);

    if (limiter.tryAcquire(500, TimeUnit.MILLISECONDS)) {
        return chain.nextAroundCall(request);
    }

    log.warn("用户触发速率限制: userId={}", caller);
    AssistantMessage notice = new AssistantMessage(
            "您的请求频率过高,请稍后再试(每秒最多2次请求)。"
    );
    ChatResponse throttled = new ChatResponse(List.of(new Generation(notice)));
    return AdvisedResponse.builder()
            .response(throttled)
            .adviseContext(request.adviseContext())
            .build();
}
/**
 * Returns the name of the currently authenticated user, or {@code "anonymous"} when
 * no authentication (or no name) is present in the security context.
 *
 * <p>The result is used as a cache key, so it is never {@code null}. The
 * missing-authentication case is handled with an explicit check instead of catching
 * the resulting exception.
 */
private String getCurrentUserId() {
    var authentication = SecurityContextHolder.getContext().getAuthentication();
    if (authentication == null || authentication.getName() == null) {
        return "anonymous";
    }
    return authentication.getName();
}
}
开发四:Prompt注入防护Advisor
@Component
@Slf4j
public class PromptInjectionGuardAdvisor implements CallAroundAdvisor {
// Typical prompt-injection attack patterns.
private static final List<Pattern> INJECTION_PATTERNS = List.of(
        // Also covers "ignore all previous instructions" and "ignore the above
        // instructions"; a single qualifier slot missed both.
        Pattern.compile("(?i)ignore (all )?(the )?(previous|above|all) instructions?"),
        Pattern.compile("(?i)forget (everything|all) (you were|above)"),
        // The optional article sits inside the lookahead. Outside it, the regex
        // backtracks to an empty article and "you are now an assistant" still matches.
        Pattern.compile("(?i)you are now (?!(a |an )?assistant)"),
        Pattern.compile("(?i)(system|sys)[::]"),
        Pattern.compile("\\[INST\\]"),
        Pattern.compile("<\\|im_start\\|>"),
        Pattern.compile("(?i)print (your|the) (system|initial) prompt")
);
/** Name under which this advisor is identified. */
@Override
public String getName() {
    return "PromptInjectionGuardAdvisor";
}

/** Position in the advisor chain. */
@Override
public int getOrder() {
    return 150;
}
/**
 * Rejects requests whose user text matches one of the known injection patterns;
 * everything else continues down the chain.
 *
 * <p>A request without user text has nothing to scan and is passed through, instead
 * of failing with a {@code NullPointerException} in the matcher.
 *
 * @param request the advised request being executed
 * @param chain   the remaining advisor chain
 * @return the chain's response, or a fixed rejection response on a match
 */
@Override
public AdvisedResponse aroundCall(AdvisedRequest request,
                                  CallAroundAdvisorChain chain) {
    String userInput = request.userText();
    Pattern matched = userInput == null ? null : firstInjectionPattern(userInput);
    if (matched == null) {
        return chain.nextAroundCall(request);
    }

    log.warn("检测到Prompt注入攻击: pattern={}, input={}",
            matched.pattern(),
            userInput.substring(0, Math.min(100, userInput.length())));

    // The request could be sanitized and passed on instead; here it is rejected.
    var generation = new Generation(new AssistantMessage(
            "检测到异常输入模式,请使用正常方式提问。"
    ));
    var chatResponse = new ChatResponse(List.of(generation));
    return AdvisedResponse.builder()
            .response(chatResponse)
            .adviseContext(request.adviseContext())
            .build();
}

/** Returns the first injection pattern found in {@code text}, or {@code null} if none matches. */
private static Pattern firstInjectionPattern(String text) {
    for (Pattern pattern : INJECTION_PATTERNS) {
        if (pattern.matcher(text).find()) {
            return pattern;
        }
    }
    return null;
}
}
开发五:上下文传递Advisor
有时候业务层需要向Advisor传递参数,通过adviseContext来实现:
/**
 * Advisor that reads business parameters passed by the caller through the advise
 * context, logs them, and marks high-priority calls in the MDC for the duration of
 * the call.
 */
@Component
@Slf4j
public class BusinessContextAdvisor implements CallAroundAdvisor {

    /** Advise-context key for the business module name. */
    public static final String BIZ_MODULE_KEY = "bizModule";

    /** Advise-context key for the call priority. */
    public static final String PRIORITY_KEY = "priority";

    /** MDC key set while a high-priority call is in progress. */
    private static final String MDC_PRIORITY_KEY = "aiPriority";

    @Override
    public String getName() {
        return "BusinessContextAdvisor";
    }

    @Override
    public int getOrder() {
        return 300;
    }

    /**
     * Reads {@link #BIZ_MODULE_KEY} and {@link #PRIORITY_KEY} from the advise context
     * (defaults {@code "default"} and {@code "normal"}), logs them, and continues the
     * chain. For priority {@code "high"} the MDC entry {@code aiPriority} is set to
     * {@code "high"} during the call and restored to its previous state afterwards.
     *
     * @param request the advised request being executed
     * @param chain   the remaining advisor chain
     * @return the response produced by the rest of the chain
     */
    @Override
    public AdvisedResponse aroundCall(AdvisedRequest request,
                                      CallAroundAdvisorChain chain) {
        String bizModule = contextValue(request, BIZ_MODULE_KEY, "default");
        String priority = contextValue(request, PRIORITY_KEY, "normal");
        log.info("AI调用上下文: bizModule={}, priority={}", bizModule, priority);

        // High priority could be routed to a dedicated model configuration; here it
        // is only flagged in the MDC.
        if (!"high".equals(priority)) {
            return chain.nextAroundCall(request);
        }

        String previous = MDC.get(MDC_PRIORITY_KEY);
        MDC.put(MDC_PRIORITY_KEY, "high");
        try {
            return chain.nextAroundCall(request);
        } finally {
            // Put back whatever was there before, so an outer value is not lost.
            if (previous != null) {
                MDC.put(MDC_PRIORITY_KEY, previous);
            } else {
                MDC.remove(MDC_PRIORITY_KEY);
            }
        }
    }

    /**
     * Reads a context parameter as text. Callers may pass any object as a parameter
     * value, so it is converted with {@code toString()} rather than cast; a missing or
     * {@code null} value yields {@code fallback}.
     */
    private static String contextValue(AdvisedRequest request, String key, String fallback) {
        Object value = request.adviseContext().get(key);
        return value != null ? value.toString() : fallback;
    }
}
// 业务层使用方式
@Service
public class PriorityAwareService {
private final ChatClient chatClient;
/**
 * Sends {@code message} as the user text of a call tagged for the business-context
 * advisor with module {@code "urgent_support"} and priority {@code "high"}, and
 * returns the content of the reply.
 *
 * @param message the user message to send
 * @return the content of the model's reply
 */
public String urgentQuery(String message) {
    var prompt = chatClient.prompt().user(message);
    var tagged = prompt.advisors(spec -> spec
            .param(BusinessContextAdvisor.BIZ_MODULE_KEY, "urgent_support")
            .param(BusinessContextAdvisor.PRIORITY_KEY, "high"));
    return tagged.call().content();
}
}
完整Advisor链配置
把所有自定义Advisor组合起来:
@Configuration
@Slf4j
public class ProductionAdvisorChainConfig {
/**
 * Builds the production {@code ChatClient} with all custom advisors plus a chat-memory
 * advisor and a logging advisor registered as defaults.
 *
 * <p>NOTE(review): the article's own Order rule says the chain position follows each
 * advisor's {@code getOrder()} value. The custom advisors declare 50, 100, 150, 200
 * and 300; the chat-memory and logger advisors are created with their default order
 * here. Confirm that those defaults really place them inside the custom advisors, as
 * the "close to the LLM" and "innermost" remarks below assume.
 */
@Bean
public ChatClient productionChatClient(
        ChatClient.Builder builder,
        RateLimitAdvisor rateLimitAdvisor,
        FullAuditAdvisor auditAdvisor,
        PromptInjectionGuardAdvisor injectionGuardAdvisor,
        ContentSafetyAdvisor contentSafetyAdvisor,
        BusinessContextAdvisor businessContextAdvisor) {
    // Memory management (meant to sit close to the LLM).
    var chatMemoryAdvisor = new MessageChatMemoryAdvisor(new InMemoryChatMemory());
    // Debug logging (meant to be innermost).
    var loggerAdvisor = new SimpleLoggerAdvisor();

    return builder
            .defaultAdvisors(
                    rateLimitAdvisor,        // order 50: rate limiting, rejects early
                    auditAdvisor,            // order 100: auditing, covers all requests
                    injectionGuardAdvisor,   // order 150: prompt-injection guard
                    contentSafetyAdvisor,    // order 200: content safety
                    businessContextAdvisor,  // order 300: business context
                    chatMemoryAdvisor,
                    loggerAdvisor
            )
            .build();
}
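}
执行顺序(前置):RateLimitAdvisor(50) → FullAuditAdvisor(100) → PromptInjectionGuardAdvisor(150) → ContentSafetyAdvisor(200) → BusinessContextAdvisor(300)。MessageChatMemoryAdvisor 和 SimpleLoggerAdvisor 的实际位置由它们各自的 getOrder() 决定,而不是由 defaultAdvisors 中的书写顺序决定。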
Advisor开发的注意事项
| 注意点 | 说明 |
|---|---|
| 流式与同步分开实现 | 流式要实现StreamAroundAdvisor,不能只实现CallAroundAdvisor |
| 不要在finally里抛异常 | 审计写入失败不能影响业务,catch住记录日志 |
| adviseContext线程安全 | adviseContext是Map,多线程场景注意并发 |
| 不要修改原始request | request是不可变的,需要修改时创建新的request |
| Order不能冲突 | 同一个Order会按注册顺序执行,但建议避免相同Order |
小结
Advisor机制是Spring AI中最值得深入掌握的扩展点。它的设计思路和Spring AOP、Servlet Filter一脉相承——把横切关注点从业务逻辑里剥离出来。
掌握了Advisor,你就能:
- 不修改任何业务代码,给所有AI调用加上审计、限流、安全检查
- 通过adviseContext在层与层之间传递业务参数
- 同时支持同步和流式两种模式
阿强照这个思路,把原来散在7个Service里的审计代码,合并成了一个AuditAdvisor,代码量从500行降到了120行,而且新增AI调用时自动覆盖,不需要记得去加日志了。
这就是好的框架扩展机制应该有的效果。
