第2339篇:Java AI应用中的设计模式实践——策略模式、装饰器模式在AI中的应用
2026/4/30大约 7 分钟
第2339篇:Java AI应用中的设计模式实践——策略模式、装饰器模式在AI中的应用
适读人群:希望提升AI应用代码质量和可扩展性的Java工程师 | 阅读时长:约18分钟 | 核心价值:掌握策略模式、装饰器模式等在AI工程中的具体落地方式,写出可维护可扩展的AI代码
设计模式这个词有时候让人觉得很"教科书",但在AI工程里,有几个经典模式确实特别实用,能解决真实的工程问题。
装饰器模式在Spring AI的Advisor体系里天然存在,但很多人只是照着文档用,没意识到可以自己灵活组合。策略模式在多模型切换、多种Prompt策略场景里几乎是必用的。
这篇文章从真实的AI工程问题出发,展示这些模式怎么解决实际问题。
策略模式:多模型选型和动态切换
场景:你的AI应用需要根据不同情况选择不同的LLM——低成本问题用DeepSeek,复杂分析用GPT-4o,代码相关用Claude,实时信息用带搜索的模型。
// Strategy interface: selects which LLM-backed ChatClient should serve a request.
public interface ModelSelectionStrategy {
// Human-readable strategy name, used for logging in ModelSelector.
String getName();
// Returns true when this strategy applies to the given request context.
boolean matches(ModelSelectionContext context);
// The ChatClient (i.e. the model) that matching requests are routed to.
ChatClient getChatClient();
int getPriority(); // When several strategies match, the highest priority wins
}
// Immutable per-request context used by strategies to decide whether they apply.
public record ModelSelectionContext(
String question,
String userTier, // user tier: free / pro / enterprise
String requestType, // chat / code / analysis / translation
int estimatedInputTokens, // estimated input token count
boolean needsRealtime // whether the answer requires realtime information
) {}
// Strategy 1: route code-related questions to Claude.
@Component
public class CodeQuestionStrategy implements ModelSelectionStrategy {

    // Shared, immutable keyword set — static final since it is identical for
    // every instance (Set.of is already unmodifiable).
    private static final Set<String> CODE_KEYWORDS = Set.of(
            "代码", "bug", "error", "exception", "java", "python", "函数", "方法", "类");

    private final ChatClient claudeClient;

    public CodeQuestionStrategy(@Qualifier("claudeChatClient") ChatClient claudeClient) {
        this.claudeClient = claudeClient;
    }

    @Override
    public String getName() { return "CodeQuestionStrategy"; }

    /**
     * Matches only when the request is explicitly typed "code" AND the
     * lower-cased question contains at least one code-related keyword.
     */
    @Override
    public boolean matches(ModelSelectionContext context) {
        if (!"code".equals(context.requestType())) {
            return false;
        }
        String question = context.question().toLowerCase();
        return CODE_KEYWORDS.stream().anyMatch(question::contains);
    }

    @Override
    public ChatClient getChatClient() { return claudeClient; }

    @Override
    public int getPriority() { return 80; }
}
// Strategy 2: enterprise users with large inputs get the premium model.
@Component
public class EnterpriseUserStrategy implements ModelSelectionStrategy {

    private final ChatClient gpt4Client;

    // BUG FIX: the original declared a final field with no constructor, which
    // does not compile (and gives Spring nothing to inject). Inject the GPT-4o
    // client explicitly; the bean name is assumed — confirm against your config.
    public EnterpriseUserStrategy(@Qualifier("gpt4ChatClient") ChatClient gpt4Client) {
        this.gpt4Client = gpt4Client;
    }

    // Applies only to enterprise-tier users whose estimated input exceeds 2000 tokens.
    @Override
    public boolean matches(ModelSelectionContext context) {
        return "enterprise".equals(context.userTier())
                && context.estimatedInputTokens() > 2000;
    }

    @Override
    public ChatClient getChatClient() { return gpt4Client; }

    @Override
    public int getPriority() { return 90; }

    @Override
    public String getName() { return "EnterpriseUserStrategy"; }
}
// Strategy 3: fallback — always matches, routes to the cheapest model.
@Component
public class DefaultModelStrategy implements ModelSelectionStrategy {

    private final ChatClient defaultClient; // the inexpensive model

    // BUG FIX: the final field was never initialized in the original (compile
    // error). Inject the default/cheap client; bean name assumed — confirm.
    public DefaultModelStrategy(@Qualifier("defaultChatClient") ChatClient defaultClient) {
        this.defaultClient = defaultClient;
    }

    @Override
    public boolean matches(ModelSelectionContext context) {
        return true; // always matches — acts as the safety net
    }

    @Override
    public ChatClient getChatClient() { return defaultClient; }

    @Override
    public int getPriority() { return 0; } // lowest priority, so any other match wins

    @Override
    public String getName() { return "DefaultModelStrategy"; }
}
// Strategy router: picks the applicable strategy with the highest priority.
@Service
@RequiredArgsConstructor
@Slf4j
public class ModelSelector {

    // All ModelSelectionStrategy beans, injected by Spring as a list.
    private final List<ModelSelectionStrategy> strategies;

    /**
     * Returns the ChatClient of the highest-priority matching strategy.
     * On a priority tie the earlier strategy in the injected list wins,
     * matching Stream.max semantics of the previous implementation.
     */
    public ChatClient selectModel(ModelSelectionContext context) {
        ModelSelectionStrategy best = null;
        for (ModelSelectionStrategy candidate : strategies) {
            if (!candidate.matches(context)) {
                continue;
            }
            if (best == null || candidate.getPriority() > best.getPriority()) {
                best = candidate;
            }
        }
        if (best == null) {
            // Unreachable as long as a catch-all strategy (priority 0) is registered.
            throw new IllegalStateException("没有匹配的模型策略(不应该发生)");
        }
        log.debug("选择模型策略:{},用户tier={},问题类型={}",
                best.getName(), context.userTier(), context.requestType());
        return best.getChatClient();
    }
}
装饰器模式:Advisor的组合艺术
Spring AI的Advisor就是装饰器模式的实现。理解这一点后,你可以自由组合:
// Decorator 1: content safety check (outermost layer — blocks unsafe input first).
@Component
@Slf4j // FIX: the original used `log` without declaring any logger
public class SafetyGuardAdvisor implements CallAroundAdvisor {

    private static final List<String> BLOCKED_PATTERNS = List.of(
            "如何制作炸弹", "破解密码的方法", "非法");

    @Override
    public String getName() { return "SafetyGuardAdvisor"; }

    // FIX: Spring's Ordered contract runs LOWER values first. The original
    // returned 100 while claiming "executes first"; next to advisors at 50/60
    // it would actually have run LAST. 0 makes this the outermost advisor.
    @Override
    public int getOrder() { return 0; }

    @Override
    public AdvisedResponse aroundCall(AdvisedRequest request, CallAroundAdvisorChain chain) {
        String userText = request.userText();
        boolean isBlocked = BLOCKED_PATTERNS.stream().anyMatch(userText::contains);
        if (isBlocked) {
            // Short-circuit: build a rejection response and never reach the LLM.
            log.warn("内容安全检查拦截,内容预览:{}",
                    userText.substring(0, Math.min(50, userText.length())));
            return buildRejectedResponse(request, "您的问题包含不允许的内容,请修改后重试");
        }
        return chain.nextAroundCall(request);
    }

    // Builds a degraded AdvisedResponse carrying the rejection message.
    // NOTE(review): exact construction depends on the Spring AI version, and
    // buildBlockedChatResponse(message) is assumed to exist elsewhere (not
    // shown in the article) — implement/confirm before use.
    private AdvisedResponse buildRejectedResponse(AdvisedRequest request, String message) {
        return AdvisedResponse.builder()
                .response(buildBlockedChatResponse(message))
                .adviseContext(request.adviseContext())
                .build();
    }
}
// Decorator 2: cost tracking — records token usage and estimated cost per call.
@Component
@RequiredArgsConstructor // FIX: the final field had no constructor in the original
@Slf4j
public class CostTrackingAdvisor implements CallAroundAdvisor, StreamAroundAdvisor {

    private final CostAccumulator costAccumulator;

    @Override
    public String getName() { return "CostTrackingAdvisor"; }

    // NOTE(review): Spring's Ordered runs LOWER values first (outer layers);
    // verify 50 yields the intended middle position relative to the other advisors.
    @Override
    public int getOrder() { return 50; }

    @Override
    public AdvisedResponse aroundCall(AdvisedRequest request, CallAroundAdvisorChain chain) {
        String userId = (String) request.adviseContext().getOrDefault("userId", "unknown");
        AdvisedResponse response = chain.nextAroundCall(request);
        // Pull token usage from the response metadata and accumulate the cost.
        if (response.response() != null) {
            Usage usage = response.response().getMetadata().getUsage();
            if (usage != null) {
                double cost = calculateCost(usage, extractModelName(request));
                costAccumulator.addCost(userId, cost, usage.getTotalTokens());
                log.debug("调用成本:userId={}, tokens={}, estimatedCost=¥{}",
                        userId, usage.getTotalTokens(), String.format("%.4f", cost));
            }
        }
        return response;
    }

    // Simplified pricing table: USD per 1K tokens {input, output}; the result
    // is converted to CNY at the end. (The original comment said the table was
    // in CNY, which contradicted the USD→CNY conversion below.)
    private double calculateCost(Usage usage, String modelName) {
        Map<String, double[]> pricing = Map.of(
                "gpt-4o", new double[]{0.015, 0.06},
                "gpt-3.5-turbo", new double[]{0.0005, 0.0015},
                "deepseek-chat", new double[]{0.001, 0.002});
        double[] modelPricing = pricing.getOrDefault(modelName, new double[]{0.01, 0.03});
        double usdCost = usage.getPromptTokens() / 1000.0 * modelPricing[0]
                + usage.getGenerationTokens() / 1000.0 * modelPricing[1];
        return usdCost * 7.1; // rough USD→CNY conversion
    }

    private String extractModelName(AdvisedRequest request) {
        // The model name is expected under "modelName" in the advise context.
        return (String) request.adviseContext().getOrDefault("modelName", "unknown");
    }

    @Override
    public Flux<AdvisedResponse> aroundStream(AdvisedRequest request, StreamAroundAdvisorChain chain) {
        // Streaming cost tracking works differently (usage typically arrives
        // with the final chunk); intentionally simplified — pass through as-is.
        return chain.nextAroundStream(request);
    }
}
// Decorator 3: response cache — identical questions skip the LLM entirely.
@Component
@Slf4j // FIX: the original used `log` without declaring any logger
public class ResponseCacheAdvisor implements CallAroundAdvisor {

    private final Cache<String, String> responseCache;

    public ResponseCacheAdvisor() {
        // Bounded in-memory cache: at most 1000 entries, each living one hour.
        this.responseCache = Caffeine.newBuilder()
                .maximumSize(1000)
                .expireAfterWrite(Duration.ofHours(1))
                .build();
    }

    @Override
    public String getName() { return "ResponseCacheAdvisor"; }

    // NOTE(review): Spring's Ordered runs LOWER values first; verify 60 places
    // caching at the intended depth relative to the other advisors.
    @Override
    public int getOrder() { return 60; }

    @Override
    public AdvisedResponse aroundCall(AdvisedRequest request, CallAroundAdvisorChain chain) {
        // Only cache requests explicitly marked cacheable (FAQ-style, not chat).
        boolean isCacheable = Boolean.TRUE.equals(
                request.adviseContext().get("cacheable"));
        if (!isCacheable) {
            return chain.nextAroundCall(request);
        }
        // MD5 is acceptable here: it is a cache key, not a security digest.
        String cacheKey = DigestUtils.md5Hex(request.userText());
        String cachedContent = responseCache.getIfPresent(cacheKey);
        if (cachedContent != null) {
            log.debug("命中缓存:key={}", cacheKey);
            // NOTE(review): buildCachedResponse is not defined in the article;
            // it must wrap cachedContent into an AdvisedResponse for your
            // Spring AI version — implement before shipping.
            return buildCachedResponse(request, cachedContent);
        }
        AdvisedResponse response = chain.nextAroundCall(request);
        // Cache only successful, non-blank responses.
        if (response.response() != null
                && response.response().getResult() != null) {
            String content = response.response().getResult().getOutput().getContent();
            if (content != null && !content.isBlank()) {
                responseCache.put(cacheKey, content);
            }
        }
        return response;
    }
}
在ChatClient中组合这些Advisor
@Configuration
public class AiClientConfig {
// Full-featured client: safety + cost tracking + caching around every call.
@Bean
public ChatClient fullFeatureChatClient(
ChatClient.Builder builder,
SafetyGuardAdvisor safetyAdvisor,
CostTrackingAdvisor costAdvisor,
ResponseCacheAdvisor cacheAdvisor) {
return builder
// NOTE(review): Spring's Ordered contract runs LOWER getOrder() values
// first (outermost layer). With the advisors' orders of 100/50/60 the
// actual chain is CostTracking(50) > Cache(60) > SafetyGuard(100) > LLM.
// The original comment ("higher value runs first") had it backwards —
// adjust the advisors' getOrder() values if safety must be outermost.
.defaultAdvisors(safetyAdvisor, costAdvisor, cacheAdvisor)
.defaultSystem("你是专业的AI助手")
.build();
}
// Different scenarios compose different advisor subsets.
@Bean("minimalChatClient")
public ChatClient minimalChatClient(
ChatClient.Builder builder,
SafetyGuardAdvisor safetyAdvisor) {
// Safety check only, no cost tracking (for internal tools).
return builder
.defaultAdvisors(safetyAdvisor)
.build();
}
}模板方法模式:RAG流水线的变体处理
不同类型的文档,RAG的检索策略可能不同:技术文档用精确匹配,FAQ用语义检索,新闻用时间感知检索。
// Template method: fixes the skeleton of the RAG pipeline; subclasses vary the steps.
public abstract class RagTemplate {

    private final ChatClient chatClient;

    protected RagTemplate(ChatClient chatClient) {
        this.chatClient = chatClient;
    }

    /**
     * The template method — a fixed five-step pipeline: preprocess the
     * question, retrieve candidate documents, filter/rank them, build the
     * context string, and generate the answer. Declared final so subclasses
     * can only customize the individual steps, never the overall flow.
     */
    public final String query(String question) {
        String cleaned = preprocessQuestion(question);
        List<Document> candidates = retrieve(cleaned);
        List<Document> ranked = filterAndRank(candidates, cleaned);
        String context = buildContext(ranked);
        return generate(cleaned, context);
    }

    // Hook: optional question preprocessing (identity by default).
    protected String preprocessQuestion(String question) {
        return question;
    }

    // Required step: each document type implements its own retrieval.
    protected abstract List<Document> retrieve(String question);

    // Hook: optional post-retrieval filtering / re-ranking (no-op by default).
    protected List<Document> filterAndRank(List<Document> docs, String question) {
        return docs;
    }

    // Joins document bodies into one context blob, separated by blank lines.
    protected String buildContext(List<Document> docs) {
        return String.join("\n\n",
                docs.stream().map(Document::getContent).toList());
    }

    // Grounded generation: the retrieved context goes into the system prompt.
    protected String generate(String question, String context) {
        return chatClient.prompt()
                .system("基于以下资料回答问题,不要编造信息:\n" + context)
                .user(question)
                .call()
                .content();
    }
}
// Tech-doc RAG: precise retrieval with a strict similarity threshold.
@Service
public class TechDocRagTemplate extends RagTemplate {

    private final VectorStore techDocStore;

    // FIX: the original had no constructor, so the implicit super() call could
    // not compile (RagTemplate only declares a ChatClient constructor) and
    // techDocStore was never injected. Qualifiers may be needed if multiple
    // ChatClient/VectorStore beans exist — confirm against your config.
    public TechDocRagTemplate(ChatClient chatClient, VectorStore techDocStore) {
        super(chatClient);
        this.techDocStore = techDocStore;
    }

    @Override
    protected List<Document> retrieve(String question) {
        // Tech docs: high threshold and few results to keep answers precise.
        return techDocStore.similaritySearch(
                SearchRequest.query(question)
                        .withTopK(3)
                        .withSimilarityThreshold(0.8));
    }

    @Override
    protected String preprocessQuestion(String question) {
        // Strip colloquial filler particles before embedding the question.
        return question.replaceAll("[啊吧哦呢]", "").trim();
    }
}
// FAQ RAG: loose retrieval with many candidates, then title-based re-ranking.
@Service
public class FaqRagTemplate extends RagTemplate {

    private final VectorStore faqStore;

    // FIX: same defect as TechDocRagTemplate — without a constructor neither
    // the implicit super() call compiled nor was faqStore ever injected.
    public FaqRagTemplate(ChatClient chatClient, VectorStore faqStore) {
        super(chatClient);
        this.faqStore = faqStore;
    }

    @Override
    protected List<Document> retrieve(String question) {
        // FAQ: fetch a wide candidate set; filterAndRank narrows it below.
        return faqStore.similaritySearch(
                SearchRequest.query(question)
                        .withTopK(10)
                        .withSimilarityThreshold(0.5));
    }

    @Override
    protected List<Document> filterAndRank(List<Document> docs, String question) {
        // Prefer FAQ entries whose title contains the question's leading
        // characters (first 10 chars — a crude but cheap relevance signal).
        // The prefix is loop-invariant, so compute it once instead of twice
        // per comparison as the original did.
        String prefix = question.substring(0, Math.min(10, question.length()));
        return docs.stream()
                .sorted((a, b) -> {
                    String titleA = (String) a.getMetadata().getOrDefault("title", "");
                    String titleB = (String) b.getMetadata().getOrDefault("title", "");
                    boolean aMatch = titleA.contains(prefix);
                    boolean bMatch = titleB.contains(prefix);
                    return Boolean.compare(bMatch, aMatch);
                })
                .limit(5)
                .toList();
    }
}
设计模式在AI工程里的价值,不在于"用了多高级的模式",而在于让代码可以在不修改核心逻辑的情况下扩展。当你下次需要加一个新的模型策略、新的Advisor,或者新的RAG变体时,不需要改已有代码,只需要加一个新实现。这才是设计模式的真正意义。
