第2338篇:Java AI的错误处理架构——优雅处理LLM调用异常的工程设计
2026/4/30大约 6 分钟
第2338篇:Java AI的错误处理架构——优雅处理LLM调用异常的工程设计
适读人群:开发Java AI服务的工程师,希望提升AI应用健壮性和用户体验的开发者 | 阅读时长:约17分钟 | 核心价值:建立分层的AI异常处理架构,实现智能降级和用户友好的错误反馈
LLM调用的异常比普通HTTP调用复杂得多。不只是简单的网络错误,还有:
- 内容安全过滤(模型拒绝回答)
- Token限制超出
- 上下文窗口溢出
- 模型幻觉返回了格式错误的JSON
- 工具调用死循环
- API配额耗尽
每种错误对用户的影响和处理方式都不一样,不能统一用一个500错误了事。
异常分类体系
先把AI应用的异常分类清楚,才能对症下药:
/**
 * Top-level base class for all AI-related exceptions.
 *
 * <p>Every instance carries an {@link AiErrorCode} plus two flags: whether the
 * failed call may be retried, and whether the exception message may be shown
 * to the end user.
 */
public abstract class AiException extends RuntimeException {

    private final AiErrorCode code;
    /** Whether the failed operation may be retried. */
    private final boolean retryable;
    /** Whether the exception message may be shown to the end user. */
    private final boolean userVisible;

    /**
     * Creates an exception without an underlying cause.
     *
     * @param code        error code classifying the failure
     * @param message     detail message
     * @param retryable   whether the operation may be retried
     * @param userVisible whether the message may be shown to the user
     */
    protected AiException(AiErrorCode code, String message, boolean retryable, boolean userVisible) {
        super(message);
        this.code = code;
        this.retryable = retryable;
        this.userVisible = userVisible;
    }

    /**
     * Creates an exception that wraps an underlying cause.
     *
     * @param code        error code classifying the failure
     * @param message     detail message
     * @param cause       the lower-level exception being wrapped
     * @param retryable   whether the operation may be retried
     * @param userVisible whether the message may be shown to the user
     */
    protected AiException(AiErrorCode code, String message, Throwable cause,
                          boolean retryable, boolean userVisible) {
        super(message, cause);
        this.code = code;
        this.retryable = retryable;
        this.userVisible = userVisible;
    }

    /** @return the error code classifying this failure */
    public AiErrorCode getCode() {
        return code;
    }

    /** @return {@code true} if the failed operation may be retried */
    public boolean isRetryable() {
        return retryable;
    }

    /** @return {@code true} if the message may be shown to the end user */
    public boolean isUserVisible() {
        return userVisible;
    }
}
/**
 * Error codes for AI call failures. Each constant pairs a stable string code
 * with a default human-readable message.
 */
public enum AiErrorCode {

    // Retryable errors
    RATE_LIMITED("AI_001", "请求频率超限"),
    SERVICE_TIMEOUT("AI_002", "AI服务响应超时"),
    SERVICE_UNAVAILABLE("AI_003", "AI服务暂时不可用"),

    // Non-retryable errors (problems with the request itself)
    CONTEXT_TOO_LONG("AI_004", "输入内容过长"),
    CONTENT_FILTERED("AI_005", "内容被安全过滤"),
    INVALID_REQUEST("AI_006", "无效的请求参数"),

    // System errors (require manual intervention)
    AUTHENTICATION_FAILED("AI_007", "AI服务认证失败"),
    OUTPUT_PARSE_FAILED("AI_008", "AI输出解析失败"),
    TOOL_EXECUTION_FAILED("AI_009", "工具调用执行失败"),
    QUOTA_EXCEEDED("AI_010", "API配额已耗尽");

    /** Stable string identifier, e.g. {@code "AI_001"}. */
    public final String code;

    /** Default message associated with this error code. */
    public final String defaultMessage;

    AiErrorCode(String errorCode, String message) {
        code = errorCode;
        defaultMessage = message;
    }
}
/**
 * Concrete exception for rate-limited requests. Constructed as retryable and
 * user-visible, and carries the number of seconds to wait before retrying.
 */
public class RateLimitedException extends AiException {

    private final int retryAfterSeconds;

    /**
     * @param retryAfterSeconds number of seconds the caller should wait before retrying
     */
    public RateLimitedException(int retryAfterSeconds) {
        super(AiErrorCode.RATE_LIMITED, buildMessage(retryAfterSeconds), true, true);
        this.retryAfterSeconds = retryAfterSeconds;
    }

    /** @return number of seconds to wait before retrying */
    public int getRetryAfterSeconds() {
        return this.retryAfterSeconds;
    }

    /** Builds the detail message that embeds the wait time. */
    private static String buildMessage(int seconds) {
        return "请求频率超限,请在" + seconds + "秒后重试";
    }
}
/**
 * Raised when content is rejected by safety filtering. Constructed as
 * non-retryable and user-visible.
 */
public class ContentFilteredException extends AiException {

    private final String filterReason;

    /**
     * @param filterReason description of why the content was filtered; it is
     *                     appended to the exception message
     */
    public ContentFilteredException(String filterReason) {
        super(AiErrorCode.CONTENT_FILTERED,
                "内容不符合安全要求:" + filterReason,
                false, true);
        this.filterReason = filterReason;
    }

    /** @return the reason passed to the constructor */
    public String getFilterReason() {
        return filterReason;
    }
}
public class ContextTooLongException extends AiException {
private final int currentTokens;
private final int maxTokens;
public ContextTooLongException(int currentTokens, int maxTokens) {
super(AiErrorCode.CONTEXT_TOO_LONG,
String.format("输入内容过长:%d tokens,最大允许%d tokens", currentTokens, maxTokens),
false, true);
this.currentTokens = currentTokens;
this.maxTokens = maxTokens;
}
}
/**
 * Raised when the model output cannot be parsed. Constructed as non-retryable
 * and not user-visible; the unparsed output is kept for diagnostics.
 */
public class OutputParseException extends AiException {

    private static final String MESSAGE = "AI输出解析失败";

    private final String rawOutput;

    /**
     * @param rawOutput the model output that failed to parse
     * @param cause     the underlying parse failure
     */
    public OutputParseException(String rawOutput, Throwable cause) {
        // Technical details are not shown to the user (userVisible = false).
        super(AiErrorCode.OUTPUT_PARSE_FAILED, MESSAGE, cause, false, false);
        this.rawOutput = rawOutput;
    }

    /** @return the raw output passed to the constructor */
    public String getRawOutput() {
        return this.rawOutput;
    }
}
异常转换层:把底层异常转成业务异常
@Component
@Slf4j
public class LlmExceptionTranslator {

    /** Wait time used when the error message carries no usable retry hint. */
    private static final int DEFAULT_RETRY_AFTER_SECONDS = 60;

    /** Upper bound on how many nested causes are inspected for a timeout. */
    private static final int MAX_CAUSE_DEPTH = 16;

    /**
     * Matches "retry after 30", "Retry-After: 30" and similar. At most nine
     * digits are accepted so the captured value always fits in an int.
     */
    private static final java.util.regex.Pattern RETRY_AFTER_HINT =
            java.util.regex.Pattern.compile(
                    "retry[ -]after\\D{0,10}(\\d{1,9})(?!\\d)",
                    java.util.regex.Pattern.CASE_INSENSITIVE);

    /**
     * Translates an exception from the LLM client / HTTP layer into an
     * {@link AiException}.
     *
     * <p>Classification is based on the exception message (keywords are
     * matched case-insensitively; HTTP status codes must appear as standalone
     * numbers) and, for timeouts, on the exception type anywhere in the cause
     * chain. The original exception is preserved as the cause of the result.
     * An exception that is already an {@code AiException} is returned as is.
     *
     * @param e the exception to translate
     * @return the corresponding business exception, never {@code null}
     */
    public AiException translate(Exception e) {
        if (e instanceof AiException alreadyTranslated) {
            return alreadyTranslated;
        }

        String message = e.getMessage();
        if (message == null) {
            message = e.getClass().getSimpleName();
        }
        String lower = message.toLowerCase(java.util.Locale.ROOT);

        // 429 Too Many Requests
        if (hasStatusCode(message, "429") || lower.contains("rate limit")) {
            return withCause(new RateLimitedException(extractRetryAfter(message)), e);
        }

        // 401/403 authentication failure
        if (hasStatusCode(message, "401") || hasStatusCode(message, "403")
                || lower.contains("unauthorized") || lower.contains("api key")) {
            log.error("LLM API认证失败,请检查API Key配置");
            return new AiException(AiErrorCode.AUTHENTICATION_FAILED,
                    "AI服务认证配置有误,请联系管理员", e, false, false) {};
        }

        // Content filtering
        if (lower.contains("content_filter") || lower.contains("safety")
                || lower.contains("违规") || lower.contains("inappropriate")) {
            return withCause(new ContentFilteredException("内容包含不适当信息"), e);
        }

        // Context window exceeded
        if (lower.contains("context_length_exceeded")
                || lower.contains("maximum context length")
                || lower.contains("too long")) {
            // Token counts are not extracted from the message; 0 marks them as unknown.
            return withCause(new ContextTooLongException(0, 0), e);
        }

        // Timeout
        if (isTimeout(e) || lower.contains("timeout") || lower.contains("timed out")) {
            return new AiException(AiErrorCode.SERVICE_TIMEOUT,
                    "AI服务响应超时,请重试", e, true, true) {};
        }

        // 503 Service Unavailable
        if (hasStatusCode(message, "503") || lower.contains("service unavailable")
                || lower.contains("overloaded")) {
            return new AiException(AiErrorCode.SERVICE_UNAVAILABLE,
                    "AI服务繁忙,请稍后重试", e, true, true) {};
        }

        // Unknown error: log it, then wrap it as non-retryable.
        log.error("未知的LLM异常,原始错误:{}", message, e);
        return new AiException(AiErrorCode.SERVICE_UNAVAILABLE,
                "AI服务出现异常,请稍后重试", e, false, false) {};
    }

    /**
     * Extracts the retry wait time, in seconds, from an error message.
     *
     * @return the parsed value, or {@value #DEFAULT_RETRY_AFTER_SECONDS} when
     *         the message contains no usable hint
     */
    private int extractRetryAfter(String message) {
        java.util.regex.Matcher hint = RETRY_AFTER_HINT.matcher(message);
        if (hint.find()) {
            // The pattern guarantees 1-9 digits, so parseInt cannot overflow.
            return Integer.parseInt(hint.group(1));
        }
        return DEFAULT_RETRY_AFTER_SECONDS;
    }

    /**
     * Returns whether {@code code} occurs in {@code message} as a standalone
     * number, i.e. not embedded in a longer digit run such as a token count
     * ("14290" must not count as 429).
     */
    private static boolean hasStatusCode(String message, String code) {
        int index = message.indexOf(code);
        while (index >= 0) {
            int end = index + code.length();
            boolean digitBefore = index > 0 && Character.isDigit(message.charAt(index - 1));
            boolean digitAfter = end < message.length() && Character.isDigit(message.charAt(end));
            if (!digitBefore && !digitAfter) {
                return true;
            }
            index = message.indexOf(code, index + 1);
        }
        return false;
    }

    /** Returns whether the exception or one of its nested causes is a timeout type. */
    private static boolean isTimeout(Throwable error) {
        Throwable current = error;
        for (int depth = 0; current != null && depth < MAX_CAUSE_DEPTH; depth++) {
            if (current instanceof java.util.concurrent.TimeoutException
                    || current instanceof java.net.SocketTimeoutException) {
                return true;
            }
            current = current.getCause();
        }
        return false;
    }

    /** Attaches the original exception as the cause of a freshly created exception. */
    private static <T extends AiException> T withCause(T translated, Throwable cause) {
        try {
            translated.initCause(cause);
        } catch (IllegalStateException ignored) {
            // The exception already has a cause; keep the one it was built with.
        }
        return translated;
    }
}
重试策略:智能重试而不是盲目重试
@Service
@RequiredArgsConstructor
@Slf4j
public class RetryableLlmService {

    private final ChatClient chatClient;
    private final LlmExceptionTranslator exceptionTranslator;

    // Retry configuration
    /** Maximum number of attempts, including the first call. */
    private static final int MAX_RETRIES = 3;
    private static final long INITIAL_DELAY_MS = 1000;
    private static final double BACKOFF_MULTIPLIER = 2.0;

    /**
     * Sends {@code message} as a user prompt, retrying failures that the
     * translator classifies as retryable.
     *
     * <p>Between attempts the calling thread sleeps. The wait follows an
     * exponential backoff starting at {@code INITIAL_DELAY_MS}; for a
     * rate-limit failure the wait is the retry-after time carried by the
     * exception instead. A rate-limit wait does not alter the backoff
     * sequence used for later attempts.
     *
     * @param message the user message to send
     * @return the content of the model response
     * @throws AiException      if the failure is not retryable, or all
     *                          {@code MAX_RETRIES} attempts failed
     * @throws RuntimeException if the thread is interrupted while waiting
     */
    public String chatWithRetry(String message) {
        long backoffMs = INITIAL_DELAY_MS;
        for (int attempt = 1; ; attempt++) {
            try {
                return chatClient.prompt()
                        .user(message)
                        .call()
                        .content();
            } catch (Exception e) {
                AiException aiException = exceptionTranslator.translate(e);

                // Non-retryable error: rethrow immediately.
                if (!aiException.isRetryable()) {
                    log.warn("不可重试的AI错误:{},放弃重试", aiException.getMessage());
                    throw aiException;
                }

                // Attempts exhausted.
                if (attempt >= MAX_RETRIES) {
                    log.error("重试{}次后仍然失败", MAX_RETRIES);
                    throw aiException;
                }

                // Rate limited: honour the retry-after hint for this wait only,
                // leaving the backoff sequence untouched.
                long waitMs = backoffMs;
                if (aiException instanceof RateLimitedException rateLimited) {
                    waitMs = Math.max(0, rateLimited.getRetryAfterSeconds()) * 1000L;
                }

                log.warn("AI调用失败(第{}次),{}ms后重试:{}",
                        attempt, waitMs, aiException.getMessage());
                pause(waitMs);

                // Exponential backoff for the next attempt.
                backoffMs = (long) (backoffMs * BACKOFF_MULTIPLIER);
            }
        }
    }

    /** Sleeps for the given time; restores the interrupt flag and aborts if interrupted. */
    private static void pause(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("重试被中断", ie);
        }
    }
}
全局异常处理器:统一的用户响应
@RestControllerAdvice
@Slf4j
public class AiGlobalExceptionHandler {

    /** Maximum number of raw-output characters written to the log. */
    private static final int RAW_OUTPUT_LOG_LIMIT = 200;

    /**
     * Maps an {@link AiException} to an HTTP status and a user-facing message
     * based on its error code.
     *
     * @param e       the exception being handled
     * @param request the current request; its URI is echoed in the response body
     * @return the error response; for a {@link RateLimitedException} it also
     *         carries a {@code Retry-After} header
     */
    @ExceptionHandler(AiException.class)
    public ResponseEntity<ErrorResponse> handleAiException(AiException e, HttpServletRequest request) {
        // User-visible errors are logged at WARN; everything else at ERROR with the stack trace.
        if (e.isUserVisible()) {
            log.warn("AI业务异常:code={}, message={}", e.getCode(), e.getMessage());
        } else {
            log.error("AI系统异常:code={}", e.getCode(), e);
        }

        // Build the user-facing response.
        String userMessage;
        int httpStatus;
        switch (e.getCode()) {
            case RATE_LIMITED -> {
                userMessage = e.getMessage();
                httpStatus = 429;
            }
            case CONTENT_FILTERED -> {
                userMessage = "您的输入包含不适当内容,请修改后重试";
                httpStatus = 400;
            }
            case CONTEXT_TOO_LONG -> {
                userMessage = "输入内容过长,请缩短后重试(建议控制在2000字以内)";
                httpStatus = 400;
            }
            case SERVICE_TIMEOUT, SERVICE_UNAVAILABLE -> {
                userMessage = "AI服务繁忙,请稍后重试";
                httpStatus = 503;
            }
            case AUTHENTICATION_FAILED -> {
                // Authentication failures are not exposed to the user; show a generic error.
                userMessage = "系统异常,请联系客服";
                httpStatus = 500;
            }
            default -> {
                userMessage = e.isUserVisible() ? e.getMessage() : "AI服务出现异常,请稍后重试";
                httpStatus = 500;
            }
        }

        ErrorResponse errorResponse = new ErrorResponse(
                e.getCode().code,
                userMessage,
                request.getRequestURI(),
                System.currentTimeMillis()
        );

        // When rate limited, tell the client how long to wait via the response header.
        if (e instanceof RateLimitedException rateLimited) {
            return ResponseEntity.status(httpStatus)
                    .header("Retry-After", String.valueOf(rateLimited.getRetryAfterSeconds()))
                    .body(errorResponse);
        }
        return ResponseEntity.status(httpStatus).body(errorResponse);
    }

    /**
     * Handles output-parse failures: logs a truncated copy of the raw model
     * output and answers with HTTP 200 and a fixed "degraded" notice. The raw
     * output itself is not included in the response body.
     *
     * @param e       the parse failure
     * @param request the current request; its URI is echoed in the response body
     * @return a 200 response with code {@code AI_DEGRADED}
     */
    @ExceptionHandler(OutputParseException.class)
    public ResponseEntity<ErrorResponse> handleOutputParseException(
            OutputParseException e, HttpServletRequest request) {
        log.error("AI输出解析失败,原始输出:{}", truncateForLog(e.getRawOutput()), e);

        return ResponseEntity.ok(new ErrorResponse(
                "AI_DEGRADED",
                "AI返回了非结构化内容,已作为文本处理",
                request.getRequestURI(),
                System.currentTimeMillis()
        ));
    }

    /**
     * Limits the raw output to {@value #RAW_OUTPUT_LOG_LIMIT} characters for
     * logging. A {@code null} value is passed through so that logging never
     * fails with a NullPointerException.
     */
    private static String truncateForLog(String rawOutput) {
        if (rawOutput == null || rawOutput.length() <= RAW_OUTPUT_LOG_LIMIT) {
            return rawOutput;
        }
        return rawOutput.substring(0, RAW_OUTPUT_LOG_LIMIT);
    }

    /** Body of every error response produced by this handler. */
    public record ErrorResponse(String code, String message, String path, long timestamp) {}
}
降级策略:出错时仍然给用户有价值的回应
@Service
@RequiredArgsConstructor
@Slf4j
public class AiServiceWithFallback {

    // NOTE(review): two constructor parameters share the type ChatClient;
    // confirm how the two beans are told apart at injection time.
    private final ChatClient primaryClient;  // primary LLM
    private final ChatClient fallbackClient; // fallback LLM used when the primary is down
    private final LlmExceptionTranslator translator;

    /**
     * Sends {@code message} to the primary model and, if that call fails with
     * a timeout or service-unavailable error, retries once on the fallback
     * model.
     *
     * @param message the user message to send
     * @return the primary model's answer, or the fallback model's answer
     *         prefixed with a notice that the backup model was used
     * @throws AiException the translated primary failure, when it is not
     *                     eligible for fallback or the fallback call fails as
     *                     well; in the latter case the fallback failure is
     *                     attached as a suppressed exception
     */
    public String chatWithFallback(String message) {
        // Try the primary model first.
        try {
            return primaryClient.prompt().user(message).call().content();
        } catch (Exception e) {
            AiException aiException = translator.translate(e);
            log.warn("主模型调用失败,尝试降级:{}", aiException.getMessage());

            // Only timeouts and unavailability are worth retrying on the fallback model.
            boolean eligibleForFallback =
                    aiException.getCode() == AiErrorCode.SERVICE_TIMEOUT
                            || aiException.getCode() == AiErrorCode.SERVICE_UNAVAILABLE;
            if (!eligibleForFallback) {
                throw aiException;
            }

            try {
                String fallbackResponse = fallbackClient.prompt()
                        .user(message)
                        .call()
                        .content();
                // Tell the user that the answer comes from the backup model.
                return "(当前使用备用模型,回答质量可能略有下降)\n\n" + fallbackResponse;
            } catch (Exception fallbackException) {
                log.error("主备模型均不可用", fallbackException);
                // Rethrow the primary failure, keeping the fallback failure attached.
                aiException.addSuppressed(fallbackException);
                throw aiException;
            }
        }
    }
}
错误处理不是锦上添花,而是AI应用健壮性的基石。用户遇到错误时的体验,往往比功能本身更影响他们对产品的评价。
