第2092篇:AI代码审查——自动化Code Review的工程实践
2026/4/30大约 9 分钟
第2092篇:AI代码审查——自动化Code Review的工程实践
适读人群:想用AI提升代码质量的工程师和团队 | 阅读时长:约19分钟 | 核心价值:构建一套实用的AI Code Review系统,包括问题分类、严重度判断、规范检查和PR集成
代码审查是最费时、又最容易流于形式的工程实践之一。审查人压力大、时间紧的时候,往往只能扫一眼,真正的问题很容易漏掉。
AI代码审查不是要取代人工审查,而是在人工之前先过一遍:把明显的问题、规范违反、安全隐患先筛出来,让人工审查员把精力放在更有价值的架构决策和逻辑审查上。
代码审查的关注维度
/**
* Code Review关注的维度
* 不同维度有不同的检查方式和严重度
*/
public enum ReviewDimension {
SECURITY("安全漏洞", ReviewSeverity.CRITICAL,
"SQL注入、XSS、硬编码密码、不安全的随机数等"),
CORRECTNESS("逻辑正确性", ReviewSeverity.HIGH,
"空指针风险、数组越界、整数溢出、条件错误等"),
PERFORMANCE("性能问题", ReviewSeverity.MEDIUM,
"N+1查询、不必要的循环嵌套、大对象创建等"),
CODE_STYLE("代码规范", ReviewSeverity.LOW,
"命名规范、注释缺失、代码重复等"),
MAINTAINABILITY("可维护性", ReviewSeverity.LOW,
"函数过长、复杂度过高、魔法数字等"),
TEST_COVERAGE("测试覆盖", ReviewSeverity.MEDIUM,
"缺少边界条件测试、异常路径未测试等");
/** Human-readable name of the dimension. */
private final String displayName;

/** Default severity associated with the dimension. */
private final ReviewSeverity defaultSeverity;

/** Short description listing example problems that belong to the dimension. */
private final String description;

/**
 * Creates a dimension constant.
 *
 * @param displayName human-readable name
 * @param severity default severity for the dimension
 * @param description example problems covered by the dimension
 */
ReviewDimension(String displayName, ReviewSeverity severity, String description) {
    this.displayName = displayName;
    this.defaultSeverity = severity;
    this.description = description;
}

// The fields are private, so without these accessors no other class can read them.
// The prompt builder in this file calls displayName() on each configured dimension.

/**
 * @return the human-readable name of this dimension
 */
public String displayName() {
    return displayName;
}

/**
 * @return the default severity associated with this dimension
 */
public ReviewSeverity defaultSeverity() {
    return defaultSeverity;
}

/**
 * @return the description of the problems covered by this dimension
 */
public String description() {
    return description;
}
/**
 * Severity levels of a review finding.
 *
 * <p>Constants are declared from {@code CRITICAL} down to {@code INFO}; code that iterates
 * {@code values()} sees them in that order.
 */
public enum ReviewSeverity {
    CRITICAL,
    HIGH,
    MEDIUM,
    LOW,
    INFO
}
}

核心代码审查服务
/**
* AI代码审查服务
* 对提交的代码进行多维度分析
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class AiCodeReviewService {
private final ChatLanguageModel llm;
private final ObjectMapper objectMapper;
// 每次审查的最大行数(避免超出token限制)
private static final int MAX_LINES_PER_REVIEW = 300;
/**
 * Reviews the changes of a single file.
 *
 * <p>A diff whose changed-line count exceeds {@code MAX_LINES_PER_REVIEW} is handed to the
 * chunked review path; any other diff is reviewed in a single request.
 *
 * @param diff the file diff to review
 * @param config review configuration passed through to the review
 * @return the review result for the file
 */
public FileReviewResult reviewFileDiff(FileDiff diff, ReviewConfig config) {
    log.info("开始审查文件: {}, 变更行数: {}", diff.getFilePath(), diff.getChangedLines());

    final boolean oversized = diff.getChangedLines() > MAX_LINES_PER_REVIEW;
    return oversized
            ? reviewLargeFileDiff(diff, config)
            : performReview(diff.getFilePath(), diff.getDiffContent(), config);
}
/**
 * Sends one diff (or diff chunk) to the LLM and converts the reply into a review result.
 *
 * <p>Any exception raised while calling the model or handling its reply is logged together
 * with its stack trace and converted into an error result instead of being rethrown.
 *
 * @param filePath path used for language detection, shown in the prompt and stored on the result
 * @param diffContent the diff text to review
 * @param config review configuration passed to the prompt builder
 * @return the parsed review result, or an error result carrying the exception message
 */
private FileReviewResult performReview(
        String filePath, String diffContent, ReviewConfig config) {
    String language = detectLanguage(filePath);
    String prompt = buildReviewPrompt(filePath, diffContent, language, config);
    try {
        String response = llm.generate(
                SystemMessage.from(getSystemPrompt()),
                UserMessage.from(prompt)
        ).content().text();
        return parseReviewResponse(filePath, response);
    } catch (Exception e) {
        // Pass the exception as the last argument so the logger records the stack trace;
        // logging only e.getMessage() loses the cause (and is often just "null").
        log.error("代码审查失败: file={}", filePath, e);
        return FileReviewResult.error(filePath, e.getMessage());
    }
}
/**
 * Builds the user prompt for reviewing one diff.
 *
 * <p>The prompt contains the file path, the detected language, an optional list of focus
 * dimensions, the diff wrapped in a fenced block, and the JSON schema the model must answer in.
 *
 * @param filePath path shown to the model
 * @param diffContent diff text embedded in the prompt
 * @param language language name shown to the model
 * @param config supplies the optional focus dimensions
 * @return the complete prompt text
 */
private String buildReviewPrompt(String filePath, String diffContent,
                                 String language, ReviewConfig config) {
    StringBuilder prompt = new StringBuilder()
            .append("请审查以下代码变更,发现潜在问题。\n\n")
            .append("文件:").append(filePath).append("\n")
            .append("语言:").append(language).append("\n");

    // The focus line is emitted only when at least one dimension is configured.
    boolean hasFocus = config.getDimensions() != null && !config.getDimensions().isEmpty();
    if (hasFocus) {
        String focusNames = config.getDimensions().stream()
                .map(ReviewDimension::displayName)
                .collect(Collectors.joining("、"));
        prompt.append("重点关注:").append(focusNames).append("\n");
    }

    prompt.append("\n代码变更(+ 表示新增,- 表示删除):\n```\n")
            .append(diffContent)
            .append("\n```\n\n");

    // Response contract: the template below is sent verbatim to the model.
    prompt.append("""
            请输出JSON格式的审查结果:
            {
            "issues": [
            {
            "severity": "CRITICAL|HIGH|MEDIUM|LOW|INFO",
            "dimension": "SECURITY|CORRECTNESS|PERFORMANCE|CODE_STYLE|MAINTAINABILITY|TEST_COVERAGE",
            "lineNumber": 行号(可为null),
            "title": "问题标题(一句话)",
            "description": "详细说明问题是什么,为什么有问题",
            "suggestion": "具体的修改建议(如有可能,提供修改后的代码片段)",
            "codeSnippet": "有问题的代码片段"
            }
            ],
            "overallQuality": "EXCELLENT|GOOD|FAIR|POOR",
            "summary": "总体评价(1-2句话)",
            "approvalRecommendation": "APPROVE|REQUEST_CHANGES|NEEDS_DISCUSSION"
            }
            如果代码没有问题,issues数组为空即可。只输出JSON:
            """);
    return prompt.toString();
}
/**
 * Returns the fixed system prompt that describes the reviewer persona and its priorities.
 *
 * @return the system prompt text
 */
private String getSystemPrompt() {
    final String reviewerPersona = """
            你是一个经验丰富的Senior Software Engineer,专注于代码质量和安全。
            在审查代码时,你会:
            1. 关注安全漏洞(最高优先级)
            2. 找出可能的运行时错误
            3. 识别性能陷阱
            4. 指出不符合最佳实践的代码
            你的建议具体、可操作,不是泛泛的"这里需要优化"。
            你不会挑剔代码风格的小问题(除非严重影响可读性)。
            """;
    return reviewerPersona;
}
/**
 * Parses the model's reply into a {@code FileReviewResult}.
 *
 * <p>The JSON object is extracted from the reply and read as a generic map. The parser is
 * tolerant of imperfect model output: an {@code issues} entry that is missing, {@code null}
 * or not an array yields an empty issue list, array elements that are not objects are
 * skipped, and a top-level text field that is missing, {@code null} or not a string falls
 * back to its default. Only a reply that cannot be parsed as JSON produces an error result.
 *
 * @param filePath path stored on the result
 * @param response raw text returned by the model
 * @return the parsed result, or an error result when the reply cannot be parsed
 */
private FileReviewResult parseReviewResponse(String filePath, String response) {
    try {
        String json = extractJson(response);
        Map<String, Object> parsed = objectMapper.readValue(json,
                new TypeReference<>() {});

        // getOrDefault would hand back null for an explicit `"issues": null`, so check the
        // runtime type instead of trusting the key's presence.
        List<ReviewIssue> issues = new ArrayList<>();
        if (parsed.get("issues") instanceof List<?> rawIssues) {
            for (Object rawIssue : rawIssues) {
                if (!(rawIssue instanceof Map<?, ?>)) {
                    continue;
                }
                // Keys of a JSON object are always strings, so this cast is safe.
                @SuppressWarnings("unchecked")
                Map<String, Object> issueFields = (Map<String, Object>) rawIssue;
                ReviewIssue issue = parseIssue(issueFields);
                if (issue != null) {
                    issues.add(issue);
                }
            }
        }

        return FileReviewResult.builder()
                .filePath(filePath)
                .issues(List.copyOf(issues))
                .overallQuality(responseField(parsed, "overallQuality", "FAIR"))
                .summary(responseField(parsed, "summary", ""))
                .approvalRecommendation(
                        responseField(parsed, "approvalRecommendation", "NEEDS_DISCUSSION"))
                .build();
    } catch (Exception e) {
        // Keep the stack trace: the message alone rarely identifies the malformed part.
        log.error("审查结果解析失败: {}", e.getMessage(), e);
        return FileReviewResult.error(filePath, "解析失败");
    }
}

/** Returns the string stored under {@code key}, or {@code fallback} when it is absent, null or not a string. */
private String responseField(Map<String, Object> parsed, String key, String fallback) {
    return parsed.get(key) instanceof String text ? text : fallback;
}
/**
 * Converts one raw issue object from the model's JSON into a {@code ReviewIssue}.
 *
 * <p>Model output is not guaranteed to follow the requested schema exactly, so each field is
 * read leniently: enum labels are matched case-insensitively and fall back to the default
 * ({@code LOW} / {@code CODE_STYLE}) when missing or unrecognised, the line number accepts
 * any JSON number or a numeric string, and text fields accept non-string values. This keeps
 * a finding from being discarded because of a formatting slip in a single field.
 *
 * @param raw the issue fields as read from JSON
 * @return the parsed issue, or {@code null} when the issue cannot be built
 */
private ReviewIssue parseIssue(Map<String, Object> raw) {
    try {
        return ReviewIssue.builder()
                .severity(issueEnum(raw.get("severity"),
                        ReviewDimension.ReviewSeverity.class, ReviewDimension.ReviewSeverity.LOW))
                .dimension(issueEnum(raw.get("dimension"),
                        ReviewDimension.class, ReviewDimension.CODE_STYLE))
                .lineNumber(issueLineNumber(raw.get("lineNumber")))
                .title(issueText(raw.get("title"), ""))
                .description(issueText(raw.get("description"), ""))
                .suggestion(issueText(raw.get("suggestion"), ""))
                .codeSnippet(issueText(raw.get("codeSnippet"), null))
                .build();
    } catch (Exception e) {
        log.warn("Issue解析失败: {}", e.getMessage());
        return null;
    }
}

/** Resolves an enum label ignoring case and surrounding whitespace; returns {@code fallback} when it cannot be resolved. */
private <E extends Enum<E>> E issueEnum(Object value, Class<E> type, E fallback) {
    if (!(value instanceof String label) || label.isBlank()) {
        return fallback;
    }
    try {
        return Enum.valueOf(type, label.strip().toUpperCase(java.util.Locale.ROOT));
    } catch (IllegalArgumentException unknownLabel) {
        return fallback;
    }
}

/** Reads a line number from a JSON number or numeric string; returns {@code null} for anything else. */
private Integer issueLineNumber(Object value) {
    if (value instanceof Number number) {
        // JSON numbers may arrive as Integer, Long or Double depending on their value.
        return number.intValue();
    }
    if (value instanceof String text) {
        try {
            return Integer.valueOf(text.strip());
        } catch (NumberFormatException notANumber) {
            return null;
        }
    }
    return null;
}

/** Returns the value as text, or {@code fallback} when the value is {@code null}. */
private String issueText(Object value, String fallback) {
    if (value == null) {
        return fallback;
    }
    return value instanceof String text ? text : String.valueOf(value);
}
/**
 * Reviews a large diff in chunks and merges the per-chunk results.
 *
 * <p>The diff is split into pieces of at most {@code MAX_LINES_PER_REVIEW} lines, each piece
 * is reviewed separately, and the results are merged under the real file path.
 *
 * @param diff the oversized file diff
 * @param config review configuration passed to every chunk review
 * @return the merged review result
 */
private FileReviewResult reviewLargeFileDiff(FileDiff diff, ReviewConfig config) {
    log.info("大文件分批审查: {}, 总行数: {}", diff.getFilePath(), diff.getChangedLines());
    List<String> chunks = splitDiff(diff.getDiffContent(), MAX_LINES_PER_REVIEW);
    List<FileReviewResult> chunkResults = new ArrayList<>(chunks.size());
    for (int i = 0; i < chunks.size(); i++) {
        log.debug("审查chunk {}/{}", i + 1, chunks.size());
        // The part marker goes in FRONT of the path. performReview detects the language
        // from the file extension at the end of this string, so a trailing " (Part i/n)"
        // would hide the extension and every chunk would be reviewed as "Unknown".
        String chunkLabel = "[Part " + (i + 1) + "/" + chunks.size() + "] " + diff.getFilePath();
        chunkResults.add(performReview(chunkLabel, chunks.get(i), config));
    }
    // Merge the results of all chunks under the real file path.
    return mergeResults(diff.getFilePath(), chunkResults);
}
/**
 * Splits a diff into consecutive chunks of at most {@code maxLines} lines.
 *
 * <p>Lines are separated on {@code "\n"}; every line written to a chunk is terminated with
 * {@code "\n"}. A {@code maxLines} below 1 yields one chunk per line.
 *
 * @param diff the diff text
 * @param maxLines maximum number of lines per chunk
 * @return the chunks in original order
 */
private List<String> splitDiff(String diff, int maxLines) {
    final String[] rows = diff.split("\n");
    final int step = Math.max(1, maxLines);
    final List<String> parts = new ArrayList<>();

    for (int from = 0; from < rows.length; from += step) {
        int to = Math.min(rows.length, from + step);
        StringBuilder part = new StringBuilder();
        for (int i = from; i < to; i++) {
            part.append(rows[i]).append("\n");
        }
        parts.add(part.toString());
    }
    return parts;
}
/**
 * Merges the results of several chunk reviews into one result for the file.
 *
 * <ul>
 *   <li>Issues of all chunks are concatenated; chunks without an issue list are ignored.</li>
 *   <li>The overall quality is the worst quality reported by any chunk, {@code "FAIR"} when
 *       no chunk reported one.</li>
 *   <li>The recommendation is {@code REQUEST_CHANGES} if any chunk asks for changes,
 *       {@code APPROVE} only if every chunk approved, and {@code NEEDS_DISCUSSION} otherwise.</li>
 * </ul>
 *
 * @param filePath path stored on the merged result
 * @param results per-chunk results
 * @return the merged result
 */
private FileReviewResult mergeResults(String filePath, List<FileReviewResult> results) {
    List<ReviewIssue> allIssues = results.stream()
            .map(FileReviewResult::getIssues)
            .filter(Objects::nonNull)
            .flatMap(chunkIssues -> chunkIssues.stream())
            .toList();

    // Null qualities are dropped first: Stream.min throws if the selected element is null,
    // and qualityRank switches on the string.
    String worstQuality = results.stream()
            .map(FileReviewResult::getOverallQuality)
            .filter(Objects::nonNull)
            .min(Comparator.comparingInt(this::qualityRank))
            .orElse("FAIR");

    boolean anyRequestsChanges = results.stream()
            .map(FileReviewResult::getApprovalRecommendation)
            .anyMatch("REQUEST_CHANGES"::equals);
    boolean allApprove = !results.isEmpty() && results.stream()
            .map(FileReviewResult::getApprovalRecommendation)
            .allMatch("APPROVE"::equals);

    String recommendation;
    if (anyRequestsChanges) {
        recommendation = "REQUEST_CHANGES";
    } else if (allApprove) {
        // Previously a unanimous approval was still reported as NEEDS_DISCUSSION.
        recommendation = "APPROVE";
    } else {
        recommendation = "NEEDS_DISCUSSION";
    }

    return FileReviewResult.builder()
            .filePath(filePath)
            .issues(allIssues)
            .overallQuality(worstQuality)
            // results.size() is the number of chunks of one file, not a number of files.
            .summary(String.format("共发现%d个问题(分%d批审查)",
                    allIssues.size(), results.size()))
            .approvalRecommendation(recommendation)
            .build();
}
/**
 * Maps an overall-quality label to a rank where a lower rank means worse quality.
 *
 * @param quality one of {@code POOR}, {@code FAIR}, {@code GOOD}, {@code EXCELLENT};
 *                may be {@code null}
 * @return 0 for POOR, 1 for FAIR, 2 for GOOD, 3 for EXCELLENT; 1 for {@code null} or any
 *         other label
 */
private int qualityRank(String quality) {
    // A switch on a null String throws NullPointerException; rank a missing label like an
    // unknown one instead.
    if (quality == null) {
        return 1;
    }
    return switch (quality) {
        case "POOR" -> 0;
        case "FAIR" -> 1;
        case "GOOD" -> 2;
        case "EXCELLENT" -> 3;
        default -> 1;
    };
}
/**
 * Derives a language name from the file extension.
 *
 * <p>The extension is matched case-insensitively, so {@code Main.JAVA} and {@code schema.SQL}
 * are recognised as well.
 *
 * @param filePath path whose extension is inspected
 * @return the language name, or {@code "Unknown"} for an unrecognised extension
 */
private String detectLanguage(String filePath) {
    // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
    String path = filePath.toLowerCase(java.util.Locale.ROOT);
    if (path.endsWith(".java")) {
        return "Java";
    }
    if (path.endsWith(".py")) {
        return "Python";
    }
    if (path.endsWith(".ts") || path.endsWith(".tsx")) {
        return "TypeScript";
    }
    if (path.endsWith(".js") || path.endsWith(".jsx")) {
        return "JavaScript";
    }
    if (path.endsWith(".go")) {
        return "Go";
    }
    if (path.endsWith(".sql")) {
        return "SQL";
    }
    return "Unknown";
}
/**
 * Extracts the outermost JSON object from free text.
 *
 * @param text text that may contain a JSON object surrounded by other output
 * @return the substring from the first <code>{</code> to the last <code>}</code>, or
 *         {@code "{}"} when no such pair exists
 */
private String extractJson(String text) {
    final int open = text.indexOf('{');
    final int close = text.lastIndexOf('}');
    if (open < 0 || close <= open) {
        return "{}";
    }
    return text.substring(open, close + 1);
}
}

安全问题专项检查器
/**
* 安全漏洞专项检查
* 比通用审查更深入,针对Java常见安全问题
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class SecurityReviewService {
private final ChatLanguageModel llm;
// Regex rules for the rule-based scan (described by the author as OWASP Top 10 patterns):
// fast to run, but not as capable as the LLM analysis.
// Each entry is (rule name, regex, description); the name and description are copied onto
// every finding the rule produces.
// NOTE(review): the SQL-injection rule matches a quote followed by `+` and a letter, i.e. any
// string concatenation with an identifier, not only SQL — expect false positives; confirm
// this is acceptable given that every rule hit is reported as CRITICAL.
// NOTE(review): the XXE and deserialization rules match on class/method names alone, so they
// flag usage regardless of how the parser or stream is configured.
private static final List<SecurityPattern> SECURITY_PATTERNS = List.of(
SecurityPattern.of("SQL注入风险",
Pattern.compile("\"\\s*\\+\\s*[a-zA-Z]|'\\s*\\+\\s*[a-zA-Z]"),
"直接字符串拼接SQL查询,可能导致SQL注入"),
SecurityPattern.of("硬编码密码",
Pattern.compile("(?i)(password|passwd|secret|api_key)\\s*=\\s*\"[^\"]{6,}\""),
"代码中包含硬编码的敏感凭据"),
SecurityPattern.of("不安全随机数",
Pattern.compile("new Random\\(\\)|Math\\.random\\(\\)"),
"使用了不安全的随机数生成器,安全场景应使用SecureRandom"),
SecurityPattern.of("XXE风险",
Pattern.compile("DocumentBuilderFactory|SAXParserFactory|XMLReader"),
"XML解析可能存在XXE注入风险,需要禁用外部实体"),
SecurityPattern.of("反序列化风险",
Pattern.compile("ObjectInputStream|readObject\\(\\)"),
"Java原生反序列化存在远程代码执行风险")
);
/**
 * Runs the security check on a piece of code.
 *
 * <p>First every rule in {@code SECURITY_PATTERNS} is matched against the code and each match
 * is recorded as a CRITICAL finding. Then the first 3000 characters of the code are sent to
 * the LLM for a deeper analysis. The LLM reply is reduced to its JSON part, but that JSON is
 * not parsed in this version, so the returned findings come from the rule scan only.
 *
 * @param filePath path stored on the result
 * @param code source code to check
 * @return the findings and a verdict of {@code "PASS"} (no findings) or {@code "FAIL"}
 */
public SecurityReviewResult securityReview(String filePath, String code) {
    List<SecurityIssue> findings = new ArrayList<>();

    // Step 1: rule-based scan (fast).
    for (SecurityPattern rule : SECURITY_PATTERNS) {
        for (Matcher hit = rule.regex().matcher(code); hit.find(); ) {
            int line = countLines(code, hit.start());
            SecurityIssue finding = SecurityIssue.builder()
                    .type(rule.name())
                    .description(rule.description())
                    .lineNumber(line)
                    .codeSnippet(extractContext(code, hit.start(), 100))
                    .severity(ReviewDimension.ReviewSeverity.CRITICAL)
                    .source("规则扫描")
                    .build();
            findings.add(finding);
        }
    }

    // Step 2: LLM analysis aimed at cases the rules cannot express.
    // Only the leading 3000 characters of the code are included in the prompt.
    String excerpt = code.substring(0, Math.min(3000, code.length()));
    String securityPrompt = String.format("""
            请对以下Java代码进行深度安全审查,重点关注:
            1. 认证和授权绕过
            2. 敏感数据泄露(日志、响应体)
            3. 路径遍历
            4. 服务端请求伪造(SSRF)
            5. 不安全的文件操作
            代码:
            ```java
            %s
            ```
            输出JSON格式的安全问题列表(如无问题,issues为空数组):
            {"issues": [{"type": "...", "description": "...", "lineNumber": null, "codeSnippet": "..."}]}
            只输出JSON:
            """, excerpt);
    try {
        String reply = llm.generate(securityPrompt).trim();
        String json = extractJson(reply);
        // Parsing the JSON and adding the LLM findings is omitted here.
    } catch (Exception e) {
        log.warn("LLM安全分析失败: {}", e.getMessage());
    }

    String verdict;
    if (findings.isEmpty()) {
        verdict = "PASS";
    } else {
        verdict = "FAIL";
    }
    return new SecurityReviewResult(filePath, findings, verdict);
}
/**
 * Returns the 1-based line number of a character offset.
 *
 * @param text the full text
 * @param position character offset into {@code text}
 * @return one plus the number of {@code '\n'} characters before {@code position}
 */
private int countLines(String text, int position) {
    final String before = text.substring(0, position);
    int line = 1;
    for (int i = 0; i < before.length(); i++) {
        if (before.charAt(i) == '\n') {
            line++;
        }
    }
    return line;
}
/**
 * Cuts a snippet of text around a character offset.
 *
 * @param text the full text
 * @param position character offset the snippet is taken around
 * @param contextLength number of characters to include from {@code position} onwards
 * @return the text from up to 20 characters before {@code position} to
 *         {@code position + contextLength}, clamped to the bounds of {@code text}
 */
private String extractContext(String text, int position, int contextLength) {
    final int lookBehind = 20;
    int from = position - lookBehind;
    if (from < 0) {
        from = 0;
    }
    int to = position + contextLength;
    if (to > text.length()) {
        to = text.length();
    }
    return text.substring(from, to);
}
/**
 * Extracts the outermost JSON object from free text.
 *
 * @param text text that may contain a JSON object surrounded by other output
 * @return the substring from the first <code>{</code> to the last <code>}</code>, or
 *         {@code "{}"} when no such pair exists
 */
private String extractJson(String text) {
    final int firstBrace = text.indexOf('{');
    final int lastBrace = text.lastIndexOf('}');
    final boolean found = firstBrace >= 0 && lastBrace > firstBrace;
    if (!found) {
        return "{}";
    }
    return text.substring(firstBrace, lastBrace + 1);
}
/**
 * One rule of the rule-based security scan.
 *
 * @param name rule name, copied onto each finding as its type
 * @param regex pattern matched against the code under review
 * @param description explanation copied onto each finding
 */
public record SecurityPattern(String name, Pattern regex, String description) {

    /**
     * Static factory equivalent to the canonical constructor.
     *
     * @param name rule name
     * @param regex pattern matched against the code
     * @param description explanation of the rule
     * @return a new rule
     */
    public static SecurityPattern of(String name, Pattern regex, String description) {
        return new SecurityPattern(name, regex, description);
    }
}
/**
 * A single security finding.
 *
 * @param type kind of problem; the rule scan stores the rule name here
 * @param description explanation of the problem
 * @param lineNumber line the finding refers to, may be {@code null}
 * @param codeSnippet excerpt of the affected code
 * @param severity severity of the finding
 * @param source where the finding came from; the rule scan uses {@code "规则扫描"}
 */
@Builder
public record SecurityIssue(
        String type,
        String description,
        Integer lineNumber,
        String codeSnippet,
        ReviewDimension.ReviewSeverity severity,
        String source
) {
}
/**
 * Outcome of a security review of one file.
 *
 * @param filePath path of the reviewed file
 * @param issues findings collected for the file
 * @param verdict {@code "PASS"} or {@code "FAIL"} as set by the review
 */
public record SecurityReviewResult(
        String filePath,
        List<SecurityIssue> issues,
        String verdict
) {
}
}

与GitLab/GitHub集成
/**
* PR自动审查集成
* 在PR创建时自动触发AI审查,把结果作为评论发出
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class PullRequestReviewIntegration {
private final AiCodeReviewService reviewService;
private final SecurityReviewService securityReviewService;
private final GitLabClient gitLabClient; // 或 GitHubClient
/**
 * Handles a pull-request event delivered by a webhook.
 *
 * <p>Only {@code OPENED} and {@code SYNCHRONIZE} actions are processed. Each diff that is not
 * skipped is reviewed; a per-file comment is posted when the review found issues, then a
 * summary comment is posted. If any finding is CRITICAL, changes are requested on the PR.
 *
 * @param event the pull-request event
 */
public void handlePullRequestEvent(PullRequestEvent event) {
    boolean reviewable = event.getAction() == PullRequestAction.OPENED
            || event.getAction() == PullRequestAction.SYNCHRONIZE;
    if (!reviewable) {
        return;
    }
    log.info("开始AI审查PR: #{} - {}", event.getPrNumber(), event.getTitle());

    // Fetch the file diffs of the PR.
    List<FileDiff> diffs = gitLabClient.getDiffs(event.getProjectId(), event.getPrNumber());

    List<ReviewDimension> focus = List.of(
            ReviewDimension.SECURITY,
            ReviewDimension.CORRECTNESS,
            ReviewDimension.PERFORMANCE);
    ReviewConfig config = ReviewConfig.builder()
            .dimensions(focus)
            // Requested cap so a developer is not flooded with findings.
            // NOTE(review): where this cap is enforced is not visible here — confirm.
            .maxIssuesPerFile(10)
            .build();

    List<FileReviewResult> allResults = new ArrayList<>();
    for (FileDiff diff : diffs) {
        // File types that need no review are left out entirely.
        if (!shouldSkip(diff.getFilePath())) {
            FileReviewResult result = reviewService.reviewFileDiff(diff, config);
            allResults.add(result);
            // Comment only when something was found, to keep the PR free of noise.
            boolean foundSomething = !result.getIssues().isEmpty();
            if (foundSomething) {
                postFileReviewComment(event, diff, result);
            }
        }
    }

    postSummaryComment(event, allResults);

    // Any CRITICAL finding triggers an automatic change request.
    long criticalFindings = countBySeverity(allResults, ReviewDimension.ReviewSeverity.CRITICAL);
    if (criticalFindings > 0) {
        gitLabClient.requestChanges(event.getProjectId(), event.getPrNumber(),
                "AI审查发现关键安全或正确性问题,请处理后重新提交。");
        log.warn("PR #{} 存在CRITICAL问题,已自动请求变更", event.getPrNumber());
    }
}
/**
 * Posts one comment listing the findings for a file, grouped by severity.
 *
 * <p>Severity groups appear in the declaration order of {@code ReviewSeverity}; groups
 * without findings are omitted.
 *
 * @param event the pull-request event identifying project and PR
 * @param diff the reviewed file diff (its path is used as the comment title)
 * @param result the review result whose issues are rendered
 */
private void postFileReviewComment(
        PullRequestEvent event, FileDiff diff, FileReviewResult result) {
    StringBuilder body = new StringBuilder();
    body.append("🤖 **AI Code Review - ").append(diff.getFilePath()).append("**\n\n");

    Map<ReviewDimension.ReviewSeverity, List<ReviewIssue>> bySeverity =
            result.getIssues().stream()
                    .collect(Collectors.groupingBy(ReviewIssue::getSeverity));

    for (ReviewDimension.ReviewSeverity level : ReviewDimension.ReviewSeverity.values()) {
        List<ReviewIssue> findings = bySeverity.get(level);
        boolean hasFindings = findings != null && !findings.isEmpty();
        if (!hasFindings) {
            continue;
        }

        // Section header for this severity.
        body.append(getSeverityEmoji(level));
        body.append(" **").append(level.name()).append("**\n\n");

        for (ReviewIssue finding : findings) {
            body.append("**").append(finding.getTitle()).append("**");
            Integer line = finding.getLineNumber();
            if (line != null) {
                body.append(" (第").append(line).append("行)");
            }
            body.append("\n");
            body.append(finding.getDescription()).append("\n");

            String suggestion = finding.getSuggestion();
            if (suggestion != null && !suggestion.isEmpty()) {
                body.append("💡 建议:").append(suggestion).append("\n");
            }
            body.append("\n");
        }
    }

    gitLabClient.postComment(event.getProjectId(), event.getPrNumber(), body.toString());
}
/**
 * Posts the summary comment for the whole pull request.
 *
 * <p>The comment states how many files were reviewed, the total number of findings, how many
 * of them are CRITICAL and HIGH, and a verdict line that depends on whether any CRITICAL
 * finding exists.
 *
 * @param event the pull-request event identifying project and PR
 * @param results review results of all reviewed files
 */
private void postSummaryComment(PullRequestEvent event,
                                List<FileReviewResult> results) {
    long criticalCount = countBySeverity(results, ReviewDimension.ReviewSeverity.CRITICAL);
    long highCount = countBySeverity(results, ReviewDimension.ReviewSeverity.HIGH);

    long totalCount = 0;
    for (FileReviewResult result : results) {
        totalCount += result.getIssues().size();
    }

    String verdictLine;
    if (criticalCount > 0) {
        verdictLine = "❌ **发现关键问题,建议处理后再合并**";
    } else {
        verdictLine = "✅ 未发现关键问题";
    }

    String summary = String.format("""
            ## 🤖 AI Code Review 摘要
            共审查 **%d** 个文件,发现 **%d** 个问题(%d个严重,%d个高风险)
            %s
            > ⚠️ AI审查仅供参考,不能替代人工审查。请结合实际情况处理建议。
            """,
            results.size(), totalCount, criticalCount, highCount, verdictLine);

    gitLabClient.postComment(event.getProjectId(), event.getPrNumber(), summary);
}
/**
 * Counts the findings of a given severity across all results.
 *
 * @param results review results to scan
 * @param severity severity to count
 * @return number of issues whose severity is {@code severity}
 */
private long countBySeverity(List<FileReviewResult> results,
                             ReviewDimension.ReviewSeverity severity) {
    long matches = 0;
    for (FileReviewResult result : results) {
        for (ReviewIssue issue : result.getIssues()) {
            if (issue.getSeverity() == severity) {
                matches++;
            }
        }
    }
    return matches;
}
/**
 * Returns the emoji used as the section marker for a severity in PR comments.
 *
 * @param severity the severity to render
 * @return the emoji for {@code severity}
 */
private String getSeverityEmoji(ReviewDimension.ReviewSeverity severity) {
    final String emoji = switch (severity) {
        case CRITICAL -> "🚨";
        case HIGH -> "⚠️";
        case MEDIUM -> "💛";
        case LOW -> "💬";
        case INFO -> "ℹ️";
    };
    return emoji;
}
/**
 * Decides whether a file is excluded from AI review.
 *
 * <p>Skipped are {@code .json}, {@code .md} and {@code .lock} files, test files and generated
 * files. Test and generated files are recognised by whole name parts rather than by a raw
 * substring search, so {@code LatestOrderService.java} or {@code ContestController.java}
 * are no longer skipped just because their names contain the letters "test":
 * <ul>
 *   <li>any path segment (directory or file name) that, split on {@code . _ -}, contains the
 *       part {@code test}, {@code tests} or {@code generated} — e.g. {@code src/test/},
 *       {@code __tests__/}, {@code generated-sources/}, {@code foo_test.go},
 *       {@code foo.test.ts};</li>
 *   <li>a file whose base name ends with {@code Test} or {@code Tests} — e.g.
 *       {@code OrderServiceTest.java}.</li>
 * </ul>
 *
 * @param filePath path of the changed file
 * @return {@code true} when the file should not be reviewed
 */
private boolean shouldSkip(String filePath) {
    String normalized = filePath.replace('\\', '/');
    String lower = normalized.toLowerCase(java.util.Locale.ROOT);

    if (lower.endsWith(".json") || lower.endsWith(".md") || lower.endsWith(".lock")) {
        return true;
    }

    // Match whole name parts so that words merely containing "test" do not count.
    for (String segment : lower.split("/")) {
        for (String part : segment.split("[._-]")) {
            if (part.equals("test") || part.equals("tests") || part.equals("generated")) {
                return true;
            }
        }
    }

    // CamelCase convention (FooTest.java): checked on the original case so that
    // "Latest.java" or "Contest.java" do not match.
    String fileName = normalized.substring(normalized.lastIndexOf('/') + 1);
    int firstDot = fileName.indexOf('.');
    String baseName = firstDot > 0 ? fileName.substring(0, firstDot) : fileName;
    return baseName.endsWith("Test") || baseName.endsWith("Tests");
}
}

实践效果和注意事项
哪些问题AI找得好:
- SQL注入、XSS等模式化的安全问题
- 空指针可能(明显的null未判断)
- 硬编码配置(密码、API key)
- 明显的性能问题(循环内创建对象、未关闭的资源)
哪些问题AI容易漏:
- 需要了解业务逻辑才能判断的问题
- 跨文件、跨模块的依赖问题
- 并发竞争条件(除非非常明显)
- 细微的算法逻辑错误
降低误报的策略:
- 给LLM提供更多上下文(相关类的定义、接口说明)
- 设置置信度过滤(低置信度的问题不展示或标注"待确认")
- 积累反馈,定期改进Prompt
AI代码审查的价值不在于"替代人",而在于把机械重复的检查自动化,让人工审查员专注于真正需要判断力的部分。
