Spring AI企业级知识库:从0到1完整项目实战
2026/4/30 · 阅读时间约 3 分钟
Spring AI企业级知识库:从0到1完整项目实战
适读人群:想做AI落地项目的Java工程师
文章价值:知识库系统完整设计 + 核心代码 + 部署方案
项目背景与目标
企业知识库问答系统是目前AI落地最成熟、ROI最高的应用场景之一。本文带你从零搭建一个支持多部门、多格式文档的智能问答系统。
系统目标:
- 支持上传PDF/Word/网页等多格式文档
- 自然语言问答,返回有依据的答案
- 多租户隔离(不同部门看不到彼此的数据)
- 支持1000+并发用户查询
系统整体架构
数据模型设计
/**
 * Knowledge base (document library) entity, mapped to the {@code knowledge_base} table.
 *
 * <p>Lombok's {@code @Data} generates the getters/setters, {@code equals}/{@code hashCode}
 * and {@code toString} for all fields.
 */
@Entity
@Table(name = "knowledge_base")
@Data
public class KnowledgeBase {
    /** Primary key, generated with the UUID strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.UUID)
    private String id;
    private String name;
    private String description;
    /** Tenant ID (used for per-department isolation). */
    private String tenantId;
    /** Status of the knowledge base; persisted by enum name rather than ordinal. */
    @Enumerated(EnumType.STRING)
    private KnowledgeBaseStatus status;
    // NOTE(review): nothing in this excerpt populates the two timestamps below —
    // confirm they are set by the service layer or by JPA auditing.
    private LocalDateTime createdAt;
    private LocalDateTime updatedAt;
}
/**
 * Uploaded document entity, mapped to the {@code knowledge_document} table.
 *
 * <p>Lombok's {@code @Data} generates the getters/setters, {@code equals}/{@code hashCode}
 * and {@code toString} for all fields.
 */
@Entity
@Table(name = "knowledge_document")
@Data
public class KnowledgeDocument {
    /** Primary key, generated with the UUID strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.UUID)
    private String id;
    /** ID of the knowledge base this document belongs to (plain column, no JPA relation). */
    private String knowledgeBaseId;
    private String originalFilename;
    /** Storage path of the file in MinIO. */
    private String storageKey;
    private String fileType;
    private Long fileSize;
    /** Processing status: PROCESSING / READY / FAILED; persisted by enum name. */
    @Enumerated(EnumType.STRING)
    private DocumentStatus status;
    /** Number of chunks produced for this document; written when indexing succeeds. */
    private Integer chunkCount;
    /** Time at which indexing completed; written when indexing succeeds. */
    private LocalDateTime indexedAt;
}
/**
 * Metadata of a single vector chunk (stored in the PGVector metadata column).
 *
 * <p>NOTE(review): the ingestion code in this excerpt writes metadata entries straight into
 * each chunk's metadata map instead of going through this record — confirm where the record
 * is actually used.
 */
record ChunkMetadata(
    String documentId,
    String knowledgeBaseId,
    String tenantId,
    String filename,
    int chunkIndex,
    int totalChunks
) {}
核心服务实现
文档入库流水线
/**
 * Document ingestion pipeline: downloads an uploaded file from MinIO, splits it into chunks,
 * tags every chunk with tenant / knowledge-base metadata, and writes the chunks to the
 * vector store.
 *
 * <p>NOTE(review): {@code loadAndChunk(...)} and {@code getTenantId(...)} are called below but
 * are not shown in this excerpt; {@code loaderFactory} is presumably used by
 * {@code loadAndChunk} — confirm.
 */
@Service
@Slf4j
@RequiredArgsConstructor
public class DocumentPipelineService {

    private final DocumentLoaderFactory loaderFactory;
    private final VectorStore vectorStore;
    private final MinioService minioService;
    private final KnowledgeDocumentRepository docRepo;

    /**
     * Processes one document on the {@code documentProcessorPool} executor.
     *
     * <p>The document is marked PROCESSING first, then READY (with chunk count and index time)
     * on success or FAILED on any exception raised by the pipeline steps.
     *
     * @param docId ID of the {@link KnowledgeDocument} to process
     * @throws java.util.NoSuchElementException if no document with this ID exists
     */
    @Async("documentProcessorPool")
    public void processDocument(String docId) {
        KnowledgeDocument doc = docRepo.findById(docId).orElseThrow();
        doc.setStatus(DocumentStatus.PROCESSING);
        docRepo.save(doc);

        try {
            // 1. Download the raw file from MinIO.
            byte[] fileBytes = minioService.download(doc.getStorageKey());

            // 2. Load and split into chunks.
            List<Document> chunks = loadAndChunk(fileBytes, doc);

            // 3. Attach multi-tenant metadata. The chunk position is recorded as well so the
            //    stored metadata carries the same fields ChunkMetadata declares.
            String tenantId = getTenantId(doc.getKnowledgeBaseId());
            int totalChunks = chunks.size();
            for (int i = 0; i < totalChunks; i++) {
                Document chunk = chunks.get(i);
                chunk.getMetadata().put("tenant_id", tenantId);
                chunk.getMetadata().put("kb_id", doc.getKnowledgeBaseId());
                chunk.getMetadata().put("doc_id", docId);
                chunk.getMetadata().put("filename", doc.getOriginalFilename());
                chunk.getMetadata().put("chunk_index", i);
                chunk.getMetadata().put("total_chunks", totalChunks);
            }

            // 4. Embed and store.
            vectorStore.add(chunks);

            // 5. Record the successful result.
            doc.setStatus(DocumentStatus.READY);
            doc.setChunkCount(totalChunks);
            doc.setIndexedAt(LocalDateTime.now());
            docRepo.save(doc);
            log.info("文档处理完成: {}, 生成{}个分块", doc.getOriginalFilename(), totalChunks);
        } catch (Exception e) {
            // Log before touching the repository: if persisting the FAILED status throws as
            // well, the root cause has already been recorded.
            log.error("文档处理失败: {}", docId, e);
            doc.setStatus(DocumentStatus.FAILED);
            docRepo.save(doc);
        }
    }
}
多租户问答服务
/**
 * Tenant-aware question answering: retrieves chunks restricted to one tenant and one
 * knowledge base, then asks the chat model to answer from those chunks only.
 *
 * <p>NOTE(review): {@code buildContext(...)} and {@code extractSources(...)} are called below
 * but are not shown in this excerpt.
 */
@Service
@RequiredArgsConstructor
public class TenantAwareQaService {

    private final ChatClient chatClient;
    private final VectorStore vectorStore;

    /**
     * Answers a question using only documents of the given tenant and knowledge base.
     *
     * @param question natural-language question
     * @param tenantId tenant whose documents may be searched; must not be null or blank
     * @param kbId     knowledge base to search; must not be null or blank
     * @return the answer with its sources, or a "no answer" response when nothing relevant
     *         was retrieved
     * @throws IllegalArgumentException if {@code tenantId} or {@code kbId} is null or blank
     */
    public QaResponse query(String question, String tenantId, String kbId) {
        // The two IDs are the only thing enforcing tenant isolation. How a null/blank value
        // behaves inside a filter expression depends on the vector store, so reject it here
        // instead of issuing a search whose scope is undefined.
        if (tenantId == null || tenantId.isBlank()) {
            throw new IllegalArgumentException("tenantId must not be null or blank");
        }
        if (kbId == null || kbId.isBlank()) {
            throw new IllegalArgumentException("kbId must not be null or blank");
        }

        // Multi-tenant filter: only search documents of the current tenant / knowledge base.
        FilterExpressionBuilder filter = new FilterExpressionBuilder();
        Filter.Expression tenantFilter = filter.and(
                filter.eq("tenant_id", tenantId),
                filter.eq("kb_id", kbId)
        ).build();

        List<Document> docs = vectorStore.similaritySearch(
                SearchRequest.query(question)
                        .withTopK(5)
                        .withSimilarityThreshold(0.7)
                        .withFilterExpression(tenantFilter)
        );

        // Nothing passed the similarity threshold: answer without calling the model.
        if (docs.isEmpty()) {
            return QaResponse.noAnswer("在当前知识库中未找到相关信息");
        }

        String context = buildContext(docs);
        String answer = chatClient.prompt()
                .system("你是企业知识库助手,只基于提供的资料回答。")
                .user(u -> u.text("资料:\n{ctx}\n\n问题:{q}")
                        .param("ctx", context).param("q", question))
                .call()
                .content();

        return QaResponse.builder()
                .answer(answer)
                .sources(extractSources(docs))
                .build();
    }
}
部署架构
性能优化要点
- 异步文档处理:上传后立即返回,文档在后台异步处理(文中示例使用 @Async 线程池;生产环境可换成 Kafka+Worker,以获得持久化队列与失败重试)
- 向量索引分区:按tenant_id分区,避免全量扫描
- 答案缓存:相同问题+知识库组合,缓存24小时
- 预热机制:系统启动时加载高频问题的答案到缓存
