第2218篇：手写内容的识别与处理——从手写笔记到结构化数据

老张2026/4/30大约 12 分钟

第2218篇：手写内容的识别与处理——从手写笔记到结构化数据

适读人群：做文档智能、表单处理、OCR应用的Java工程师 | 阅读时长：约16分钟 | 核心价值：掌握手写识别的完整工程链路，从图像预处理到结构化数据提取

我有个客户是做保险理赔的，他们每天要处理几千张手写的索赔表单。

之前全靠人工录入，一个熟练的录入员一天能处理100-150张，人力成本高，而且偶尔还会录错。他们想上OCR，但试了市面上几个商业产品，识别率只有75%左右——大约每4张表单就有1张识别出错，而且事先无法知道是哪一张，结果所有表单仍然要人工逐张核查，反而增加了工作量。

关键问题出在哪？手写识别比打印字识别难得多，而且工程上的挑战不只是模型精度，还有图像质量参差不齐、字段定位困难、识别后数据校验等一系列问题。

最终我们把端到端识别准确率提到了92%，把人工复核量降低了80%（复核率从100%降到20%）。这篇文章把完整方案拆开讲。

手写识别的工程挑战全景

图像预处理：垃圾进垃圾出

识别效果 80% 取决于图像质量。在送进识别模型之前，必须做充分的预处理：

/**
 * Preprocessing pipeline for scanned or photographed handwritten documents.
 *
 * <p>The pipeline decodes the raw bytes, runs a fixed sequence of clean-up stages and
 * re-encodes the result as PNG. Only adaptive binarization and rotation are actually
 * implemented in this class; orientation fix, resolution normalization, denoising, skew
 * estimation, perspective correction and stamp removal are placeholders that currently
 * return their input (or an angle of 0) unchanged.
 */
@Service
@Slf4j
public class HandwritingImagePreprocessor {

    /** Target resolution handed to {@link #normalizeResolution}. */
    private static final int TARGET_DPI = 300;

    /** Skew angles whose magnitude is at or below this value (degrees) are not corrected. */
    private static final double MIN_DESKEW_ANGLE_DEGREES = 0.5;

    /** Gray levels below this value are counted as ink when building the row projection. */
    private static final int DARK_PIXEL_THRESHOLD = 128;

    /** Side length, in pixels, of the square neighbourhood used by the Sauvola threshold. */
    private static final int SAUVOLA_WINDOW_SIZE = 25;

    /**
     * Sauvola sensitivity. Because {@code stdDev / R - 1} is negative for typical windows,
     * a larger k lowers the threshold, so fewer pixels are classified as ink.
     */
    private static final double SAUVOLA_K = 0.5;

    /** Sauvola dynamic range R of the standard deviation for 8-bit gray images. */
    private static final double SAUVOLA_DYNAMIC_RANGE = 128.0;

    private static final int BLACK_ARGB = 0xFF000000;
    private static final int WHITE_ARGB = 0xFFFFFFFF;

    /**
     * Runs the complete preprocessing pipeline on one image.
     *
     * @param rawImageBytes encoded image bytes in any format {@link ImageIO} can decode
     * @return a successful result carrying the processed PNG bytes (and the detected skew
     *         angle when a deskew was applied), or a failed result with a message; this
     *         method does not propagate exceptions
     */
    public PreprocessResult preprocess(byte[] rawImageBytes) {
        if (rawImageBytes == null || rawImageBytes.length == 0) {
            return PreprocessResult.failed("图像数据为空");
        }
        try {
            BufferedImage image = ImageIO.read(new ByteArrayInputStream(rawImageBytes));
            if (image == null) {
                // ImageIO.read signals "no reader understands this data" by returning null
                // rather than throwing, so it has to be checked explicitly.
                return PreprocessResult.failed("无法解码图像：不支持的格式或数据已损坏");
            }
            PreprocessResult result = new PreprocessResult();

            // 1. Orientation fix (EXIF) - placeholder.
            image = correctOrientation(image, rawImageBytes);

            // 2. Resolution normalization - placeholder.
            image = normalizeResolution(image, TARGET_DPI);

            // 3. Denoising - placeholder.
            image = denoiseImage(image);

            // 4. Deskew: only rotate when the detected skew is large enough to matter.
            double skewAngle = detectSkewAngle(image);
            if (Math.abs(skewAngle) > MIN_DESKEW_ANGLE_DEGREES) {
                image = rotateImage(image, -skewAngle);
                result.setDeskewAngle(skewAngle);
                log.debug("倾斜校正: angle={}°", String.format("%.2f", skewAngle));
            }

            // 5. Perspective correction for camera captures - placeholder.
            image = correctPerspective(image);

            // 6. Sauvola adaptive binarization to cope with uneven lighting.
            image = adaptiveBinarize(image);

            // 7. Stamp / watermark removal - placeholder.
            image = removeStampOverlay(image);

            result.setProcessedImage(imageToBytes(image));
            result.setSuccess(true);
            return result;

        } catch (Exception e) {
            // Boundary catch: any stage failure is reported as a failed result.
            log.error("图像预处理失败", e);
            return PreprocessResult.failed(e.getMessage());
        }
    }

    /**
     * Estimates the skew angle of the page in degrees.
     *
     * <p>Builds a per-row count of dark pixels (a horizontal projection profile) and hands
     * it to {@link #estimateSkewFromProjection}. That estimator is a placeholder, so this
     * method currently always returns 0 and the deskew step never triggers.
     */
    private double detectSkewAngle(BufferedImage image) {
        int width = image.getWidth();
        int height = image.getHeight();

        int[] horizontalProjection = new int[height];
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                if (getGray(image, x, y) < DARK_PIXEL_THRESHOLD) {
                    horizontalProjection[y]++;
                }
            }
        }

        return estimateSkewFromProjection(horizontalProjection);
    }

    /**
     * Placeholder skew estimator; always returns 0.
     *
     * <p>A real implementation would evaluate the projection at several candidate angles
     * and pick the one with the largest variance, or delegate to OpenCV / Leptonica.
     */
    private double estimateSkewFromProjection(int[] projection) {
        return 0.0;
    }

    /**
     * Binarizes the image with a Sauvola-style local threshold
     * {@code T = mean * (1 + k * (stdDev / R - 1))} computed per pixel over a square window.
     *
     * <p>The statistics are recomputed from scratch for every pixel, so the cost grows with
     * width * height * window area.
     *
     * @return a new {@code TYPE_BYTE_BINARY} image of the same size
     */
    private BufferedImage adaptiveBinarize(BufferedImage input) {
        int width = input.getWidth();
        int height = input.getHeight();

        BufferedImage output = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);

        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                double[] localStats = computeLocalStats(input, x, y, SAUVOLA_WINDOW_SIZE);
                double mean = localStats[0];
                double stdDev = Math.sqrt(localStats[1]);

                double threshold =
                        mean * (1 + SAUVOLA_K * (stdDev / SAUVOLA_DYNAMIC_RANGE - 1));
                boolean isInk = getGray(input, x, y) < threshold;
                output.setRGB(x, y, isInk ? BLACK_ARGB : WHITE_ARGB);
            }
        }

        return output;
    }

    /**
     * Computes mean and variance of the gray values in the window centred on (cx, cy),
     * clipped to the image bounds.
     *
     * <p>Both values are kept in double precision: truncating the mean to an int before
     * squaring it can inflate the variance by up to roughly twice the mean, which visibly
     * shifts the Sauvola threshold in flat regions.
     *
     * @return {@code {mean, variance}}; the variance is never negative
     */
    private double[] computeLocalStats(BufferedImage image, int cx, int cy, int size) {
        int half = size / 2;
        int xStart = Math.max(0, cx - half);
        int xEnd = Math.min(image.getWidth() - 1, cx + half);
        int yStart = Math.max(0, cy - half);
        int yEnd = Math.min(image.getHeight() - 1, cy + half);

        long sum = 0;
        long sumSq = 0;
        int count = 0;

        for (int y = yStart; y <= yEnd; y++) {
            for (int x = xStart; x <= xEnd; x++) {
                int gray = getGray(image, x, y);
                sum += gray;
                sumSq += (long) gray * gray;
                count++;
            }
        }

        double mean = (double) sum / count;
        // Guard against a tiny negative value caused by floating-point rounding.
        double variance = Math.max(0.0, (double) sumSq / count - mean * mean);
        return new double[]{mean, variance};
    }

    /** Returns the luma (0.299 R + 0.587 G + 0.114 B) of the pixel, truncated to an int. */
    private int getGray(BufferedImage image, int x, int y) {
        int rgb = image.getRGB(x, y);
        int r = (rgb >> 16) & 0xFF;
        int g = (rgb >> 8) & 0xFF;
        int b = rgb & 0xFF;
        return (int) (0.299 * r + 0.587 * g + 0.114 * b);
    }

    /** Placeholder for denoising (a median filter is suggested); returns the input unchanged. */
    private BufferedImage denoiseImage(BufferedImage input) {
        return input;
    }

    /** Placeholder for EXIF-based orientation correction; returns the image unchanged. */
    private BufferedImage correctOrientation(BufferedImage image, byte[] rawBytes) {
        return image;
    }

    /** Placeholder for upscaling images below {@code targetDpi}; returns the image unchanged. */
    private BufferedImage normalizeResolution(BufferedImage image, int targetDpi) {
        return image;
    }

    /** Placeholder for four-point perspective correction; returns the image unchanged. */
    private BufferedImage correctPerspective(BufferedImage image) {
        return image;
    }

    /** Placeholder for colour-based stamp removal; returns the image unchanged. */
    private BufferedImage removeStampOverlay(BufferedImage image) {
        return image;
    }

    /**
     * Rotates the image about its centre by {@code angle} degrees onto a canvas large
     * enough to hold the whole rotated page.
     *
     * <p>The canvas is filled with white first so the uncovered corners are not left black
     * (black corners would later be binarized as ink). Images reporting
     * {@code TYPE_CUSTOM} are drawn onto an RGB canvas because {@link BufferedImage}
     * cannot be constructed with that type.
     */
    private BufferedImage rotateImage(BufferedImage image, double angle) {
        double radians = Math.toRadians(angle);
        double absSin = Math.abs(Math.sin(radians));
        double absCos = Math.abs(Math.cos(radians));
        int srcWidth = image.getWidth();
        int srcHeight = image.getHeight();

        // The small epsilon keeps floating-point noise (e.g. cos(90 deg) != 0) from adding
        // a spurious extra pixel when rounding up.
        int newWidth = Math.max(1, (int) Math.ceil(srcWidth * absCos + srcHeight * absSin - 1e-9));
        int newHeight = Math.max(1, (int) Math.ceil(srcWidth * absSin + srcHeight * absCos - 1e-9));

        int canvasType = image.getType() == BufferedImage.TYPE_CUSTOM
                ? BufferedImage.TYPE_INT_RGB
                : image.getType();
        BufferedImage rotated = new BufferedImage(newWidth, newHeight, canvasType);

        Graphics2D g = rotated.createGraphics();
        try {
            g.setColor(java.awt.Color.WHITE);
            g.fillRect(0, 0, newWidth, newHeight);
            g.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
                    RenderingHints.VALUE_INTERPOLATION_BICUBIC);
            g.translate(newWidth / 2.0, newHeight / 2.0);
            g.rotate(radians);
            g.translate(-srcWidth / 2.0, -srcHeight / 2.0);
            g.drawImage(image, 0, 0, null);
        } finally {
            g.dispose();
        }
        return rotated;
    }

    /**
     * Encodes the image as PNG.
     *
     * @throws IOException if encoding fails or no PNG writer accepts the image
     */
    private byte[] imageToBytes(BufferedImage image) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        if (!ImageIO.write(image, "PNG", baos)) {
            throw new IOException("No ImageIO writer available for PNG");
        }
        return baos.toByteArray();
    }
}

结构化表单的字段定位

手写表单识别不只是识别文字，还需要知道每个字段的位置和对应的字段名：

/**
 * Locates (template mode) or extracts (multimodal mode) the fields of a form image.
 *
 * <p>Template mode maps the field rectangles stored in a {@code FormTemplate} onto the
 * actual image. Multimodal mode asks a vision-capable chat model to read the requested
 * fields and parses its JSON answer.
 */
@Service
@Slf4j
public class FormFieldLocator {

    /** Shared JSON parser; avoids building a new mapper for every response. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Confidence assumed when the model's answer omits it. */
    private static final double DEFAULT_CONFIDENCE = 0.5;

    @Autowired
    private OpenAiClient openAiClient;

    /**
     * Method 1: template-based field location for fixed-layout forms.
     *
     * @param formImageBytes the form image to align against the template
     * @param template       template providing each field's name and bounding box
     * @return field name mapped to its rectangle in image coordinates
     */
    public Map<String, Rectangle> locateFieldsByTemplate(byte[] formImageBytes,
                                                          FormTemplate template) {
        Map<String, Rectangle> fieldLocations = new HashMap<>();

        // Transform taking template coordinates to image coordinates.
        AffineTransform transform = computeAlignmentTransform(formImageBytes, template);

        for (FormField field : template.getFields()) {
            Rectangle actualRect = transformRectangle(field.getBoundingBox(), transform);
            fieldLocations.put(field.getFieldName(), actualRect);
        }

        return fieldLocations;
    }

    /**
     * Method 2: multimodal extraction for free-form layouts or when no template exists.
     *
     * <p>Sends the image (as base64, declared as {@code image/png}) and a prompt listing
     * the expected fields to the model, then parses the returned JSON. Entries that lack a
     * {@code fieldName} are skipped; a missing {@code confidence} defaults to 0.5, a
     * missing {@code empty} to {@code false}, and a missing or null {@code value} to the
     * empty string, so one incomplete entry no longer fails the whole form.
     *
     * @param formImageBytes the form image
     * @param expectedFields names of the fields the model should extract
     * @return one result per named entry in the model's answer; empty when the answer has
     *         no {@code fields} array
     * @throws FormParseException if the response is absent or is not parseable JSON
     */
    public List<FormFieldResult> locateFieldsByMultimodal(byte[] formImageBytes,
                                                           List<String> expectedFields) {
        String base64 = Base64.getEncoder().encodeToString(formImageBytes);

        String fieldsDescription = String.join("、", expectedFields);
        String prompt = String.format("""
                这是一张手写表单图片。请识别并提取以下字段的内容：%s
                
                对每个字段，提供：
                1. 字段名称
                2. 识别到的内容（手写文字）
                3. 置信度（0-1，1表示非常确定）
                4. 如果字段为空或无法识别，说明原因
                
                输出JSON格式：
                {
                  "fields": [
                    {
                      "fieldName": "字段名",
                      "value": "识别值",
                      "confidence": 0.95,
                      "empty": false,
                      "unreadableReason": null
                    }
                  ]
                }
                """, fieldsDescription);

        String response = openAiClient.chatMultimodal(prompt, base64, "image/png");

        try {
            if (response == null) {
                // Raised inside the try so it is wrapped in FormParseException below.
                throw new IllegalStateException("multimodal model returned no response");
            }
            // Models often wrap JSON in Markdown code fences; strip them before parsing.
            String cleaned = response.replaceAll("```json\\s*", "").replaceAll("```\\s*", "").trim();
            JsonNode fieldsNode = OBJECT_MAPPER.readTree(cleaned).path("fields");

            List<FormFieldResult> results = new ArrayList<>();
            for (JsonNode field : fieldsNode) {
                String fieldName = field.path("fieldName").asText("");
                if (fieldName.isEmpty()) {
                    // Without a name the entry cannot be matched to any schema field.
                    log.warn("多模态返回的字段缺少fieldName，已跳过: {}", field);
                    continue;
                }
                results.add(FormFieldResult.builder()
                        .fieldName(fieldName)
                        .value(field.path("value").asText(""))
                        .confidence(field.path("confidence").asDouble(DEFAULT_CONFIDENCE))
                        .empty(field.path("empty").asBoolean(false))
                        .unreadableReason(field.path("unreadableReason").asText(null))
                        .build());
            }
            return results;

        } catch (Exception e) {
            log.error("多模态字段定位结果解析失败: {}", response, e);
            throw new FormParseException("表单解析失败", e);
        }
    }

    /**
     * Placeholder alignment: returns the identity transform, i.e. the image is assumed to
     * already line up with the template. A real implementation would match anchor
     * features (e.g. OpenCV ORB/SIFT).
     */
    private AffineTransform computeAlignmentTransform(byte[] imageBytes, FormTemplate template) {
        return new AffineTransform();
    }

    /**
     * Maps a template rectangle into image coordinates.
     *
     * <p>Returns the integer bounding box of the transformed outline, so all four corners
     * are taken into account and the result keeps a non-negative size even when the
     * transform rotates or flips the rectangle.
     */
    private Rectangle transformRectangle(Rectangle rect, AffineTransform transform) {
        return transform.createTransformedShape(rect).getBounds();
    }
}

识别后的数据校验与纠错

识别出来的原始文字往往需要进一步校验和纠错：

/**
 * Post-processing validator for handwriting recognition results.
 *
 * <p>For every field in the schema it checks the format, tries a conservative automatic
 * correction when the format is wrong, evaluates the field's business rule and flags low
 * confidence. Auto-correction only returns a value when the cleaned text is unambiguous;
 * anything doubtful yields {@code null} so the field is reported for human review instead
 * of being silently altered.
 */
@Service
@Slf4j
public class HandwritingResultValidator {

    /** Fields recognized with a confidence below this value are flagged for review. */
    private static final double LOW_CONFIDENCE_THRESHOLD = 0.7;

    /**
     * Accepted date layouts after separators have been normalized to '-'.
     * Strict resolution rejects impossible dates (e.g. Feb 30) instead of adjusting them.
     */
    private static final DateTimeFormatter[] DATE_FORMATTERS = {
            strictFormatter("uuuu-M-d"),
            strictFormatter("uu-M-d"),
            strictFormatter("M-d-uuuu"),
    };

    /** Characters that may decorate an amount without changing its value. */
    private static final java.util.regex.Pattern AMOUNT_DECORATION =
            java.util.regex.Pattern.compile("[\\s,，¥￥$元圆整]");

    /** A plain non-negative decimal number. */
    private static final java.util.regex.Pattern PLAIN_AMOUNT =
            java.util.regex.Pattern.compile("\\d+\\.?\\d*|\\.\\d+");

    /** 17 digits followed by a digit or X. */
    private static final java.util.regex.Pattern ID_CARD_SHAPE =
            java.util.regex.Pattern.compile("\\d{17}[\\dX]");

    /** GB 11643-1999 position weights for the first 17 digits. */
    private static final int[] ID_CARD_WEIGHTS =
            {7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2};

    /** Check character indexed by (weighted sum mod 11). */
    private static final String ID_CARD_CHECK_CHARS = "10X98765432";

    private static DateTimeFormatter strictFormatter(String pattern) {
        return DateTimeFormatter.ofPattern(pattern)
                .withResolverStyle(java.time.format.ResolverStyle.STRICT);
    }

    /**
     * Validates all schema fields against the recognized values.
     *
     * <p>Side effect: when a value is auto-corrected, the corresponding
     * {@code FormFieldResult} in {@code recognizedFields} is updated in place (new value,
     * auto-corrected flag, original value).
     *
     * @param recognizedFields recognition results keyed by field name
     * @param schema           field definitions to validate against
     * @return a report listing issues, auto-corrections and low-confidence fields; human
     *         review is required when any issue or low-confidence field exists
     */
    public ValidationReport validate(Map<String, FormFieldResult> recognizedFields,
                                      FormSchema schema) {
        ValidationReport report = new ValidationReport();

        for (FormFieldDefinition fieldDef : schema.getFields()) {
            String fieldName = fieldDef.getFieldName();
            FormFieldResult fieldResult = recognizedFields.get(fieldName);

            if (fieldResult == null) {
                report.addIssue(fieldName, ValidationIssue.MISSING_REQUIRED_FIELD);
                continue;
            }

            String rawValue = fieldResult.getValue();

            // 1. Format check, with an auto-correction attempt on failure.
            // NOTE(review): validateFormat is not defined in the visible part of this
            // class - confirm where it comes from.
            ValidationResult formatResult = validateFormat(rawValue, fieldDef.getFieldType());
            if (!formatResult.isValid()) {
                String corrected = attemptAutoCorrection(rawValue, fieldDef.getFieldType());
                if (corrected != null) {
                    fieldResult.setValue(corrected);
                    fieldResult.setAutoCorrected(true);
                    fieldResult.setOriginalValue(rawValue);
                    report.addAutoCorrection(fieldName, rawValue, corrected);
                } else {
                    report.addIssue(fieldName, ValidationIssue.FORMAT_ERROR,
                            "期望格式: " + fieldDef.getFieldType() + ", 实际: " + rawValue);
                }
            }

            // 2. Business rule, evaluated on the (possibly corrected) current value.
            if (fieldDef.hasBusinessRule()) {
                boolean rulePass = evaluateBusinessRule(
                        fieldResult.getValue(), fieldDef.getBusinessRule(), recognizedFields);
                if (!rulePass) {
                    report.addIssue(fieldName, ValidationIssue.BUSINESS_RULE_VIOLATION,
                            fieldDef.getBusinessRule().getDescription());
                }
            }

            // 3. Low-confidence warning.
            if (fieldResult.getConfidence() < LOW_CONFIDENCE_THRESHOLD) {
                report.addLowConfidenceField(fieldName, fieldResult.getConfidence());
            }
        }

        report.setRequiresHumanReview(
                !report.getIssues().isEmpty() ||
                !report.getLowConfidenceFields().isEmpty()
        );

        return report;
    }

    /**
     * Dispatches to the type-specific corrector.
     *
     * @return the corrected value, or {@code null} when the input is null/empty, the type
     *         is null or unsupported, or no safe correction exists
     */
    private String attemptAutoCorrection(String rawValue, FieldType fieldType) {
        if (rawValue == null || rawValue.isEmpty() || fieldType == null) return null;

        return switch (fieldType) {
            case DATE -> correctDate(rawValue);
            case PHONE -> correctPhone(rawValue);
            case ID_CARD -> correctIdCard(rawValue);
            case AMOUNT -> correctAmount(rawValue);
            default -> null;
        };
    }

    /**
     * Normalizes a handwritten date to ISO {@code yyyy-MM-dd}.
     *
     * <p>Examples: {@code "2024.3.5"} and {@code "24年3月5日"} both become
     * {@code "2024-03-05"}. Whitespace is removed; '.', '/', '年' and '月' are treated as
     * separators and a trailing '日' is dropped. If that fails, letters commonly confused
     * with digits (O/o, l/I, Z, S) are replaced and parsing is retried once.
     *
     * @return the ISO date, or {@code null} when no supported layout yields a real date
     */
    private String correctDate(String raw) {
        String compact = raw.trim().replaceAll("\\s+", "");

        String parsed = parseNormalizedDate(compact);
        if (parsed != null) {
            return parsed;
        }

        String digitCorrected = replaceConfusableLetters(compact);
        return digitCorrected.equals(compact) ? null : parseNormalizedDate(digitCorrected);
    }

    /** Unifies the separators and tries each supported layout in order. */
    private static String parseNormalizedDate(String compact) {
        String candidate = compact
                .replace('.', '-').replace('/', '-')
                .replace('年', '-').replace('月', '-');
        if (candidate.endsWith("日")) {
            candidate = candidate.substring(0, candidate.length() - 1);
        }

        for (DateTimeFormatter formatter : DATE_FORMATTERS) {
            try {
                return LocalDate.parse(candidate, formatter)
                        .format(DateTimeFormatter.ISO_LOCAL_DATE);
            } catch (java.time.format.DateTimeParseException ignored) {
                // Not this layout; try the next one.
            }
        }
        return null;
    }

    /** Replaces letters that handwriting recognition commonly confuses with digits. */
    private static String replaceConfusableLetters(String text) {
        return text
                .replace("O", "0").replace("o", "0")
                .replace("l", "1").replace("I", "1")
                .replace("Z", "2")
                .replace("S", "5");
    }

    /**
     * Normalizes an amount to a plain decimal with two fraction digits.
     *
     * <p>Only whitespace, thousands separators and the currency marks ¥ ￥ $ 元 圆 整 are
     * removed, and digit-like letters are mapped to digits. If anything else remains
     * (other letters, a minus sign, a second decimal point) the method returns
     * {@code null} rather than dropping characters, because silently deleting a
     * misrecognized character changes the amount's magnitude.
     *
     * @return the amount rounded half-up to two decimals, or {@code null}
     */
    private String correctAmount(String raw) {
        String stripped = AMOUNT_DECORATION.matcher(raw).replaceAll("");
        String candidate = replaceConfusableLetters(stripped);
        if (!PLAIN_AMOUNT.matcher(candidate).matches()) {
            return null;
        }

        try {
            BigDecimal amount = new BigDecimal(candidate);
            return amount.setScale(2, RoundingMode.HALF_UP).toPlainString();
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Normalizes a mobile number by dropping every non-digit character.
     *
     * @return the 11 digits when they start with '1', otherwise {@code null}
     */
    private String correctPhone(String raw) {
        String digitsOnly = raw.replaceAll("[^0-9]", "");
        if (digitsOnly.length() == 11 && digitsOnly.startsWith("1")) {
            return digitsOnly;
        }
        return null;
    }

    /**
     * Normalizes an 18-character resident ID number.
     *
     * <p>Upper-cases the text, removes whitespace and maps digit-like letters (O, I, L, Z,
     * S) to digits; a trailing X is left alone because it is a legal check character. The
     * result is only returned when it consists of 17 digits plus a digit or X and its
     * GB 11643-1999 check character matches, so an arbitrary 18-character string is never
     * reported as a successful correction.
     *
     * @return the validated ID number, or {@code null}
     */
    private String correctIdCard(String raw) {
        String normalized = raw.toUpperCase(java.util.Locale.ROOT).replaceAll("\\s+", "");
        if (normalized.length() != 18) {
            return null;
        }

        String candidate = normalized
                .replace('O', '0')
                .replace('I', '1').replace('L', '1')
                .replace('Z', '2')
                .replace('S', '5');

        if (!ID_CARD_SHAPE.matcher(candidate).matches() || !hasValidIdCardChecksum(candidate)) {
            return null;
        }
        return candidate;
    }

    /** Verifies the 18th character against the weighted sum of the first 17 digits. */
    private static boolean hasValidIdCardChecksum(String id) {
        int weightedSum = 0;
        for (int i = 0; i < ID_CARD_WEIGHTS.length; i++) {
            weightedSum += (id.charAt(i) - '0') * ID_CARD_WEIGHTS[i];
        }
        return ID_CARD_CHECK_CHARS.charAt(weightedSum % 11) == id.charAt(17);
    }

    /** Placeholder business-rule engine; currently accepts every value. */
    private boolean evaluateBusinessRule(String value, BusinessRule rule,
                                          Map<String, FormFieldResult> allFields) {
        return true;
    }
}

低置信度字段的人工复核流程

对于系统无法确信的字段，设计高效的人工复核界面：

/**
 * Creates and completes human review tasks for fields the pipeline is not sure about.
 */
@Service
@Slf4j
public class HumanReviewTaskService {

    /** Assumed average manual review time per field, in seconds. */
    private static final int SECONDS_PER_FIELD = 30;

    @Autowired
    private ReviewTaskRepository reviewTaskRepository;

    @Autowired
    private NotificationService notificationService;

    /**
     * Builds a review task from a validation report so reviewers only look at the fields
     * that actually have a problem.
     *
     * <p>Every reported issue becomes a HIGH priority item. This includes missing required
     * fields: they have no recognized value (shown as an empty string), but a reviewer
     * still has to supply one, and leaving them out would let a document whose only
     * problem is a missing field skip review entirely. Low-confidence fields that are not
     * already listed become NORMAL priority items. When there is nothing to review, no
     * task is saved and no notification is sent.
     *
     * @param documentId       id of the document under review
     * @param recognizedFields recognition results keyed by field name
     * @param validationReport report produced by the validator
     * @param formImageBytes   the full form image, attached for reference
     * @return the persisted task, or {@code HumanReviewTask.notRequired(documentId)}
     */
    public HumanReviewTask createReviewTask(String documentId,
                                             Map<String, FormFieldResult> recognizedFields,
                                             ValidationReport validationReport,
                                             byte[] formImageBytes) {
        List<ReviewItem> reviewItems = new ArrayList<>();
        // Names already queued, so a field with both an issue and low confidence
        // appears only once.
        java.util.Set<String> queuedFieldNames = new java.util.HashSet<>();

        // 1. Fields with a validation issue (format error, rule violation, missing field).
        for (Map.Entry<String, ValidationIssue> issue : validationReport.getIssues().entrySet()) {
            String fieldName = issue.getKey();
            FormFieldResult field = recognizedFields.get(fieldName);
            reviewItems.add(ReviewItem.builder()
                    .fieldName(fieldName)
                    .recognizedValue(field != null ? field.getValue() : "")
                    .issue(issue.getValue().getDescription())
                    .priority(ReviewPriority.HIGH)
                    .build());
            queuedFieldNames.add(fieldName);
        }

        // 2. Low-confidence fields not already covered above.
        for (Map.Entry<String, Double> lowConf : validationReport.getLowConfidenceFields().entrySet()) {
            String fieldName = lowConf.getKey();
            if (!queuedFieldNames.add(fieldName)) {
                continue;
            }
            FormFieldResult field = recognizedFields.get(fieldName);
            reviewItems.add(ReviewItem.builder()
                    .fieldName(fieldName)
                    .recognizedValue(field != null ? field.getValue() : "")
                    .confidence(lowConf.getValue())
                    .issue("置信度低: " + String.format("%.0f%%", lowConf.getValue() * 100))
                    .priority(ReviewPriority.NORMAL)
                    .build());
        }

        if (reviewItems.isEmpty()) {
            return HumanReviewTask.notRequired(documentId);
        }

        // The complete form image is attached for reference; cropping to the individual
        // field regions is not done here.
        HumanReviewTask task = HumanReviewTask.builder()
                .taskId(UUID.randomUUID().toString())
                .documentId(documentId)
                .reviewItems(reviewItems)
                .formImageBytes(formImageBytes)
                .status(ReviewStatus.PENDING)
                .createdAt(Instant.now())
                .estimatedReviewMinutes(calculateEstimatedTime(reviewItems))
                .build();

        reviewTaskRepository.save(task);

        notificationService.notifyReviewersAvailableTask(task);

        log.info("创建人工复核任务: documentId={}, reviewItemCount={}",
                documentId, reviewItems.size());
        return task;
    }

    /**
     * Records a reviewer's corrections and marks the task as completed.
     *
     * @param taskId          id of the task being completed
     * @param reviewerId      id of the reviewer submitting the result
     * @param correctedValues corrected value per field name
     * @return a success result for the task
     * @throws TaskNotFoundException if no task with {@code taskId} exists
     */
    @Transactional
    public ReviewCompleteResult submitReview(String taskId, String reviewerId,
                                              Map<String, String> correctedValues) {
        HumanReviewTask task = reviewTaskRepository.findById(taskId)
                .orElseThrow(() -> new TaskNotFoundException(taskId));

        for (Map.Entry<String, String> correction : correctedValues.entrySet()) {
            task.applyCorrection(correction.getKey(), correction.getValue(), reviewerId);
        }

        task.setStatus(ReviewStatus.COMPLETED);
        task.setCompletedAt(Instant.now());
        task.setReviewerId(reviewerId);
        reviewTaskRepository.save(task);

        // NOTE(review): this call sits inside the @Transactional method, so a real
        // publisher would presumably run before the transaction commits - confirm that
        // is acceptable once it is implemented.
        publishReviewFeedback(task, correctedValues);

        return ReviewCompleteResult.success(taskId);
    }

    /**
     * Estimates the review time in whole minutes, rounded up, at
     * {@link #SECONDS_PER_FIELD} seconds per item and never less than one minute.
     */
    private int calculateEstimatedTime(List<ReviewItem> items) {
        int totalSeconds = items.size() * SECONDS_PER_FIELD;
        return Math.max(1, (totalSeconds + 59) / 60);
    }

    /** Placeholder for publishing review corrections to a message queue; does nothing. */
    private void publishReviewFeedback(HumanReviewTask task,
                                        Map<String, String> corrections) {
    }
}

批量处理与性能指标

/**
 * Runs the handwriting pipeline (preprocess, extract, validate, queue review) over a
 * batch of documents and records success/failure/review counters and a duration timer.
 */
@Service
@Slf4j
public class HandwritingBatchProcessor {

    @Autowired
    private HandwritingImagePreprocessor preprocessor;

    @Autowired
    private FormFieldLocator fieldLocator;

    @Autowired
    private HandwritingResultValidator validator;

    @Autowired
    private HumanReviewTaskService reviewService;

    @Autowired
    private MeterRegistry meterRegistry;

    /**
     * Processes each document independently; a failure in one document is recorded and
     * does not stop the rest of the batch.
     *
     * @param documents documents to process
     * @param template  template supplying the expected field names and validation schema
     * @return per-document outcomes plus aggregate counts
     */
    public BatchProcessResult processBatch(List<FormDocument> documents,
                                            FormTemplate template) {
        BatchProcessResult batchResult = new BatchProcessResult();
        Counter successCounter = meterRegistry.counter("handwriting.process.success");
        Counter failCounter = meterRegistry.counter("handwriting.process.fail");
        Counter reviewRequiredCounter = meterRegistry.counter("handwriting.review.required");
        // Looked up once instead of on every iteration.
        var durationTimer = meterRegistry.timer("handwriting.process.duration");

        for (FormDocument document : documents) {
            // nanoTime is monotonic, so the elapsed time is unaffected by wall-clock
            // adjustments during processing.
            long startNanos = System.nanoTime();
            try {
                // 1. Image preprocessing.
                PreprocessResult preprocessResult = preprocessor.preprocess(document.getImageBytes());
                if (!preprocessResult.isSuccess()) {
                    failCounter.increment();
                    batchResult.addFailure(document.getDocumentId(), "图像预处理失败");
                    continue;
                }

                // 2. Field extraction.
                List<FormFieldResult> fieldResults = fieldLocator.locateFieldsByMultimodal(
                        preprocessResult.getProcessedImage(), template.getFieldNames());

                // Keep the first result when the model returns a field name twice.
                Map<String, FormFieldResult> fieldMap = fieldResults.stream()
                        .collect(Collectors.toMap(FormFieldResult::getFieldName,
                                r -> r, (a, b) -> a));

                // 3. Validation.
                ValidationReport report = validator.validate(fieldMap, template.getSchema());

                // 4. Queue a human review task when the report asks for one.
                if (report.isRequiresHumanReview()) {
                    reviewService.createReviewTask(document.getDocumentId(),
                            fieldMap, report, preprocessResult.getProcessedImage());
                    reviewRequiredCounter.increment();
                }

                // Duration is recorded for successfully processed documents only.
                durationTimer.record(System.nanoTime() - startNanos, TimeUnit.NANOSECONDS);

                successCounter.increment();
                batchResult.addSuccess(document.getDocumentId(), fieldMap,
                        report.isRequiresHumanReview());

            } catch (Exception e) {
                // Boundary catch: isolate the failure to this document.
                log.error("文档处理失败: documentId={}", document.getDocumentId(), e);
                failCounter.increment();
                batchResult.addFailure(document.getDocumentId(), e.getMessage());
            }
        }

        log.info("批次处理完成: total={}, success={}, failed={}, reviewRequired={}",
                documents.size(),
                batchResult.getSuccessCount(),
                batchResult.getFailureCount(),
                batchResult.getReviewRequiredCount());

        return batchResult;
    }
}

实践数据与优化总结

回到文章开头那个保险理赔案例，优化的过程和数据：

优化措施	识别准确率变化	人工复核率变化
基础OCR（未预处理）	75%	100%
加图像预处理	83%	70%
换多模态模型（GPT-4V）	89%	40%
加业务规则校验+自动纠错	89%	30%
加低置信度过滤+针对性人工复核	92%（最终入库）	20%

关键收益：人工复核量从100%降到20%，每个审核员的日处理量从100张提升到500张。

几条重要经验：

图像质量是天花板。 预处理做好，模型识别率自然提升。
不要追求100%自动化。 20%的人工复核是合理的，强行避免会引入更多错误。
低置信度字段精准复核比全量复核效率高5倍。 让人工只看真正有疑问的字段。
人工纠错数据是宝贵的训练数据。 每一次人工纠错都是对模型的改进机会。