第2224篇：多模态数据飞轮——用用户反馈持续改善视觉理解能力

老张 · 2026/4/30 · 大约 9 分钟

第2224篇：多模态数据飞轮——用用户反馈持续改善视觉理解能力

适读人群：做多模态产品的工程师和产品经理 | 阅读时长：约15分钟 | 核心价值：设计多模态数据飞轮，将用户反馈转化为模型改进的持续循环

做产品最让人兴奋的时刻之一，是发现用户在无意中帮你改善产品。

我们的图文问答系统上线半年后，我去查用户行为数据，发现了一个有趣的规律：用户对同一张图片连续提问时，后面几个问题往往是在纠正前面答案的错误。

用户："这个数字是多少？"——模型："87"；用户："确定是87吗？"——模型："啊对，是78，看反了"

用户不知道，这一串对话就是一份完美的错误样本。如果把这些"纠正性对话"收集起来，就是宝贵的训练数据——哪里错了、如何纠正，清清楚楚。

这就是多模态数据飞轮的核心思想：产品使用产生数据，数据改善模型，模型提升产品，产品吸引更多使用。

数据飞轮的四个环节

信号收集：显式与隐式反馈

/**
 * 多模态用户反馈收集服务
 * 收集显式反馈（点赞/点踩）和隐式行为信号
 */
@Service
@Slf4j
public class MultimodalFeedbackCollector {

    @Autowired
    private FeedbackRepository feedbackRepository;

    @Autowired
    private SessionAnalyzer sessionAnalyzer;

    @Autowired
    private KafkaTemplate<String, String> kafkaTemplate;

    /**
     * 收集显式反馈
     * 用户主动点赞、点踩、或提交文字纠正
     */
    public void collectExplicitFeedback(ExplicitFeedback feedback) {
        FeedbackRecord record = FeedbackRecord.builder()
                .feedbackId(UUID.randomUUID().toString())
                .sessionId(feedback.getSessionId())
                .requestId(feedback.getRequestId())
                .imageId(feedback.getImageId())
                .prompt(feedback.getPrompt())
                .modelResponse(feedback.getModelResponse())
                .feedbackType(feedback.getFeedbackType())  // THUMBS_UP/THUMBS_DOWN/CORRECTION
                .correctedResponse(feedback.getCorrectedResponse())
                .userId(feedback.getUserId())
                .timestamp(Instant.now())
                .build();

        feedbackRepository.save(record);

        // 推送到数据处理管道
        kafkaTemplate.send("multimodal.feedback.explicit",
                record.getFeedbackId(),
                serialize(record));

        log.info("收集显式反馈: requestId={}, type={}",
                feedback.getRequestId(), feedback.getFeedbackType());
    }

    /**
     * 分析对话会话，提取隐式行为信号
     *
     * 隐式信号的含义：
     * - 用户在同一图片上重复提同类问题 -> 上次回答不满意
     * - 用户的追问包含纠正词 -> 上次回答有错误
     * - 用户在收到回答后立即离开 -> 可能满意（或放弃）
     * - 用户花费很长时间后才发下一条消息 -> 回答让用户困惑
     */
    @Scheduled(fixedDelay = 300_000) // 每5分钟分析一次会话
    public void analyzeSessionsForImplicitSignals() {
        // 获取过去1小时内完成的会话
        Instant from = Instant.now().minus(Duration.ofHours(1));
        Instant to = Instant.now().minus(Duration.ofMinutes(5)); // 5分钟前的会话

        List<ConversationSession> sessions = sessionRepository.findCompletedSessionsInRange(from, to);

        for (ConversationSession session : sessions) {
            List<ImplicitSignal> signals = sessionAnalyzer.extractImplicitSignals(session);

            for (ImplicitSignal signal : signals) {
                FeedbackRecord record = FeedbackRecord.builder()
                        .feedbackId(UUID.randomUUID().toString())
                        .sessionId(session.getSessionId())
                        .feedbackType(FeedbackType.IMPLICIT)
                        .signalType(signal.getSignalType())
                        .relatedRequestId(signal.getRelatedRequestId())
                        .confidence(signal.getConfidence())
                        .timestamp(Instant.now())
                        .build();

                feedbackRepository.save(record);
            }
        }
    }

    @Autowired
    private ConversationSessionRepository sessionRepository;

    private String serialize(Object obj) {
        try {
            return new ObjectMapper().writeValueAsString(obj);
        } catch (JsonProcessingException e) {
            return "{}";
        }
    }
}

/**
 * Derives implicit feedback signals from the turns of a conversation session.
 *
 * <p>Three heuristics are applied: a user follow-up containing a correction
 * marker, a question repeated two turns later, and a turn whose recorded
 * response time exceeds a fixed threshold. All three are approximations.
 */
@Service
public class SessionAnalyzer {

    private static final String ASSISTANT_ROLE = "assistant";

    /** Response times above this many milliseconds are reported as slow. */
    private static final long SLOW_RESPONSE_THRESHOLD_MS = 15000;

    /** Two questions count as similar when their word-set Jaccard index exceeds this. */
    private static final double SIMILARITY_THRESHOLD = 0.6;

    /** Maximum number of characters of the follow-up copied into a signal description. */
    private static final int DESCRIPTION_PREVIEW_LENGTH = 50;

    // Phrases that, when present in a user follow-up, suggest the previous answer was wrong.
    private static final List<String> CORRECTION_MARKERS = Arrays.asList(
            "不对", "错了", "不是", "应该是", "实际上", "其实", "你看错了",
            "no", "wrong", "not", "actually", "that's not right", "incorrect"
    );

    /**
     * Walks the session's turns in order and collects implicit signals.
     *
     * @param session the session to analyse
     * @return the signals found, in the order they were detected; empty when
     *         the session has no turns
     */
    public List<ImplicitSignal> extractImplicitSignals(ConversationSession session) {
        List<ImplicitSignal> signals = new ArrayList<>();
        List<ConversationTurn> turns = session.getTurns();
        if (turns == null || turns.isEmpty()) {
            return signals;
        }

        for (int i = 1; i < turns.size(); i++) {
            ConversationTurn currentTurn = turns.get(i);
            ConversationTurn prevTurn = turns.get(i - 1);
            boolean currentIsUser = !isAssistant(currentTurn);

            // Signal 1: a user follow-up to an assistant answer contains a correction marker.
            String userFollowUp = currentTurn.getContent();
            if (currentIsUser && isAssistant(prevTurn) && userFollowUp != null
                    && containsCorrectionMarker(userFollowUp)) {
                signals.add(ImplicitSignal.builder()
                        .signalType(SignalType.CORRECTION_IMPLICIT)
                        .relatedRequestId(prevTurn.getRequestId())
                        .confidence(0.8)
                        .description("用户追问含纠正词: " + userFollowUp.substring(
                                0, Math.min(DESCRIPTION_PREVIEW_LENGTH, userFollowUp.length())))
                        .build());
            }

            // Signal 2: the user asks nearly the same thing as two turns earlier.
            // Both turns must be non-assistant turns, otherwise two similar
            // answers would be reported as a repeated question.
            if (i >= 2 && currentIsUser && !isAssistant(turns.get(i - 2))
                    && areSimilarQuestions(turns.get(i - 2).getContent(), currentTurn.getContent())) {
                signals.add(ImplicitSignal.builder()
                        .signalType(SignalType.REPEATED_QUESTION)
                        .relatedRequestId(turns.get(i - 2).getRequestId())
                        .confidence(0.7)
                        .description("重复提问")
                        .build());
            }

            // Signal 3: the previous turn recorded an unusually long response time.
            if (isSlow(prevTurn)) {
                signals.add(slowResponseSignal(prevTurn));
            }
        }

        // The loop only ever inspects the turn before the current one, so the
        // final turn has to be checked for slowness separately.
        ConversationTurn lastTurn = turns.get(turns.size() - 1);
        if (isSlow(lastTurn)) {
            signals.add(slowResponseSignal(lastTurn));
        }

        return signals;
    }

    /** Null-safe check for the assistant role. */
    private static boolean isAssistant(ConversationTurn turn) {
        return ASSISTANT_ROLE.equals(turn.getRole());
    }

    /** True when the turn carries a response time above the slow threshold. */
    private static boolean isSlow(ConversationTurn turn) {
        return turn.getResponseTimeMs() != null
                && turn.getResponseTimeMs() > SLOW_RESPONSE_THRESHOLD_MS;
    }

    /** Builds the slow-response signal for the given turn. */
    private static ImplicitSignal slowResponseSignal(ConversationTurn turn) {
        return ImplicitSignal.builder()
                .signalType(SignalType.SLOW_RESPONSE)
                .relatedRequestId(turn.getRequestId())
                .confidence(1.0)
                .build();
    }

    /**
     * Checks the text against {@link #CORRECTION_MARKERS}, ignoring case.
     * Markers that start with an ASCII character must appear as a whole word,
     * so "no" does not match inside "know" and "not" does not match inside
     * "another". All other markers are matched as plain substrings.
     */
    private static boolean containsCorrectionMarker(String text) {
        String lowered = text.toLowerCase(java.util.Locale.ROOT);
        for (String marker : CORRECTION_MARKERS) {
            boolean asciiMarker = marker.charAt(0) < 128;
            boolean found = asciiMarker ? containsAsWord(lowered, marker) : lowered.contains(marker);
            if (found) {
                return true;
            }
        }
        return false;
    }

    /** True when {@code word} occurs in {@code text} with no ASCII letter or digit on either side. */
    private static boolean containsAsWord(String text, String word) {
        int from = 0;
        while (true) {
            int at = text.indexOf(word, from);
            if (at < 0) {
                return false;
            }
            int end = at + word.length();
            boolean startOk = at == 0 || !isAsciiLetterOrDigit(text.charAt(at - 1));
            boolean endOk = end == text.length() || !isAsciiLetterOrDigit(text.charAt(end));
            if (startOk && endOk) {
                return true;
            }
            from = at + 1;
        }
    }

    private static boolean isAsciiLetterOrDigit(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    }

    /**
     * Compares two questions by the Jaccard index of their whitespace-separated
     * word sets. Null or blank input is never similar to anything.
     *
     * <p>Text without whitespace forms a single token, so two such questions
     * are only similar when they are identical.
     */
    private boolean areSimilarQuestions(String q1, String q2) {
        if (q1 == null || q2 == null) return false;
        String first = q1.strip();
        String second = q2.strip();
        if (first.isEmpty() || second.isEmpty()) return false;

        Set<String> words1 = new HashSet<>(Arrays.asList(first.split("\\s+")));
        Set<String> words2 = new HashSet<>(Arrays.asList(second.split("\\s+")));
        Set<String> intersection = new HashSet<>(words1);
        intersection.retainAll(words2);
        Set<String> union = new HashSet<>(words1);
        union.addAll(words2);
        return (double) intersection.size() / union.size() > SIMILARITY_THRESHOLD;
    }
}

数据处理：从原始信号到训练样本

/**
 * 反馈数据处理流水线
 * 将原始反馈信号转化为可用的训练数据
 */
@Service
@Slf4j
public class FeedbackDataProcessor {

    @Autowired
    private FeedbackRepository feedbackRepository;

    @Autowired
    private TrainingDataRepository trainingDataRepository;

    @Autowired
    private OpenAiClient openAiClient;

    /**
     * 处理负面反馈，生成改进的训练样本
     * 核心逻辑：从"错误"中学习
     */
    @Transactional
    public List<TrainingSample> processNegativeFeedback(String feedbackId) {
        FeedbackRecord feedback = feedbackRepository.findById(feedbackId)
                .orElseThrow(() -> new FeedbackNotFoundException(feedbackId));

        List<TrainingSample> samples = new ArrayList<>();

        if (feedback.getFeedbackType() == FeedbackType.THUMBS_DOWN) {
            // 点踩但未提供纠正：使用LLM分析可能的错误原因
            TrainingSample sample = analyzeAndGenerateSample(feedback);
            if (sample != null) samples.add(sample);

        } else if (feedback.getFeedbackType() == FeedbackType.CORRECTION) {
            // 用户提供了正确答案：直接生成对比样本
            TrainingSample sample = TrainingSample.builder()
                    .sampleId(UUID.randomUUID().toString())
                    .sourceType(SampleSourceType.USER_CORRECTION)
                    .imageId(feedback.getImageId())
                    .prompt(feedback.getPrompt())
                    .wrongResponse(feedback.getModelResponse())
                    .correctResponse(feedback.getCorrectedResponse())
                    .quality(SampleQuality.HIGH) // 用户提供的纠正质量最高
                    .createdAt(Instant.now())
                    .build();

            samples.add(sample);

        } else if (feedback.getFeedbackType() == FeedbackType.IMPLICIT
                && feedback.getSignalType() == SignalType.CORRECTION_IMPLICIT) {
            // 隐式纠正信号：分析对话获取正确答案
            TrainingSample sample = extractCorrectionFromConversation(feedback);
            if (sample != null) samples.add(sample);
        }

        for (TrainingSample sample : samples) {
            trainingDataRepository.save(sample);
        }

        log.info("从反馈生成训练样本: feedbackId={}, samplesCount={}",
                feedbackId, samples.size());
        return samples;
    }

    /**
     * 对话中的隐式纠正提取
     * 分析"用户纠正"的对话，提取正确的问答对
     */
    private TrainingSample extractCorrectionFromConversation(FeedbackRecord feedback) {
        // 获取相关的对话上下文
        ConversationSession session = sessionRepository.findById(feedback.getSessionId())
                .orElse(null);
        if (session == null) return null;

        String correctionAnalysisPrompt = buildCorrectionAnalysisPrompt(session, feedback);
        String analysis = openAiClient.chat(correctionAnalysisPrompt);

        try {
            JsonNode node = new ObjectMapper().readTree(analysis);
            if (node.get("hasClearCorrection").asBoolean(false)) {
                return TrainingSample.builder()
                        .sampleId(UUID.randomUUID().toString())
                        .sourceType(SampleSourceType.IMPLICIT_CORRECTION)
                        .imageId(feedback.getImageId())
                        .prompt(node.get("originalQuestion").asText())
                        .wrongResponse(node.get("wrongAnswer").asText())
                        .correctResponse(node.get("correctAnswer").asText())
                        .quality(SampleQuality.MEDIUM) // 隐式提取，质量中等
                        .build();
            }
        } catch (Exception e) {
            log.warn("对话纠正提取失败: feedbackId={}", feedback.getFeedbackId(), e);
        }
        return null;
    }

    private String buildCorrectionAnalysisPrompt(ConversationSession session,
                                                   FeedbackRecord feedback) {
        StringBuilder conversation = new StringBuilder();
        session.getTurns().forEach(turn ->
                conversation.append(turn.getRole()).append(": ")
                        .append(turn.getContent()).append("\n"));

        return String.format("""
                分析以下对话，判断是否存在明确的纠正：
                
                %s
                
                请判断并输出JSON：
                {
                  "hasClearCorrection": true/false,
                  "originalQuestion": "原始问题",
                  "wrongAnswer": "错误回答",
                  "correctAnswer": "正确答案（来自用户纠正）"
                }
                
                只有在对话中有明确的用户纠正时才输出 hasClearCorrection: true。
                """, conversation.toString());
    }

    private TrainingSample analyzeAndGenerateSample(FeedbackRecord feedback) {
        // 用LLM分析可能的错误并生成改进样本（置信度较低，仅作参考）
        return null; // 简化
    }

    @Autowired
    private ConversationSessionRepository sessionRepository;
}

模型改进：如何使用收集到的数据

/**
 * Decides which improvement actions to take based on accumulated negative
 * training samples.
 *
 * <p>Each sample is assigned an error pattern by simple rules, and an action
 * is proposed when a pattern's share of the samples crosses its threshold.
 */
@Service
@Slf4j
public class ModelImprovementDecider {

    /** Days of negative samples taken into account. */
    private static final int LOOKBACK_DAYS = 30;

    /** Share of format errors above which a prompt revision is proposed. */
    private static final double FORMAT_ERROR_THRESHOLD = 0.3;

    /** Share of knowledge errors above which a knowledge-base update is proposed. */
    private static final double KNOWLEDGE_ERROR_THRESHOLD = 0.3;

    /** Share of visual-understanding errors above which fine-tuning is proposed. */
    private static final double VISUAL_ERROR_THRESHOLD = 0.4;

    /** Minimum number of negative samples before fine-tuning is proposed. */
    private static final int FINE_TUNE_MIN_SAMPLES = 500;

    /** Shared JSON mapper used by the format-error rule. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    @Autowired
    private TrainingDataRepository trainingDataRepository;

    @Autowired
    private PromptVersionRepository promptVersionRepository;

    /**
     * Analyses the negative samples of the last 30 days for a task type and
     * proposes improvement actions. The checks run in order of increasing
     * cost: prompt revision, knowledge-base update, fine-tuning.
     *
     * <p>NOTE(review): {@code classifyError} in this class never returns
     * {@code KNOWLEDGE_ERROR}, so the knowledge-base branch cannot currently
     * trigger.
     *
     * @param taskType the task type whose samples are analysed
     * @return the plan; {@code ImprovementPlan.noActionNeeded()} when there are
     *         no negative samples
     */
    public ImprovementPlan analyzeAndPlan(String taskType) {
        List<TrainingSample> negativeSamples = trainingDataRepository
                .findNegativeSamples(taskType, LocalDate.now().minusDays(LOOKBACK_DAYS));

        if (negativeSamples.isEmpty()) {
            return ImprovementPlan.noActionNeeded();
        }

        Map<ErrorPattern, Long> errorPatterns = categorizeErrors(negativeSamples);
        int total = negativeSamples.size();

        ImprovementPlan plan = new ImprovementPlan();

        // Mostly format errors: prompt engineering alone can address them.
        long formatErrors = errorPatterns.getOrDefault(ErrorPattern.FORMAT_ERROR, 0L);
        if ((double) formatErrors / total > FORMAT_ERROR_THRESHOLD) {
            plan.addAction(ImprovementAction.PROMPT_REVISION,
                    "格式错误占比高，优化Prompt的输出格式要求");
        }

        // Mostly knowledge errors: extend the RAG knowledge base.
        long knowledgeErrors = errorPatterns.getOrDefault(ErrorPattern.KNOWLEDGE_ERROR, 0L);
        if ((double) knowledgeErrors / total > KNOWLEDGE_ERROR_THRESHOLD) {
            plan.addAction(ImprovementAction.KNOWLEDGE_BASE_UPDATE,
                    "知识性错误多，补充相关领域知识到RAG库");
        }

        // Mostly visual-understanding errors with enough samples: consider fine-tuning.
        long visualErrors = errorPatterns.getOrDefault(ErrorPattern.VISUAL_UNDERSTANDING_ERROR, 0L);
        if ((double) visualErrors / total > VISUAL_ERROR_THRESHOLD
                && total >= FINE_TUNE_MIN_SAMPLES) {
            plan.addAction(ImprovementAction.FINE_TUNE,
                    "视觉理解错误多且样本量充足，建议微调视觉编码器");
        }

        return plan;
    }

    /** Counts how many samples fall into each error pattern. */
    private Map<ErrorPattern, Long> categorizeErrors(List<TrainingSample> samples) {
        Map<ErrorPattern, Long> counts = new HashMap<>();
        for (TrainingSample sample : samples) {
            counts.merge(classifyError(sample), 1L, Long::sum);
        }
        return counts;
    }

    /**
     * Assigns an error pattern to a sample using rules only, first match wins:
     * missing text, wrong answer not JSON while the correct one is, wrong
     * answer more than twice as long as the correct one, differing numbers.
     * Anything else is treated as a visual-understanding error.
     */
    private ErrorPattern classifyError(TrainingSample sample) {
        String wrongResponse = sample.getWrongResponse();
        String correctResponse = sample.getCorrectResponse();

        if (wrongResponse == null || correctResponse == null) {
            return ErrorPattern.UNKNOWN;
        }

        if (!isValidJson(wrongResponse) && isValidJson(correctResponse)) {
            return ErrorPattern.FORMAT_ERROR;
        }

        if (wrongResponse.length() > correctResponse.length() * 2) {
            return ErrorPattern.HALLUCINATION;
        }

        if (containsNumbers(wrongResponse) && containsNumbers(correctResponse)
                && !extractNumbers(wrongResponse).equals(extractNumbers(correctResponse))) {
            return ErrorPattern.NUMBER_MISREAD;
        }

        return ErrorPattern.VISUAL_UNDERSTANDING_ERROR;
    }

    /**
     * True when the text parses as a JSON object or array. Bare scalars such
     * as {@code 78} and empty text are not counted: they parse without error,
     * but treating a plain-text answer like "78" as structured output would
     * label number misreads as format errors.
     */
    private boolean isValidJson(String text) {
        try {
            JsonNode root = OBJECT_MAPPER.readTree(text);
            return root != null && root.isContainerNode();
        } catch (Exception e) {
            return false;
        }
    }

    /**
     * True when the text contains at least one ASCII digit, on any line. A
     * whole-string regex with '.' does not cross line breaks, so it is not
     * used here.
     */
    private boolean containsNumbers(String text) {
        if (text == null) return false;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c >= '0' && c <= '9') {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the digit runs of the text separated by single spaces, e.g.
     * "1 and 23" becomes "1 23". Keeping the runs apart prevents "1 and 23"
     * and "12 and 3" from comparing equal.
     */
    private String extractNumbers(String text) {
        if (text == null) return "";
        return text.replaceAll("[^0-9]+", " ").strip();
    }
}

Prompt 自动优化：数据驱动的迭代

/**
 * Generates improved prompt variants from collected error samples by asking
 * the model to rewrite the current prompt.
 */
@Service
@Slf4j
public class PromptAutoOptimizer {

    /** At most this many error samples are quoted in the optimization prompt. */
    private static final int MAX_ERROR_EXAMPLES = 10;

    @Autowired
    private OpenAiClient openAiClient;

    @Autowired
    private TrainingDataRepository trainingDataRepository;

    @Autowired
    private PromptAbTestingService abTestingService;

    /**
     * Asks the model for an improved version of {@code currentPrompt}, given
     * up to ten of the supplied error samples as examples.
     *
     * <p>The length limit stated in the instructions to the model is not
     * checked on the reply.
     *
     * @param currentPrompt the prompt currently in use
     * @param errorSamples  samples produced with that prompt that were wrong
     * @return a variant named {@code auto-improved-<today>} carrying the
     *         model's reply as its template; empty when there are no samples
     *         or the model returned no usable text
     */
    public Optional<PromptVariant> generateImprovedPrompt(
            String currentPrompt, List<TrainingSample> errorSamples) {

        if (errorSamples == null || errorSamples.isEmpty()) {
            return Optional.empty();
        }

        // Quote the first few error cases for the model.
        StringBuilder errorSummary = new StringBuilder();
        int quoted = Math.min(MAX_ERROR_EXAMPLES, errorSamples.size());
        for (TrainingSample sample : errorSamples.subList(0, quoted)) {
            errorSummary.append("---\n");
            errorSummary.append("提示词：").append(sample.getPrompt()).append("\n");
            errorSummary.append("错误回答：").append(sample.getWrongResponse()).append("\n");
            errorSummary.append("正确答案：").append(sample.getCorrectResponse()).append("\n");
        }

        String optimizationPrompt = String.format("""
                当前的图片分析 Prompt 是：
                ---
                %s
                ---
                
                以下是一些用这个 Prompt 产生的错误案例：
                %s
                
                请分析这些错误的共同原因，并提出改进后的 Prompt。
                改进目标：
                1. 保持原 Prompt 的核心要求
                2. 添加能避免上述错误的具体指令
                3. 不要让 Prompt 过于冗长（不超过原长度的150%%）
                
                只输出改进后的 Prompt 文本，不要任何解释。
                """, currentPrompt, errorSummary.toString());

        String improvedPrompt = openAiClient.chat(optimizationPrompt);

        // An empty reply would otherwise become a variant with no template.
        if (improvedPrompt == null || improvedPrompt.isBlank()) {
            log.warn("Prompt 自动优化未返回有效内容，跳过生成变体");
            return Optional.empty();
        }

        return Optional.of(PromptVariant.builder()
                .name("auto-improved-" + LocalDate.now())
                .promptTemplate(improvedPrompt)
                .baselinePrompt(currentPrompt)
                .build());
    }
}

飞轮的量化指标

数据飞轮运转得好不好，需要持续监控：

/**
 * Reports hourly health metrics of the data flywheel as gauges and a log line.
 */
@Service
@Slf4j
public class DataFlywheelMetrics {

    @Autowired
    private MeterRegistry meterRegistry;

    @Autowired
    private FeedbackRepository feedbackRepository;

    @Autowired
    private TrainingDataRepository trainingDataRepository;

    // Latest values, read by the gauges. The registry only keeps a weak
    // reference to a gauge's state object and does not pick up values passed
    // in later calls under the same name, so the gauges read these fields on
    // this bean instead of being handed a fresh boxed number every hour.
    private volatile double explicitFeedbackHourly;
    private volatile double implicitSignalHourly;
    private volatile double negativeRateValue;
    private volatile double newSamplesHourly;
    private volatile double conversionRateValue;

    /** Set once the gauges are registered; only touched by the scheduled method. */
    private boolean gaugesRegistered;

    /**
     * Collects the counts for the last hour, updates the gauges, and logs a
     * summary. Runs with a one-hour fixed delay.
     *
     * <p>NOTE(review): explicit feedback is stored by the collector in this
     * file with the caller-supplied type (documented there as
     * THUMBS_UP/THUMBS_DOWN/CORRECTION); confirm that records with
     * {@code FeedbackType.EXPLICIT} exist, otherwise that count is always zero.
     */
    @Scheduled(fixedDelay = 3600_000)
    public void reportMetrics() {
        Instant lastHour = Instant.now().minus(Duration.ofHours(1));

        // 1. Volume of collected feedback.
        long explicitFeedbackCount = feedbackRepository
                .countByTypeAndTimestampAfter(FeedbackType.EXPLICIT, lastHour);
        long implicitSignalCount = feedbackRepository
                .countByTypeAndTimestampAfter(FeedbackType.IMPLICIT, lastHour);

        // 2. Negative-feedback rate, a proxy for current model quality.
        long totalInteractions = feedbackRepository.countTotalInteractions(lastHour);
        long negativeFeedback = feedbackRepository
                .countByFeedbackTypeAndTimestampAfter(FeedbackType.THUMBS_DOWN, lastHour);
        double negativeRate = totalInteractions > 0
                ? (double) negativeFeedback / totalInteractions : 0;

        // 3. How fast training data accumulates.
        long newTrainingSamples = trainingDataRepository
                .countByCreatedAtAfter(lastHour);

        // 4. Conversion rate from feedback signals to usable training samples,
        //    measured against the feedback that was actually processed.
        long processedFeedback = feedbackRepository.countProcessed(lastHour);
        double conversionRate = processedFeedback > 0
                ? (double) newTrainingSamples / processedFeedback : 0;

        // Publish the values, then make sure the gauges exist.
        explicitFeedbackHourly = explicitFeedbackCount;
        implicitSignalHourly = implicitSignalCount;
        negativeRateValue = negativeRate;
        newSamplesHourly = newTrainingSamples;
        conversionRateValue = conversionRate;
        registerGaugesOnce();

        // SLF4J only understands "{}" placeholders, so the percentages are
        // formatted before being passed in.
        log.info("数据飞轮指标: explicitFeedback={}, implicitSignal={}, " +
                        "negativeRate={}, newSamples={}, conversionRate={}",
                explicitFeedbackCount, implicitSignalCount,
                formatPercent(negativeRate), newTrainingSamples, formatPercent(conversionRate));
    }

    /** Registers the five gauges on first use; later calls do nothing. */
    private void registerGaugesOnce() {
        if (gaugesRegistered) {
            return;
        }
        meterRegistry.gauge("flywheel.feedback.explicit_hourly", this, m -> m.explicitFeedbackHourly);
        meterRegistry.gauge("flywheel.feedback.implicit_hourly", this, m -> m.implicitSignalHourly);
        meterRegistry.gauge("flywheel.quality.negative_rate", this, m -> m.negativeRateValue);
        meterRegistry.gauge("flywheel.data.new_samples_hourly", this, m -> m.newSamplesHourly);
        meterRegistry.gauge("flywheel.data.conversion_rate", this, m -> m.conversionRateValue);
        gaugesRegistered = true;
    }

    /** Formats a ratio as a percentage with one decimal place, e.g. 0.125 becomes "12.5%". */
    private static String formatPercent(double ratio) {
        return String.format("%.1f%%", ratio * 100);
    }
}

飞轮启动的冷启动问题

新产品上线初期，用户少，反馈数据不够怎么办？

方法一：主动测试采集。 从现有用户中招募内测用户，设计明确的测试任务，收集有标注的测试数据。

方法二：合成数据增强。 对已有的正确样本做数据增强（图片旋转、亮度调整、文字改写），扩大训练数据规模。

方法三：从错误日志中挖掘。 即使没有显式反馈，也可以从模型输出异常（格式错误、超时重试）中挖掘问题样本。

方法四：人工种子标注。 花2-3周时间，让团队成员或外包标注员完成初始的1000条高质量标注，奠定数据质量基础。

数据飞轮不是魔法，它需要工程系统来支撑。建好管道，飞轮才能转起来。