第1635篇:AI应用的Feature Flag设计——灰度控制与A/B测试的工程化实现
第1635篇:AI应用的Feature Flag设计——灰度控制与A/B测试的工程化实现
Feature Flag这个概念在传统软件开发里已经很成熟了,但在AI应用里它有一些特殊的地方,值得单独聊聊。
上个月有个朋友在做一个智能客服项目,老板要求把一个新的Agent功能先给10%的用户用,观察一周再决定是否全量。他当时的实现是写了个if判断,把用户ID除以10余数为0的就走新逻辑,其他走旧逻辑。这确实能跑,但没法动态调整比例,没法做A/B分组,更没法按照用户属性做定向灰度,属于硬编码的"假灰度"。
真正的Feature Flag系统应该是怎样的?今天来聊清楚。
AI应用中Feature Flag的特殊性
普通的Feature Flag主要控制代码路径,AI应用里除了代码路径,还需要控制:
- Prompt版本:新的Prompt还没验证好,先给部分用户用
- 模型版本:从GPT-4切换到GPT-4o,先给内测用户试
- Agent行为:新增了一个工具调用能力,先灰度验证安全性
- 上下文策略:新的记忆管理策略,先观察对话质量影响
- 响应风格:简洁版 vs 详细版,做A/B测试看用户偏好
这些维度加在一起,比普通Feature Flag复杂得多,需要更灵活的设计。
核心数据模型设计
先把数据模型设计好:
@Data
@Builder
@Entity
@Table(name = "ai_feature_flags")
public class AIFeatureFlag {
@Id
private String flagKey; // 唯一标识,如 "new-prompt-v2"
private String displayName; // 显示名称
private String description; // 描述
@Enumerated(EnumType.STRING)
private FlagType flagType; // 标志类型
@Enumerated(EnumType.STRING)
private FlagStatus status; // DRAFT / ACTIVE / DISABLED / ARCHIVED
// 灰度策略
@Convert(converter = RolloutStrategyConverter.class)
private RolloutStrategy rolloutStrategy;
// 变体配置(用于A/B测试)
@Convert(converter = VariantListConverter.class)
private List<FlagVariant> variants;
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
private String createdBy;
public enum FlagType {
FEATURE_TOGGLE, // 功能开关(只有开/关)
AB_TEST, // A/B测试(多个变体)
GRADUAL_ROLLOUT // 渐进式灰度(百分比)
}
public enum FlagStatus {
DRAFT, // 草稿,不生效
ACTIVE, // 生效中
DISABLED, // 已禁用(快速关闭)
ARCHIVED // 归档(Feature稳定后清理)
}
}
// One variant of an A/B test flag: an identifier, a traffic weight and the configuration
// payload handed to callers that land in this variant.
// NOTE(review): only @Builder is declared, so there is no no-arg constructor. If
// VariantListConverter deserializes JSON with Jackson this class probably needs
// @NoArgsConstructor/@AllArgsConstructor as well - confirm against the converter.
@Data
@Builder
public class FlagVariant {
private String variantId; // Variant ID, e.g. "control" / "treatment"
private String name; // Variant name
private double weight; // Weight; the weights of all variants should sum to 1.0
private Map<String, Object> config; // Variant-specific configuration
}
@Data
@Builder
public class RolloutStrategy {
@Enumerated(EnumType.STRING)
private StrategyType type;
// PERCENTAGE类型:灰度百分比
private Double percentage;
// USER_LIST类型:白名单用户ID列表
private List<String> userList;
// ATTRIBUTE_BASED类型:基于用户属性的规则
private List<AttributeRule> attributeRules;
public enum StrategyType {
ALL, // 全量
NONE, // 全不放量
PERCENTAGE, // 百分比灰度
USER_LIST, // 白名单
ATTRIBUTE_BASED // 基于属性
}
}
/**
 * A single attribute predicate used by ATTRIBUTE_BASED rollout strategies.
 */
@Data
@Builder
// Instances arrive as part of a JSON RolloutStrategy body, so a public no-arg constructor is
// required for deserialization; @Builder then needs the all-args constructor declared too.
// Fully qualified because this file's import list is not visible from here.
@lombok.NoArgsConstructor
@lombok.AllArgsConstructor
public class AttributeRule {
    private String attribute; // User attribute name, e.g. "plan" / "region" / "userType"
    private String operator;  // EQUALS / IN / CONTAINS / GREATER_THAN / LESS_THAN
    private Object value;     // Value to match against (a List for IN)
}
Feature Flag评估引擎
有了数据模型,核心就是评估引擎——给定用户信息和Flag Key,返回该用户应该走哪个变体:
/**
 * Evaluates AI feature flags for a user: decides whether the user is inside a flag's rollout
 * and, for A/B tests, which variant the user is assigned to.
 *
 * <p>Assignment is derived from {@code userId.hashCode()}, so an unchanged flag always yields
 * the same answer for the same user. Evaluation is fail-safe: a missing, inactive or malformed
 * flag, or a runtime failure while evaluating, produces a disabled result instead of an
 * exception.
 */
@Service
@Slf4j
public class FeatureFlagEvaluator {

    /** Bucket count for percentage rollout; one bucket corresponds to one percentage point. */
    private static final int ROLLOUT_BUCKETS = 100;

    /** Bucket count for A/B variant assignment. */
    private static final int VARIANT_BUCKETS = 1000;

    private final FeatureFlagRepository repository;
    private final UserContextProvider userContextProvider;

    // Local cache to reduce database lookups (entries expire 5 minutes after being written)
    private final LoadingCache<String, Optional<AIFeatureFlag>> flagCache;

    public FeatureFlagEvaluator(FeatureFlagRepository repository,
                                UserContextProvider userContextProvider) {
        this.repository = repository;
        this.userContextProvider = userContextProvider;
        this.flagCache = Caffeine.newBuilder()
                .expireAfterWrite(5, TimeUnit.MINUTES)
                .maximumSize(1000)
                .build(key -> repository.findByFlagKey(key));
    }

    /**
     * Drops the cached definition of a flag so the next evaluation reloads it.
     *
     * <p>Only the cache held by this instance is affected.
     *
     * @param flagKey key of the flag to evict; {@code null} is ignored
     */
    public void invalidateCache(String flagKey) {
        if (flagKey != null) {
            flagCache.invalidate(flagKey);
        }
    }

    /**
     * Evaluates a flag for a user, loading the user's attributes from the context provider.
     *
     * @param flagKey key of the flag
     * @param userId  ID of the user being evaluated
     * @return the evaluation result; disabled when the flag or user cannot be evaluated
     */
    public EvaluationResult evaluate(String flagKey, String userId) {
        if (flagKey == null || userId == null) {
            return EvaluationResult.disabled(flagKey);
        }
        Map<String, Object> attributes;
        try {
            attributes = userContextProvider.getUserAttributes(userId);
        } catch (RuntimeException e) {
            // Attribute rules only ever match on present attributes, so evaluating with an
            // empty map can narrow a rollout but never widen it.
            log.warn("获取用户{}属性失败,按空属性评估", userId, e);
            attributes = Map.of();
        }
        return evaluate(flagKey, userId, attributes);
    }

    /**
     * Evaluates a flag for a user with caller-supplied attributes.
     *
     * @param flagKey        key of the flag
     * @param userId         ID of the user being evaluated
     * @param userAttributes attributes used by ATTRIBUTE_BASED strategies; {@code null} is
     *                       treated as empty
     * @return the evaluation result; disabled when the flag is missing, not ACTIVE, the user is
     *         outside the rollout, the flag is misconfigured, or evaluation fails
     */
    public EvaluationResult evaluate(String flagKey, String userId,
                                     Map<String, Object> userAttributes) {
        if (flagKey == null || userId == null) {
            return EvaluationResult.disabled(flagKey);
        }
        try {
            Map<String, Object> attributes = userAttributes != null ? userAttributes : Map.of();
            return doEvaluate(flagKey, userId, attributes);
        } catch (RuntimeException e) {
            // A broken flag store or malformed flag must not take the calling feature down.
            log.warn("Flag {} 评估异常,按关闭处理", flagKey, e);
            return EvaluationResult.disabled(flagKey);
        }
    }

    /** Core evaluation; may throw, callers convert failures into a disabled result. */
    private EvaluationResult doEvaluate(String flagKey, String userId,
                                        Map<String, Object> userAttributes) {
        Optional<AIFeatureFlag> flagOpt = flagCache.get(flagKey);
        if (flagOpt == null || flagOpt.isEmpty()) {
            log.debug("Flag {} 不存在,返回默认关闭", flagKey);
            return EvaluationResult.disabled(flagKey);
        }

        AIFeatureFlag flag = flagOpt.get();
        if (flag.getStatus() != AIFeatureFlag.FlagStatus.ACTIVE) {
            return EvaluationResult.disabled(flagKey);
        }

        // Rollout check: is this user eligible at all?
        if (!isUserInRollout(flag.getRolloutStrategy(), userId, userAttributes)) {
            return EvaluationResult.disabled(flagKey);
        }

        // A/B tests additionally pick a variant
        if (flag.getFlagType() == AIFeatureFlag.FlagType.AB_TEST) {
            List<FlagVariant> variants = flag.getVariants();
            if (variants == null || variants.isEmpty()) {
                log.warn("Flag {} 是A/B测试但未配置变体,按关闭处理", flagKey);
                return EvaluationResult.disabled(flagKey);
            }
            FlagVariant variant = selectVariant(variants, userId);
            return EvaluationResult.enabled(flagKey, variant.getVariantId(),
                    variant.getConfig());
        }

        return EvaluationResult.enabled(flagKey, "default", Map.of());
    }

    /** Returns whether the user is covered by the strategy; a missing strategy or type covers nobody. */
    private boolean isUserInRollout(RolloutStrategy strategy, String userId,
                                    Map<String, Object> userAttributes) {
        if (strategy == null || strategy.getType() == null) {
            return false;
        }
        return switch (strategy.getType()) {
            case ALL -> true;
            case NONE -> false;
            case PERCENTAGE -> isUserInPercentage(userId, strategy.getPercentage());
            case USER_LIST -> strategy.getUserList() != null
                    && strategy.getUserList().contains(userId);
            case ATTRIBUTE_BASED ->
                    matchAttributeRules(strategy.getAttributeRules(), userAttributes);
        };
    }

    /**
     * Percentage rollout. {@code percentage} is a fraction in [0, 1]; it is rounded to whole
     * percentage points because there are only {@value #ROLLOUT_BUCKETS} buckets.
     *
     * <p>NOTE(review): the bucket depends only on the user ID, so the same users are the early
     * adopters of every percentage flag. Salting with the flag key would decorrelate flags but
     * would also reshuffle current assignments, so it is left as is.
     */
    private boolean isUserInPercentage(String userId, Double percentage) {
        if (percentage == null || percentage.isNaN()) {
            return false;
        }
        // Modulo before abs keeps the value in -99..99, so abs cannot overflow.
        int bucket = Math.abs(userId.hashCode() % ROLLOUT_BUCKETS);
        // Round rather than truncate: 0.29 * 100 is 28.999999999999996 in floating point.
        long threshold = Math.round(percentage * ROLLOUT_BUCKETS);
        return bucket < threshold;
    }

    /** All rules must match (AND semantics); no rules means no match. */
    private boolean matchAttributeRules(List<AttributeRule> rules,
                                        Map<String, Object> userAttributes) {
        if (rules == null || rules.isEmpty() || userAttributes == null) {
            return false;
        }
        return rules.stream().allMatch(rule -> matchRule(rule, userAttributes));
    }

    /** Evaluates one rule; incomplete rules, missing attributes and unknown operators do not match. */
    private boolean matchRule(AttributeRule rule, Map<String, Object> userAttributes) {
        if (rule == null || rule.getAttribute() == null || rule.getOperator() == null) {
            return false;
        }
        Object attrValue = userAttributes.get(rule.getAttribute());
        Object expected = rule.getValue();
        if (attrValue == null || expected == null) {
            return false;
        }
        return switch (rule.getOperator()) {
            case "EQUALS" -> attrValue.equals(expected);
            case "IN" -> expected instanceof List<?> list && list.contains(attrValue);
            case "CONTAINS" -> attrValue.toString().contains(expected.toString());
            case "GREATER_THAN" -> compareNumeric(attrValue, expected, true);
            case "LESS_THAN" -> compareNumeric(attrValue, expected, false);
            default -> false;
        };
    }

    /** Numeric comparison that treats non-numeric operands as "rule not matched". */
    private boolean compareNumeric(Object actual, Object expected, boolean greaterThan) {
        try {
            int cmp = compareNumbers(actual, expected);
            return greaterThan ? cmp > 0 : cmp < 0;
        } catch (NumberFormatException e) {
            log.debug("属性规则数值比较失败: actual={}, expected={}", actual, expected);
            return false;
        }
    }

    /**
     * Picks a variant by walking the cumulative weights. Callers guarantee a non-empty list.
     *
     * <p>NOTE(review): the seed is the same for every experiment, so users are bucketed
     * identically across experiments; consider salting with the flag key for new experiments.
     */
    private FlagVariant selectVariant(List<FlagVariant> variants, String userId) {
        // Hashing keeps the same user in the same variant across calls
        int bucket = Math.abs((userId + "variant_seed").hashCode() % VARIANT_BUCKETS);
        double cumulative = 0.0;
        for (FlagVariant variant : variants) {
            cumulative += variant.getWeight();
            // Strict comparison on integer thresholds: a variant with weight w owns exactly
            // round(w * 1000) buckets, and a zero-weight variant owns none.
            if (bucket < Math.round(cumulative * VARIANT_BUCKETS)) {
                return variant;
            }
        }
        // Weights summed to less than 1.0; fall back to the last variant
        return variants.get(variants.size() - 1);
    }

    /** Compares two values numerically via their string form. */
    private int compareNumbers(Object a, Object b) {
        double da = Double.parseDouble(a.toString());
        double db = Double.parseDouble(b.toString());
        return Double.compare(da, db);
    }
}
@Data
@Builder
public class EvaluationResult {
private String flagKey;
private boolean enabled;
private String variantId;
private Map<String, Object> config;
/**
 * Creates an enabled result for the given flag and variant.
 *
 * @param flagKey   key of the evaluated flag
 * @param variantId ID of the variant the user landed in
 * @param config    variant configuration; {@code null} is replaced by an empty map
 * @return an enabled evaluation result
 */
public static EvaluationResult enabled(String flagKey, String variantId,
                                       Map<String, Object> config) {
    Map<String, Object> effectiveConfig;
    if (config == null) {
        effectiveConfig = Map.of();
    } else {
        effectiveConfig = config;
    }
    return EvaluationResult.builder()
            .enabled(true)
            .flagKey(flagKey)
            .variantId(variantId)
            .config(effectiveConfig)
            .build();
}
/**
 * Creates a disabled result for the given flag: no variant and an empty configuration.
 *
 * @param flagKey key of the evaluated flag
 * @return a disabled evaluation result
 */
public static EvaluationResult disabled(String flagKey) {
    Map<String, Object> noConfig = Map.of();
    return EvaluationResult.builder()
            .enabled(false)
            .config(noConfig)
            .flagKey(flagKey)
            .build();
}
/**
 * Returns the configuration value stored under {@code key}, or {@code defaultValue} when the
 * key is absent or this result carries no configuration.
 *
 * <p>The value's type is NOT verified: {@code T} is erased at runtime, so the cast below can
 * never throw here. A value of the wrong type surfaces as a {@link ClassCastException} at the
 * call site. Use {@link #getConfigValue(String, Class, Object)} for a checked lookup.
 *
 * @param key          configuration key
 * @param defaultValue value returned when nothing is stored under {@code key}
 * @return the stored value, or {@code defaultValue}
 */
@SuppressWarnings("unchecked") // T is erased; see the Javadoc for the consequence
public <T> T getConfigValue(String key, T defaultValue) {
    if (config == null) {
        return defaultValue;
    }
    Object value = config.get(key);
    return value != null ? (T) value : defaultValue;
}

/**
 * Type-checked variant: returns the value stored under {@code key} when it is an instance of
 * {@code type}, otherwise {@code defaultValue}.
 *
 * @param key          configuration key
 * @param type         expected runtime type of the value
 * @param defaultValue value returned when the key is absent or holds another type
 * @return the stored value cast to {@code type}, or {@code defaultValue}
 */
public <T> T getConfigValue(String key, Class<T> type, T defaultValue) {
    Object value = config != null ? config.get(key) : null;
    return type.isInstance(value) ? type.cast(value) : defaultValue;
}
}
在AI服务中使用Feature Flag
有了评估引擎,在实际业务代码里怎么用?先封装一个AI专用的Flag工具类:
@Component
@Slf4j
public class AIFeatureFlagService {
private final FeatureFlagEvaluator evaluator;
// Creates the service around the evaluator that performs the actual flag evaluation.
public AIFeatureFlagService(FeatureFlagEvaluator evaluator) {
this.evaluator = evaluator;
}
/**
 * Resolves which prompt template version the given user should get.
 *
 * <p>Reads the {@code version} entry of the "prompt-template-version" flag's configuration and
 * falls back to "v1" when that entry is absent.
 */
public String getPromptTemplateVersion(String userId) {
    final String fallbackVersion = "v1";
    return evaluator
            .evaluate("prompt-template-version", userId)
            .getConfigValue("version", fallbackVersion);
}
/**
 * Resolves which model the given user should be served by.
 *
 * <p>Returns {@code defaultModel} when the "model-selection" flag is not enabled for the user
 * or its configuration has no {@code model} entry.
 */
public String getModelForUser(String userId, String defaultModel) {
    EvaluationResult selection = evaluator.evaluate("model-selection", userId);
    if (selection.isEnabled()) {
        return selection.getConfigValue("model", defaultModel);
    }
    return defaultModel;
}
/**
 * Tells whether a specific agent feature is enabled for the user.
 *
 * <p>The flag key is {@code "agent-feature-"} followed by {@code featureName}.
 */
public boolean isAgentFeatureEnabled(String featureName, String userId) {
    String flagKey = "agent-feature-" + featureName;
    return evaluator.evaluate(flagKey, userId).isEnabled();
}
/**
 * Returns the A/B test variant assigned to the user and reports an exposure for it.
 *
 * <p>Users for whom the experiment flag is not enabled get "control" and no exposure is
 * reported for them.
 */
public String getABTestVariant(String experimentName, String userId) {
    EvaluationResult outcome = evaluator.evaluate(experimentName, userId);
    if (outcome.isEnabled()) {
        String assigned = outcome.getVariantId();
        // Report the exposure before handing the variant back
        reportExposure(experimentName, userId, assigned);
        return assigned;
    }
    return "control";
}
// Hook for reporting an experiment exposure. The body is currently an empty placeholder, so
// nothing is sent anywhere yet.
// NOTE(review): ExperimentMetricsService.recordExposure takes (userId, experiment, variant) -
// mind the different argument order when wiring this up.
private void reportExposure(String experiment, String userId, String variant) {
// Report to the data analytics platform for later effect analysis
}
}
在ChatService里使用:
@Service
@Slf4j
public class SmartChatService {
private final ChatClient defaultChatClient;
private final Map<String, ChatClient> modelClients;
private final PromptTemplateRepository templateRepository;
private final AIFeatureFlagService flagService;
/**
 * Answers a user message, letting feature flags pick the model, the prompt template version,
 * the RAG implementation and the response style.
 *
 * @param userId    ID of the user, used for all flag evaluations
 * @param userInput the user's current message
 * @param history   earlier messages of the conversation
 * @return the model's reply
 */
public String chat(String userId, String userInput, List<Message> history) {
    // Model: flag-selected name, falling back to the default client for unknown names
    String modelName = flagService.getModelForUser(userId, "gpt-4");
    ChatClient client = modelClients.getOrDefault(modelName, defaultChatClient);

    // Prompt template version
    String templateVersion = flagService.getPromptTemplateVersion(userId);
    String systemPrompt = templateRepository.getTemplate(templateVersion);

    // New RAG pipeline on/off
    boolean useNewRAG = flagService.isAgentFeatureEnabled("new-rag-v2", userId);

    // A/B test: concise vs. detailed answers
    String responseStyle = flagService.getABTestVariant("response-style-test", userId);
    String styleInstruction;
    if ("concise".equals(responseStyle)) {
        styleInstruction = "请用简洁的语言回答,不超过200字";
    } else {
        styleInstruction = "请详细回答,可以展开说明";
    }

    String fullSystemPrompt = systemPrompt + "\n\n" + styleInstruction;

    log.debug("用户{} 使用模型={}, 模板={}, RAG={}, 风格={}",
            userId, modelName, templateVersion, useNewRAG, responseStyle);

    if (!useNewRAG) {
        return chatBasic(client, fullSystemPrompt, userInput, history);
    }
    return chatWithNewRAG(client, fullSystemPrompt, userId, userInput, history);
}
/**
 * Plain chat call: system prompt, conversation history and the new user message.
 *
 * @return the content of the model's reply
 */
private String chatBasic(ChatClient client, String systemPrompt,
                         String userInput, List<Message> history) {
    var request = client.prompt()
            .system(systemPrompt)
            .messages(history)
            .user(userInput);
    return request.call().content();
}
// Chat path for the new RAG implementation. Placeholder only: the body is elided and an empty
// string is returned.
private String chatWithNewRAG(ChatClient client, String systemPrompt,
String userId, String userInput, List<Message> history) {
// New RAG logic
// ...
return "";
}
}
实验数据收集与分析
A/B测试的核心价值在于数据,所以实验事件的收集要设计好:
@Service
@Slf4j
public class ExperimentMetricsService {
private final KafkaTemplate<String, ExperimentEvent> kafkaTemplate;
/**
 * Records an exposure event (which variant the user was shown) and sends it to the
 * "ai-experiment-events" topic, keyed by user ID.
 */
public void recordExposure(String userId, String experiment, String variant) {
    final String topic = "ai-experiment-events";
    ExperimentEvent exposure = ExperimentEvent.builder()
            .eventType("exposure")
            .experiment(experiment)
            .variant(variant)
            .userId(userId)
            .timestamp(Instant.now())
            .build();
    kafkaTemplate.send(topic, userId, exposure);
}
/**
 * Records a conversion event (the outcome of a user's behavior) and sends it to the
 * "ai-experiment-events" topic, keyed by user ID.
 */
public void recordConversion(String userId, String conversionType,
                             Map<String, Object> properties) {
    final String topic = "ai-experiment-events";
    ExperimentEvent conversion = ExperimentEvent.builder()
            .eventType("conversion")
            .conversionType(conversionType)
            .properties(properties)
            .userId(userId)
            .timestamp(Instant.now())
            .build();
    kafkaTemplate.send(topic, userId, conversion);
}
/**
 * Records AI-specific quality metrics for one call and sends them to the
 * "ai-experiment-events" topic, keyed by user ID.
 *
 * <p>The user rating is optional: when it is absent the {@code user_rating} property is simply
 * left out of the event.
 *
 * @param userId     ID of the user
 * @param experiment experiment (flag) name
 * @param variant    variant the user was served
 * @param metrics    metrics of the AI call
 */
public void recordAIMetrics(String userId, String experiment, String variant,
                            AICallMetrics metrics) {
    // Map.of rejects null values with a NullPointerException, so the optional rating must not
    // be passed to it when missing.
    Object userRating = metrics.getUserRating();
    Map<String, Object> properties = userRating != null
            ? Map.of(
                    "response_length", metrics.getResponseLength(),
                    "latency_ms", metrics.getLatencyMs(),
                    "user_rating", userRating,
                    "retry_count", metrics.getRetryCount())
            : Map.of(
                    "response_length", metrics.getResponseLength(),
                    "latency_ms", metrics.getLatencyMs(),
                    "retry_count", metrics.getRetryCount());

    ExperimentEvent event = ExperimentEvent.builder()
            .eventType("ai_metrics")
            .userId(userId)
            .experiment(experiment)
            .variant(variant)
            .properties(properties)
            .timestamp(Instant.now())
            .build();
    kafkaTemplate.send("ai-experiment-events", userId, event);
}
}
运营管理界面的API设计
Feature Flag一定要有后台管理界面,不然运营同学没法操作:
@RestController
@RequestMapping("/admin/feature-flags")
@PreAuthorize("hasRole('FEATURE_FLAG_ADMIN')")
public class FeatureFlagAdminController {
private final FeatureFlagService flagService;
private final FeatureFlagEvaluator evaluator;
/**
 * Lists feature flags page by page.
 *
 * @param page zero-based page index, defaults to 0
 * @param size page size, defaults to 20
 */
@GetMapping
public Page<AIFeatureFlag> listFlags(
        @RequestParam(defaultValue = "0") int page,
        @RequestParam(defaultValue = "20") int size) {
    var pageRequest = PageRequest.of(page, size);
    return flagService.findAll(pageRequest);
}
/** Creates a new feature flag from a validated request body. */
@PostMapping
public AIFeatureFlag createFlag(@RequestBody @Valid CreateFlagRequest request) {
    AIFeatureFlag created = flagService.create(request);
    return created;
}
/**
 * Replaces the rollout strategy of a flag.
 *
 * <p>The evaluator's cached copy is evicted after the update so the new strategy (for example
 * an emergency reduction of the rollout percentage) and the simulate/distribution endpoints
 * reflect it right away instead of after the cache TTL.
 */
@PutMapping("/{flagKey}/rollout")
public AIFeatureFlag updateRollout(
        @PathVariable String flagKey,
        @RequestBody RolloutStrategy strategy) {
    AIFeatureFlag updated = flagService.updateRollout(flagKey, strategy);
    // Evict only after the write so a concurrent evaluation cannot re-cache the old strategy
    evaluator.invalidateCache(flagKey);
    return updated;
}
/**
 * Activates a flag.
 *
 * <p>The evaluator's cached copy is evicted after the update so the flag becomes effective
 * right away instead of after the cache TTL.
 */
@PostMapping("/{flagKey}/enable")
public AIFeatureFlag enableFlag(@PathVariable String flagKey) {
    AIFeatureFlag enabled = flagService.updateStatus(flagKey, AIFeatureFlag.FlagStatus.ACTIVE);
    // Evict only after the write so a concurrent evaluation cannot re-cache the old status
    evaluator.invalidateCache(flagKey);
    return enabled;
}
/**
 * Kill switch: disables a flag and makes the change effective without waiting for the
 * evaluator cache to expire.
 *
 * <p>NOTE(review): this evicts the cache through the injected evaluator only; if several
 * application instances each hold their own cache, the others presumably still wait for
 * expiry - confirm whether a broadcast is needed.
 */
@PostMapping("/{flagKey}/disable")
public AIFeatureFlag disableFlag(@PathVariable String flagKey) {
    // Persist first, evict second. Evicting before the write leaves a window in which a
    // concurrent evaluation reloads the still-ACTIVE flag and caches it again.
    AIFeatureFlag disabled =
            flagService.updateStatus(flagKey, AIFeatureFlag.FlagStatus.DISABLED);
    evaluator.invalidateCache(flagKey);
    return disabled;
}
/** Debug helper: shows the evaluation result a specific user would get for a flag. */
@GetMapping("/{flagKey}/simulate")
public EvaluationResult simulate(
        @PathVariable String flagKey,
        @RequestParam String userId) {
    EvaluationResult simulated = evaluator.evaluate(flagKey, userId);
    return simulated;
}
/**
 * Shows how synthetic users are spread across a flag's variants, to verify that traffic
 * splitting is even.
 *
 * @param flagKey    key of the flag
 * @param sampleSize number of synthetic users to evaluate, defaults to 1000; values outside
 *                   0..100000 are clamped so one request cannot trigger unbounded work
 * @return variant ID (or "disabled") mapped to the number of synthetic users that got it
 */
@GetMapping("/{flagKey}/distribution")
public Map<String, Long> getDistribution(
        @PathVariable String flagKey,
        @RequestParam(defaultValue = "1000") int sampleSize) {
    final int maxSampleSize = 100_000;
    int effectiveSize = Math.max(0, Math.min(sampleSize, maxSampleSize));

    // Evaluate synthetic user IDs and count the outcomes
    Map<String, Long> distribution = new HashMap<>();
    for (int i = 0; i < effectiveSize; i++) {
        EvaluationResult result = evaluator.evaluate(flagKey, "test-user-" + i);
        String variant = result.isEnabled() ? result.getVariantId() : "disabled";
        distribution.merge(variant, 1L, Long::sum);
    }
    return distribution;
}
}
一个完整的使用流程
一些实践经验
用了大半年Feature Flag系统,有几点体会:
1. 及时清理历史Flag
Feature Flag是技术债,实验结束后一定要及时归档并清除代码中的分支。我们规定每个Flag上线后30天内必须有一个最终决定(全量或废弃),否则会被自动归档并告警。
2. Flag命名要有规范
我们的命名规范是:{模块}-{功能}-{版本号},比如chat-new-rag-v2、agent-tool-call-v3。命名混乱的话,两个月后你自己都不知道这个Flag是干什么的了。
3. 默认值要谨慎
当Flag服务不可用时(比如存放Flag配置的数据库或缓存挂了),评估结果应该fallback到什么?通常是"最安全的"状态,也就是新功能关闭、用稳定的旧版本。别让Flag服务的故障影响到核心功能。
4. 区分内部测试和生产灰度
内部员工和测试账号应该能够独立控制,让他们先用所有新功能,不影响正式用户的A/B实验数据。
Feature Flag是AI应用工程化的基础设施之一,做好了能让整个团队的迭代效率大幅提升。
