/**
 * Token-bucket rate limiter.
 *
 * <p>Permits bursts of up to {@code capacity} tokens while bounding the long-run
 * average rate to {@code refillRate} tokens per second. The bucket starts full and
 * is topped up lazily on every {@link #tryAcquire(int)} call, using the time elapsed
 * on {@link System#nanoTime()} since the previous call.
 *
 * <p>All state changes happen inside the {@code synchronized} {@code tryAcquire}
 * method, so a single instance may be shared between threads.
 */
public class TokenBucketLimiter {

    private static final double NANOS_PER_SECOND = 1_000_000_000.0;

    private final long capacity;        // bucket size (largest burst that can be served)
    private final double refillRate;    // tokens added per second
    private double tokens;              // tokens currently available
    private long lastRefillTime;        // System.nanoTime() reading taken at the last refill

    /**
     * Creates a limiter whose bucket starts full.
     *
     * @param capacity   bucket size; must be positive
     * @param refillRate tokens added per second; must be positive and finite
     * @throws IllegalArgumentException if either argument is out of range
     */
    public TokenBucketLimiter(long capacity, double refillRate) {
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be positive: " + capacity);
        }
        // !(x > 0) also rejects NaN, which would otherwise poison the token count forever.
        if (!(refillRate > 0) || Double.isInfinite(refillRate)) {
            throw new IllegalArgumentException(
                "refillRate must be positive and finite: " + refillRate);
        }
        this.capacity = capacity;
        this.refillRate = refillRate;
        this.tokens = capacity; // start with a full bucket
        this.lastRefillTime = System.nanoTime();
    }

    /**
     * Tries to take tokens without blocking.
     *
     * @param required number of tokens to take; must not be negative
     * @return {@code true} if the tokens were available and have been deducted,
     *         {@code false} if the bucket did not hold enough (nothing is deducted)
     * @throws IllegalArgumentException if {@code required} is negative
     */
    public synchronized boolean tryAcquire(int required) {
        // A negative request would pass the check below and *add* tokens,
        // pushing the bucket above its capacity.
        if (required < 0) {
            throw new IllegalArgumentException("required must not be negative: " + required);
        }
        refill();
        if (tokens >= required) {
            tokens -= required;
            return true;
        }
        return false;
    }

    /** Adds the tokens earned since the last refill, capped at {@code capacity}. */
    private void refill() {
        long now = System.nanoTime();
        double elapsedSeconds = (now - lastRefillTime) / NANOS_PER_SECOND;
        double newTokens = elapsedSeconds * refillRate;
        tokens = Math.min(capacity, tokens + newTokens);
        lastRefillTime = now;
    }
}

4.2 集群限流（Redis Lua脚本）

/**
 * Cluster-wide rate limiter backed by Redis Lua scripts.
 *
 * <p>Every public check runs the sliding-window script against a key derived from
 * the caller-supplied identity. A request is allowed only when the script returns 1;
 * a {@code null} or 0 result counts as a rejection.
 */
@Component
@Slf4j
public class ClusterRateLimiter {

    @Autowired
    private StringRedisTemplate redisTemplate;

    /**
     * Sliding-window Lua script.
     * Check and record happen in one script so concurrent callers cannot race.
     *
     * <p>KEYS[1] = ZSET key; ARGV = window length (ms), limit, current time (ms),
     * unique member for this request. The member is supplied by the caller instead of
     * being built with {@code math.random} inside the script: Redis may seed the Lua
     * PRNG identically for every script run, in which case two requests in the same
     * millisecond would produce the same member, overwrite each other in the ZSET and
     * be undercounted.
     */
    private static final String SLIDING_WINDOW_LUA =
        "local key = KEYS[1]\n" +
        "local window_ms = tonumber(ARGV[1])\n" +
        "local limit = tonumber(ARGV[2])\n" +
        "local now = tonumber(ARGV[3])\n" +
        "local member = ARGV[4]\n" +
        "local window_start = now - window_ms\n" +
        // drop entries that have fallen out of the window
        "redis.call('ZREMRANGEBYSCORE', key, 0, window_start)\n" +
        // count the requests still inside the window
        "local count = redis.call('ZCARD', key)\n" +
        "if count < limit then\n" +
        "    redis.call('ZADD', key, now, member)\n" +
        "    redis.call('EXPIRE', key, math.ceil(window_ms / 1000) + 1)\n" +
        "    return 1\n" +  // allowed
        "else\n" +
        "    return 0\n" +  // rejected
        "end";

    /**
     * Compiled once and shared, so the script's SHA1 is not recomputed on every call.
     */
    private static final DefaultRedisScript<Long> SLIDING_WINDOW_SCRIPT =
        new DefaultRedisScript<>(SLIDING_WINDOW_LUA, Long.class);

    /**
     * Token-bucket Lua script (finer control over bursts).
     *
     * <p>KEYS[1] = hash key; ARGV = capacity, refill rate (tokens per second),
     * current time (ms), tokens requested. No method in this class executes it.
     */
    private static final String TOKEN_BUCKET_LUA =
        "local key = KEYS[1]\n" +
        "local capacity = tonumber(ARGV[1])\n" +
        "local refill_rate = tonumber(ARGV[2])\n" +  // tokens added per second
        "local now = tonumber(ARGV[3])\n" +
        "local requested = tonumber(ARGV[4])\n" +
        // load the bucket state from Redis (a missing key means a full bucket)
        "local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')\n" +
        "local tokens = tonumber(bucket[1]) or capacity\n" +
        "local last_refill = tonumber(bucket[2]) or now\n" +
        // work out how many tokens have been earned since the last refill
        "local elapsed = (now - last_refill) / 1000\n" +  // milliseconds to seconds
        "local new_tokens = elapsed * refill_rate\n" +
        "tokens = math.min(capacity, tokens + new_tokens)\n" +
        // try to spend the requested tokens
        "if tokens >= requested then\n" +
        "    tokens = tokens - requested\n" +
        "    redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)\n" +
        "    redis.call('EXPIRE', key, 3600)\n" +
        "    return 1\n" +
        "else\n" +
        "    redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)\n" +
        "    redis.call('EXPIRE', key, 3600)\n" +
        "    return 0\n" +
        "end";

    /**
     * Per-user, per-API limit: at most 60 calls per user per minute.
     *
     * @param userId  caller's user id
     * @param apiPath identifier of the API being called
     * @return {@code true} if the call is within the limit
     */
    public boolean isUserAllowed(String userId, String apiPath) {
        String key = "ratelimit:user:" + userId + ":" + apiPath;
        return executeScript(key, 60000, 60); // 60-second window, 60 calls
    }

    /**
     * Per-IP limit: at most 100 calls per IP per second.
     *
     * @param ip client IP address
     * @return {@code true} if the call is within the limit
     */
    public boolean isIpAllowed(String ip) {
        String key = "ratelimit:ip:" + ip;
        return executeScript(key, 1000, 100); // 1-second window, 100 calls
    }

    /**
     * Per-API-key limit with a caller-supplied quota (open platform).
     *
     * @param apiKey        the API key being used
     * @param limit         maximum calls per window
     * @param windowSeconds window length in seconds
     * @return {@code true} if the call is within the limit
     */
    public boolean isApiKeyAllowed(String apiKey, int limit, int windowSeconds) {
        String key = "ratelimit:apikey:" + apiKey;
        return executeScript(key, windowSeconds * 1000L, limit);
    }

    /**
     * Runs the sliding-window script for {@code key}.
     *
     * @param key      Redis key of the window's ZSET
     * @param windowMs window length in milliseconds
     * @param limit    maximum number of requests inside the window
     * @return {@code true} only if the script returned 1
     */
    private boolean executeScript(String key, long windowMs, int limit) {
        // NOTE(review): the timestamp comes from this JVM's clock, so clock skew between
        // application nodes shifts the window slightly — confirm that is acceptable.
        long now = System.currentTimeMillis();
        // Unique per request, so concurrent requests in the same millisecond never collide.
        String member = now + "-" + java.util.UUID.randomUUID();

        Long result = redisTemplate.execute(
            SLIDING_WINDOW_SCRIPT,
            Collections.singletonList(key),
            String.valueOf(windowMs),
            String.valueOf(limit),
            String.valueOf(now),
            member
        );

        return Long.valueOf(1L).equals(result);
    }
}

4.3 三级限流AOP拦截器

/**
 * Rate-limit annotation for methods; RateLimitAspect intercepts methods that carry it.
 *
 * NOTE(review): as written, RateLimitAspect only tests userLimit() and ipLimit()
 * against 0 to switch the checks on or off. The quotas actually enforced are the
 * fixed ones in ClusterRateLimiter (60 per user per minute, 100 per IP per second),
 * and key() is never read by the aspect.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface RateLimit {
    int qps() default 100;              // QPS limit; the aspect passes it to a per-method local Guava RateLimiter
    int userLimit() default 10;         // per-user limit per minute (0 = no limit); see NOTE above
    int ipLimit() default 100;          // per-IP limit (0 = no limit); the enforced window is one second, see NOTE above
    String key() default "";            // custom rate-limit key (described as supporting SpEL — TODO confirm where it is evaluated)
}

/**
 * Aspect that applies the rate limits declared with {@code @RateLimit}.
 *
 * <p>Checks run cheapest first: a per-process Guava limiter for the method, then the
 * Redis-backed per-user and per-IP checks when an HTTP request is available. The first
 * failing check throws {@code RateLimitException}; otherwise the method proceeds.
 */
@Aspect
@Component
@Slf4j
public class RateLimitAspect {

    @Autowired
    private ClusterRateLimiter clusterLimiter;

    // Per-process token buckets, one per intercepted method (key = short method signature).
    private final ConcurrentHashMap<String, RateLimiter> localLimiters =
        new ConcurrentHashMap<>();

    /**
     * Applies the local, per-user and per-IP limits, then invokes the target method.
     *
     * @param pjp       the intercepted call
     * @param rateLimit the annotation found on the intercepted method
     * @return whatever the target method returns
     * @throws Throwable {@code RateLimitException} when a limit is hit, or anything
     *                   thrown by the target method
     */
    @Around("@annotation(rateLimit)")
    public Object around(ProceedingJoinPoint pjp, RateLimit rateLimit) throws Throwable {
        String methodKey = pjp.getSignature().toShortString();

        // Tier 3: in-process token bucket (fastest check, no network hop).
        enforceLocalLimit(methodKey, rateLimit);

        // Tier 2: Redis-backed cluster limits; skipped when there is no current request.
        HttpServletRequest request = getCurrentRequest();
        if (request == null) {
            return pjp.proceed();
        }

        String userId = getCurrentUserId();
        String ip = getClientIp(request);
        enforceUserLimit(rateLimit, userId, methodKey);
        enforceIpLimit(rateLimit, ip);

        return pjp.proceed();
    }

    /** Takes one permit from the method's local Guava limiter, creating it on first use. */
    private void enforceLocalLimit(String methodKey, RateLimit rateLimit) {
        RateLimiter methodLimiter = localLimiters.computeIfAbsent(
            methodKey,
            signature -> RateLimiter.create(rateLimit.qps())  // Guava token bucket
        );
        if (methodLimiter.tryAcquire()) {
            return;
        }
        log.warn("本地限流触发, method={}", methodKey);
        throw new RateLimitException("请求过于频繁，请稍后重试");
    }

    /**
     * Per-user check; skipped when the annotation disables it or no user id is known.
     * NOTE(review): userLimit() is only compared with 0 — the quota itself is the fixed
     * one inside ClusterRateLimiter.isUserAllowed.
     */
    private void enforceUserLimit(RateLimit rateLimit, String userId, String methodKey) {
        if (rateLimit.userLimit() <= 0 || userId == null) {
            return;
        }
        if (clusterLimiter.isUserAllowed(userId, methodKey)) {
            return;
        }
        log.warn("用户限流触发, userId={}, method={}", userId, methodKey);
        throw new RateLimitException("操作过于频繁，请稍后重试");
    }

    /**
     * Per-IP check; skipped when the annotation disables it.
     * NOTE(review): ipLimit() is only compared with 0 — the quota itself is the fixed
     * one inside ClusterRateLimiter.isIpAllowed.
     */
    private void enforceIpLimit(RateLimit rateLimit, String ip) {
        if (rateLimit.ipLimit() <= 0) {
            return;
        }
        if (clusterLimiter.isIpAllowed(ip)) {
            return;
        }
        log.warn("IP限流触发, ip={}", ip);
        throw new RateLimitException("请求过于频繁");
    }
}

4.4 网关层全局限流（Spring Cloud Gateway）

/**
 * Gateway-wide filter that rejects blacklisted IPs (403) and sheds load once the
 * global request rate is exceeded (429), before the request reaches any route.
 */
@Component
public class GlobalRateLimitFilter implements GlobalFilter, Ordered {

    @Autowired
    private ReactiveStringRedisTemplate reactiveRedisTemplate;

    // IP blacklist (a Redis Set, so it can be changed at runtime)
    private static final String IP_BLACKLIST_KEY = "ratelimit:ip:blacklist";
    // Prefix of the global counter keys; one key per one-second window
    private static final String GLOBAL_BUCKET_KEY = "ratelimit:global";

    private static final int GLOBAL_QPS = 20000; // upper bound on requests per second across the gateway

    /**
     * Checks the blacklist first, then the global rate; forwards the request down the
     * chain only when both checks pass.
     */
    @Override
    public Mono<Void> filter(ServerWebExchange exchange, GatewayFilterChain chain) {
        ServerHttpRequest request = exchange.getRequest();
        String ip = getClientIp(request);

        // The rate check is nested inside the blacklist branch so that every path of
        // each lambda yields Mono<Void>; a flat chain would have to return Mono<Void>
        // and Mono<Boolean> from the same lambda, which does not type-check.
        return checkIpBlacklist(ip)
            .flatMap(isBlocked -> {
                if (isBlocked) {
                    return rejectRequest(exchange, HttpStatus.FORBIDDEN, "IP已被封禁");
                }
                return checkGlobalRateLimit()
                    .flatMap(isAllowed -> {
                        if (!isAllowed) {
                            return rejectRequest(exchange, HttpStatus.TOO_MANY_REQUESTS,
                                "系统繁忙，请稍后重试");
                        }
                        return chain.filter(exchange);
                    });
            });
    }

    /** Emits {@code true} when {@code ip} is in the blacklist set, {@code false} otherwise. */
    private Mono<Boolean> checkIpBlacklist(String ip) {
        return reactiveRedisTemplate.opsForSet()
            .isMember(IP_BLACKLIST_KEY, ip)
            .defaultIfEmpty(false);
    }

    /**
     * Counts this request in the current one-second window and emits whether the
     * window is still within {@code GLOBAL_QPS}.
     *
     * <p>This is a simplified fixed-window counter. Each second gets its own key, which
     * expires shortly after the window closes; a single key without expiry would count
     * requests forever and reject everything once the total passed the limit.
     */
    private Mono<Boolean> checkGlobalRateLimit() {
        String windowKey = GLOBAL_BUCKET_KEY + ":" + (System.currentTimeMillis() / 1000);
        return reactiveRedisTemplate.opsForValue()
            .increment(windowKey)
            .flatMap(count -> {
                Mono<Boolean> verdict = Mono.just(count <= GLOBAL_QPS);
                if (count == 1L) {
                    // First hit of this window: attach a TTL so the key cleans itself up.
                    return reactiveRedisTemplate
                        .expire(windowKey, java.time.Duration.ofSeconds(2))
                        .then(verdict);
                }
                return verdict;
            });
    }

    /**
     * Completes the exchange with {@code status} and a small JSON body.
     *
     * @param message text placed in the JSON {@code message} field; it is not escaped,
     *                so callers must pass JSON-safe text
     */
    private Mono<Void> rejectRequest(
            ServerWebExchange exchange, HttpStatus status, String message) {
        ServerHttpResponse response = exchange.getResponse();
        response.setStatusCode(status);
        response.getHeaders().set("Content-Type", "application/json;charset=UTF-8");
        String body = "{\"code\":" + status.value() + ",\"message\":\"" + message + "\"}";
        // Encode explicitly as UTF-8 to match the declared charset; the platform
        // default may differ and would garble the non-ASCII messages.
        DataBuffer buffer = response.bufferFactory()
            .wrap(body.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        return response.writeWith(Mono.just(buffer));
    }

    @Override
    public int getOrder() {
        return -100; // lower values run earlier; this puts the filter ahead of filters with a larger order
    }
}

4.5 限流降级处理

@RestControllerAdvice
public class RateLimitExceptionHandler {

    /**
     * 限流异常统一处理
     * 返回标准的429响应，并设置Retry-After头
     */
    @ExceptionHandler(RateLimitException.class)
    @ResponseStatus(HttpStatus.TOO_MANY_REQUESTS)
    public ApiResponse<Void> handleRateLimit(
            RateLimitException e, HttpServletResponse response) {
        // 告知客户端多久后可以重试
        response.setHeader("Retry-After", "60");
        response.setHeader("X-RateLimit-Remaining", "0");
        return ApiResponse.fail(429, e.getMessage());
    }
}

五、扩展性设计

动态限流规则

限流阈值不能写死在代码里，需要支持运维在不重启服务的情况下调整。

方案：把限流规则存在配置中心（Nacos），应用监听规则变更事件，动态更新本地令牌桶速率。

/**
 * Applies a new set of rate-limit rules pushed by the config center.
 *
 * <p>Only limiters that already exist in {@code localLimiters} are updated. An empty or
 * unparsable payload leaves the current rates untouched, and entries whose rate is
 * missing or not positive are skipped, because {@code RateLimiter.setRate} rejects
 * such values and one bad entry should not abort the whole update.
 *
 * @param config the rules document as JSON
 */
@NacosConfigListener(dataId = "rate-limit-rules", groupId = "DEFAULT_GROUP")
public void onConfigChange(String config) {
    RateLimitConfig newConfig = JsonUtils.fromJson(config, RateLimitConfig.class);
    if (newConfig == null || newConfig.getApiRules() == null) {
        log.warn("限流规则为空或解析失败，保留现有配置");
        return;
    }
    // Update the local token buckets.
    // NOTE(review): localLimiters is keyed by the short method signature in
    // RateLimitAspect; confirm the rule keys use the same format or nothing will match.
    newConfig.getApiRules().forEach((apiPath, qps) -> {
        // !(qps > 0) also filters NaN.
        if (qps == null || !(qps > 0)) {
            log.warn("忽略非法限流阈值, api={}, qps={}", apiPath, qps);
            return;
        }
        RateLimiter limiter = localLimiters.get(apiPath);
        if (limiter != null) {
            limiter.setRate(qps); // Guava RateLimiter supports changing the rate at runtime
        }
    });
    log.info("限流规则已更新: {}", newConfig);
}

自适应限流

根据系统负载自动调整限流阈值：CPU使用率 > 80% 时，限流阈值降为正常的50%；CPU使用率 > 95% 时，降为10%。

六、踩坑实录

坑1：Guava令牌桶的令牌积累突刺

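系统低流量时，令牌桶会把没用掉的令牌积累下来。假设限流配置是每秒100次、且桶的容量没有设上限，系统静默1分钟后会攒下100×60=6000个令牌；此时若突然涌入大量请求，这6000个令牌会被瞬间消耗，相当于短时间内放行了6000次请求，远超后端承受能力。需要注意：Guava的RateLimiter.create(qps)默认使用SmoothBursty，最多只积累1秒的令牌（约100个）；只有自研令牌桶、或把最大突发量调得很大时，才会出现上述量级的突刺。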

解决方案：在创建RateLimiter时设置warmupPeriod（预热期），即使用SmoothWarmingUp而不是默认的SmoothBursty，让静默之后的放行速率逐步爬升，而不是把积累的令牌一次性放出；同时限制桶容量（最大突发量）。或者不用令牌桶，改用漏桶，以恒定速率放行请求。

坑2：Redis限流在网络抖动时失效

Redis连接超时时，限流Lua脚本执行失败，异常被catch后默认放行（fail open策略）。结果每次Redis抖动的那几秒，限流全部失效，流量直接打到后端。

解决方案：fail open还是fail close取决于业务。对于核心保护接口，Redis不可用时应该fail close（默认拒绝），宁可误杀正常请求，也不让超限流量伤害后端。同时本地令牌桶作为兜底，即使Redis挂了，本地限流仍然生效。

坑3：滑动窗口的Redis内存泄露

滑动窗口把每次请求的时间戳存在ZSet里，每个请求占约50字节。高QPS接口每秒1万次调用，ZSet大小1万条 × 50字节 = 500KB，每个Key如此，10万个活跃用户 × 500KB = 50GB，Redis内存直接撑爆。

解决方案：对高QPS接口改用计数器法（固定窗口），内存消耗仅1个整数；对低QPS接口才用滑动窗口。

七、总结

三级限流的核心价值是层层递进、各司其职：

层级	位置	算法	延迟	维度
第一级	网关	令牌桶	<1ms	IP、全局
第二级	应用层（Redis）	滑动窗口	~1ms	用户、API Key
第三级	单机内存	令牌桶	<0.1ms	接口级别

限流系统的设计原则：尽早拒绝，代价最小；分层防御，互为兜底；fail close优于fail open（对核心保护场景）。