Skip to main content

Implementing Distributed Rate Limiting

How to implement rate limiting in distributed systems - token bucket, sliding window, and Redis-based solutions.

Rate limiting protects your services from abuse and ensures fair resource usage. In distributed systems, this gets tricky.

The Challenge #

In a single server, rate limiting is straightforward - keep a counter in memory. With multiple servers, you need shared state.

Client → Load Balancer → Server 1 (counter: 5)
                       → Server 2 (counter: 3)
                       → Server 3 (counter: 4)

# Client made 12 requests but each server thinks they're under limit

Algorithms #

Token Bucket #

Tokens are added at a fixed rate. Each request consumes a token.

type TokenBucket struct {
    rate       float64   // tokens per second
    capacity   int64     // max tokens
    tokens     float64
    lastRefill time.Time
    mu         sync.Mutex
}

func (b *TokenBucket) Allow() bool {
    b.mu.Lock()
    defer b.mu.Unlock()
    
    now := time.Now()
    elapsed := now.Sub(b.lastRefill).Seconds()
    
    // Add tokens based on elapsed time
    b.tokens = min(float64(b.capacity), b.tokens + elapsed * b.rate)
    b.lastRefill = now
    
    if b.tokens >= 1 {
        b.tokens--
        return true
    }
    return false
}

Pros: allows bursts up to the bucket capacity. Cons: requires per-client state in memory.

Sliding Window Log #

Track timestamps of each request in a window.

type SlidingWindowLog struct {
    windowSize time.Duration
    limit      int
    timestamps []time.Time
    mu         sync.Mutex
}

func (s *SlidingWindowLog) Allow() bool {
    s.mu.Lock()
    defer s.mu.Unlock()
    
    now := time.Now()
    windowStart := now.Add(-s.windowSize)
    
    // Remove old timestamps
    valid := make([]time.Time, 0)
    for _, ts := range s.timestamps {
        if ts.After(windowStart) {
            valid = append(valid, ts)
        }
    }
    s.timestamps = valid
    
    if len(s.timestamps) < s.limit {
        s.timestamps = append(s.timestamps, now)
        return true
    }
    return false
}

Pros: precise. Cons: memory usage scales with the request rate.

Sliding Window Counter #

Approximate using fixed windows with interpolation.

type SlidingWindowCounter struct {
    windowSize time.Duration
    limit      int
    prevCount  int
    currCount  int
    currWindow time.Time
    mu         sync.Mutex
}

func (s *SlidingWindowCounter) Allow() bool {
    s.mu.Lock()
    defer s.mu.Unlock()
    
    now := time.Now()
    currWindowStart := now.Truncate(s.windowSize)
    
    // New window?
    if currWindowStart.After(s.currWindow) {
        s.prevCount = s.currCount
        s.currCount = 0
        s.currWindow = currWindowStart
    }
    
    // Calculate weighted count
    elapsed := now.Sub(currWindowStart).Seconds()
    windowSeconds := s.windowSize.Seconds()
    weight := elapsed / windowSeconds
    
    estimate := float64(s.prevCount) * (1 - weight) + float64(s.currCount)
    
    if int(estimate) < s.limit {
        s.currCount++
        return true
    }
    return false
}

Pros: memory efficient, smooth rate enforcement. Cons: only an approximation of a true sliding window.

Redis Implementation #

For distributed systems, use Redis:

Token Bucket with Redis #

// tokenBucketScript atomically refills and consumes tokens for one key,
// server-side in Redis, so the read-modify-write cannot interleave with
// other API servers. KEYS[1] = bucket key; ARGV[1] = refill rate
// (tokens/sec), ARGV[2] = capacity, ARGV[3] = current time as fractional
// Unix seconds, ARGV[4] = tokens requested. Returns 1 if admitted, 0 if not.
// The key expires after ~2 full refill periods so idle buckets are reclaimed.
const tokenBucketScript = `
local key = KEYS[1]
local rate = tonumber(ARGV[1])
local capacity = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local requested = tonumber(ARGV[4])

local data = redis.call("HMGET", key, "tokens", "last_refill")
local tokens = tonumber(data[1]) or capacity
local last_refill = tonumber(data[2]) or now

-- Calculate new tokens
local elapsed = now - last_refill
local new_tokens = math.min(capacity, tokens + elapsed * rate)

local allowed = 0
if new_tokens >= requested then
    new_tokens = new_tokens - requested
    allowed = 1
end

redis.call("HMSET", key, "tokens", new_tokens, "last_refill", now)
redis.call("EXPIRE", key, math.ceil(capacity / rate) * 2)

return allowed
`

// RedisRateLimiter is a distributed token bucket backed by a Redis Lua
// script, so refill-and-consume is atomic across all API servers.
type RedisRateLimiter struct {
	client   *redis.Client // shared Redis connection
	script   *redis.Script // preloaded token-bucket script
	rate     float64       // refill rate, tokens per second
	capacity int64         // maximum burst size
	// limit is the advertised per-window request cap, reported to clients
	// in the X-RateLimit-Limit response header. The original struct omitted
	// it even though the HTTP middleware reads limiter.limit.
	limit int
	// failOpen selects the degraded-mode policy when Redis is unreachable:
	// true admits requests, false rejects them. The original struct omitted
	// it even though AllowWithFallback reads r.failOpen.
	failOpen bool
}

// Allow asks the Redis-side token bucket for a single token on behalf of
// key. It returns whether the request was admitted, or an error if the
// script could not be executed.
func (r *RedisRateLimiter) Allow(ctx context.Context, key string) (bool, error) {
	// The Lua script expects the current time as fractional Unix seconds.
	nowSeconds := float64(time.Now().UnixNano()) / 1e9

	verdict, err := r.script.Run(ctx, r.client, []string{key},
		r.rate, r.capacity, nowSeconds, 1).Int()
	if err != nil {
		return false, err
	}
	return verdict == 1, nil
}

Sliding Window with Redis #

// slidingWindowScript implements a sliding-window-log limiter in a Redis
// sorted set, executed atomically server-side. KEYS[1] = window key;
// ARGV[1] = window size in seconds, ARGV[2] = limit, ARGV[3] = current
// Unix time in seconds. Each admitted request is a ZSET member scored by
// its timestamp (member name salted with math.random to avoid collisions
// within the same second). Returns 1 if admitted, 0 if not.
const slidingWindowScript = `
local key = KEYS[1]
local window_size = tonumber(ARGV[1])
local limit = tonumber(ARGV[2])
local now = tonumber(ARGV[3])

local window_start = now - window_size

-- Remove old entries
redis.call("ZREMRANGEBYSCORE", key, 0, window_start)

-- Count current entries
local count = redis.call("ZCARD", key)

if count < limit then
    redis.call("ZADD", key, now, now .. "-" .. math.random())
    redis.call("EXPIRE", key, window_size)
    return 1
end

return 0
`

// Allow atomically prunes, counts, and (if under the limit) records one
// request for key via the sliding-window Lua script, reporting whether
// the request was admitted.
func (r *RedisSlidingWindow) Allow(ctx context.Context, key string) (bool, error) {
	// The script works in whole Unix seconds.
	nowUnix := time.Now().Unix()
	windowSeconds := int64(r.windowSize.Seconds())

	verdict, err := r.script.Run(ctx, r.client, []string{key},
		windowSeconds, r.limit, nowUnix).Int()
	if err != nil {
		return false, err
	}
	return verdict == 1, nil
}

HTTP Middleware #

// RateLimitMiddleware wraps a handler with per-client rate limiting.
// Rejected requests receive 429 plus Retry-After and X-RateLimit-*
// headers; limiter errors are logged and the request is served anyway
// (fail open), so a Redis outage does not take the API down with it.
func RateLimitMiddleware(limiter *RedisRateLimiter) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Identify the caller (API key when present, IP otherwise).
			key := "ratelimit:" + getClientIdentifier(r)

			allowed, err := limiter.Allow(r.Context(), key)
			switch {
			case err != nil:
				// Fail open or closed based on your needs; this fails open.
				log.Printf("rate limiter error: %v", err)
			case !allowed:
				w.Header().Set("Retry-After", "60")
				w.Header().Set("X-RateLimit-Limit", strconv.Itoa(limiter.limit))
				w.Header().Set("X-RateLimit-Remaining", "0")
				http.Error(w, "Too Many Requests", http.StatusTooManyRequests)
				return
			}

			next.ServeHTTP(w, r)
		})
	}
}

func getClientIdentifier(r *http.Request) string {
    // Prefer API key if available
    if apiKey := r.Header.Get("X-API-Key"); apiKey != "" {
        return "apikey:" + apiKey
    }
    
    // Fall back to IP
    ip := r.Header.Get("X-Forwarded-For")
    if ip == "" {
        ip = r.RemoteAddr
    }
    return "ip:" + ip
}

Multi-Tier Rate Limiting #

Different limits for different scopes:

// MultiTierLimiter layers independent rate limits at several scopes; a
// request must pass every applicable tier to be admitted.
type MultiTierLimiter struct {
    global   *RedisRateLimiter  // 10000/min for all
    perUser  *RedisRateLimiter  // 100/min per user
    perIP    *RedisRateLimiter  // 1000/min per IP
}

// Allow reports whether a request identified by userID (optional; empty
// skips the per-user tier) and ip passes every configured tier. Tiers
// are checked global → user → IP so a service-wide overload
// short-circuits the finer-grained lookups.
//
// Limiter errors are propagated to the caller; the original code
// discarded them with `_`, silently failing closed on any Redis error
// while still returning a nil error.
func (m *MultiTierLimiter) Allow(ctx context.Context, userID, ip string) (bool, error) {
	// Global limit first: the cheapest way to shed load during an incident.
	allowed, err := m.global.Allow(ctx, "global")
	if err != nil || !allowed {
		return false, err
	}

	// Per-user limit applies only to authenticated traffic.
	if userID != "" {
		allowed, err = m.perUser.Allow(ctx, "user:"+userID)
		if err != nil || !allowed {
			return false, err
		}
	}

	// Per-IP limit catches unauthenticated abuse.
	allowed, err = m.perIP.Allow(ctx, "ip:"+ip)
	if err != nil || !allowed {
		return false, err
	}

	return true, nil
}

Handling Redis Failures #

Don’t let rate limiter failures break your service:

// AllowWithFallback wraps Allow with a degraded-mode policy: when the
// limiter itself errors (e.g. Redis is unreachable), the error is logged
// and the configured failOpen flag decides the verdict instead.
func (r *RedisRateLimiter) AllowWithFallback(ctx context.Context, key string) bool {
	allowed, err := r.Allow(ctx, key)
	if err == nil {
		return allowed
	}

	log.Printf("rate limiter error: %v", err)

	// failOpen == true: keep serving traffic while Redis is down.
	// failOpen == false: reject everything until it recovers.
	return r.failOpen
}

Key Takeaways #

  1. Token bucket is great for APIs with burst allowance
  2. Sliding window is more precise but uses more memory
  3. Use Lua scripts for atomic Redis operations
  4. Implement multi-tier limits for different scopes
  5. Decide on fail-open vs fail-closed behavior
  6. Always return rate limit headers to clients

Proper rate limiting protects your services and improves reliability for all users.