Implementing Distributed Rate Limiting
How to implement rate limiting in distributed systems - token bucket, sliding window, and Redis-based solutions.
Table of Contents
Rate limiting protects your services from abuse and ensures fair resource usage. In distributed systems, this gets tricky.
The Challenge #
In a single server, rate limiting is straightforward - keep a counter in memory. With multiple servers, you need shared state.
Client → Load Balancer → Server 1 (counter: 5)
→ Server 2 (counter: 3)
→ Server 3 (counter: 4)
# Client made 12 requests but each server thinks they're under limit
Algorithms #
Token Bucket #
Tokens are added at a fixed rate. Each request consumes a token.
// TokenBucket is an in-process token-bucket rate limiter. Tokens accrue
// continuously at a fixed rate up to capacity; each allowed request spends
// one token, so bursts up to capacity are permitted.
type TokenBucket struct {
	rate       float64 // tokens added per second
	capacity   int64   // maximum tokens (burst size)
	tokens     float64
	lastRefill time.Time
	mu         sync.Mutex
}

// Allow reports whether a request may proceed, consuming one token if so.
func (b *TokenBucket) Allow() bool {
	b.mu.Lock()
	defer b.mu.Unlock()

	// Lazy refill: credit tokens for the time elapsed since the last call,
	// capped at capacity so idle periods cannot bank an unbounded burst.
	now := time.Now()
	secondsSince := now.Sub(b.lastRefill).Seconds()
	b.tokens = min(float64(b.capacity), b.tokens+secondsSince*b.rate)
	b.lastRefill = now

	if b.tokens < 1 {
		return false
	}
	b.tokens--
	return true
}
Pros: allows bursts up to capacity. Cons: requires per-client state in memory.
Sliding Window Log #
Track timestamps of each request in a window.
// SlidingWindowLog enforces an exact limit over a rolling window by keeping
// one timestamp per accepted request.
type SlidingWindowLog struct {
	windowSize time.Duration
	limit      int
	timestamps []time.Time
	mu         sync.Mutex
}

// Allow records the request timestamp and reports whether the request is
// within the limit for the trailing window.
func (s *SlidingWindowLog) Allow() bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	now := time.Now()
	cutoff := now.Add(-s.windowSize)

	// Evict timestamps that fell out of the window, reusing the backing array.
	kept := s.timestamps[:0]
	for _, ts := range s.timestamps {
		if ts.After(cutoff) {
			kept = append(kept, ts)
		}
	}
	s.timestamps = kept

	if len(s.timestamps) >= s.limit {
		return false
	}
	s.timestamps = append(s.timestamps, now)
	return true
}
Pros: exact accounting. Cons: memory scales with the request rate.
Sliding Window Counter #
Approximate using fixed windows with interpolation.
// SlidingWindowCounter approximates a sliding window using two fixed-window
// counters: the previous window's count is weighted by how much of it still
// overlaps the trailing window ending now.
type SlidingWindowCounter struct {
	windowSize time.Duration
	limit      int
	prevCount  int       // requests counted in the previous full window
	currCount  int       // requests counted so far in the current window
	currWindow time.Time // start of the current window
	mu         sync.Mutex
}

// Allow reports whether a request fits under the weighted estimate,
// incrementing the current window's counter if so.
func (s *SlidingWindowCounter) Allow() bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	now := time.Now()
	currWindowStart := now.Truncate(s.windowSize)

	// Roll the window forward. BUG FIX: if more than one full window elapsed
	// with no requests, the old count is stale and must be discarded —
	// otherwise a long-idle client would be throttled by traffic from hours
	// ago (the original unconditionally promoted currCount to prevCount).
	if currWindowStart.After(s.currWindow) {
		if currWindowStart.Sub(s.currWindow) > s.windowSize {
			s.prevCount = 0 // at least one whole window was skipped
		} else {
			s.prevCount = s.currCount
		}
		s.currCount = 0
		s.currWindow = currWindowStart
	}

	// Weighted estimate: the previous window contributes in proportion to the
	// fraction of it still inside the trailing window.
	elapsed := now.Sub(currWindowStart).Seconds()
	weight := elapsed / s.windowSize.Seconds()
	estimate := float64(s.prevCount)*(1-weight) + float64(s.currCount)

	if int(estimate) >= s.limit {
		return false
	}
	s.currCount++
	return true
}
Pros: memory-efficient and smooth. Cons: the count is only an approximation.
Redis Implementation #
For distributed systems, use Redis:
Token Bucket with Redis #
// tokenBucketScript atomically refills and consumes tokens for one bucket
// key. KEYS[1] = bucket key; ARGV[1] = refill rate (tokens/second),
// ARGV[2] = capacity, ARGV[3] = current time (unix seconds, may be
// fractional), ARGV[4] = tokens requested. Returns 1 when the request is
// allowed, 0 otherwise. Running the read-modify-write as a single Lua
// script makes it atomic, which separate GET/SET round trips would not be.
// Missing hash fields default to a full bucket (tokens = capacity).
// The key expires after twice the full-refill time so idle buckets are
// garbage-collected.
// NOTE(review): HMSET is deprecated since Redis 4.0 in favor of HSET —
// confirm the target Redis version before changing the script text.
const tokenBucketScript = `
local key = KEYS[1]
local rate = tonumber(ARGV[1])
local capacity = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local requested = tonumber(ARGV[4])
local data = redis.call("HMGET", key, "tokens", "last_refill")
local tokens = tonumber(data[1]) or capacity
local last_refill = tonumber(data[2]) or now
-- Calculate new tokens
local elapsed = now - last_refill
local new_tokens = math.min(capacity, tokens + elapsed * rate)
local allowed = 0
if new_tokens >= requested then
new_tokens = new_tokens - requested
allowed = 1
end
redis.call("HMSET", key, "tokens", new_tokens, "last_refill", now)
redis.call("EXPIRE", key, math.ceil(capacity / rate) * 2)
return allowed
`
// RedisRateLimiter enforces a token-bucket limit shared across servers by
// running tokenBucketScript atomically inside Redis.
type RedisRateLimiter struct {
	client   *redis.Client
	script   *redis.Script // compiled tokenBucketScript
	rate     float64       // tokens added per second
	capacity int64         // maximum burst size
	// failOpen selects the degraded-mode policy when Redis is unreachable:
	// true allows requests, false denies them. CONSISTENCY FIX: this field
	// is read by AllowWithFallback but was missing from the struct.
	failOpen bool
}
// Allow runs the token-bucket script for key and reports whether one token
// was granted. Any Redis error is returned for the caller to handle.
func (r *RedisRateLimiter) Allow(ctx context.Context, key string) (bool, error) {
	// The script expects the current time as fractional unix seconds.
	nowSeconds := float64(time.Now().UnixNano()) / 1e9
	verdict, err := r.script.Run(ctx, r.client, []string{key},
		r.rate, r.capacity, nowSeconds, 1).Int()
	if err != nil {
		return false, err
	}
	return verdict == 1, nil
}
Sliding Window with Redis #
// slidingWindowScript implements a sliding-window-log limiter on a Redis
// sorted set: member scores are request timestamps, entries older than the
// window are pruned, and the remaining cardinality is compared against the
// limit. KEYS[1] = window key; ARGV[1] = window size (seconds),
// ARGV[2] = limit, ARGV[3] = current time (unix seconds). Returns 1 when
// the request is allowed (and recorded), 0 otherwise.
// The member is "now .. '-' .. math.random()" so two requests sharing a
// timestamp still produce distinct sorted-set entries.
// NOTE(review): math.random inside Redis Lua is not replication-safe on
// older Redis versions — confirm script effects replication is in use.
const slidingWindowScript = `
local key = KEYS[1]
local window_size = tonumber(ARGV[1])
local limit = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local window_start = now - window_size
-- Remove old entries
redis.call("ZREMRANGEBYSCORE", key, 0, window_start)
-- Count current entries
local count = redis.call("ZCARD", key)
if count < limit then
redis.call("ZADD", key, now, now .. "-" .. math.random())
redis.call("EXPIRE", key, window_size)
return 1
end
return 0
`
// Allow runs the sliding-window script for key and reports the verdict.
// Any Redis error is returned for the caller to handle.
func (r *RedisSlidingWindow) Allow(ctx context.Context, key string) (bool, error) {
	// The script works in whole unix seconds.
	nowUnix := time.Now().Unix()
	windowSeconds := int64(r.windowSize.Seconds())
	verdict, err := r.script.Run(ctx, r.client, []string{key},
		windowSeconds, r.limit, nowUnix).Int()
	if err != nil {
		return false, err
	}
	return verdict == 1, nil
}
HTTP Middleware #
// RateLimitMiddleware wraps an http.Handler with per-client rate limiting
// keyed by API key or IP. On limiter errors it fails open (lets the request
// through) so a Redis outage does not take the whole service down.
func RateLimitMiddleware(limiter *RedisRateLimiter) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Use IP or API key as identifier
			key := getClientIdentifier(r)
			allowed, err := limiter.Allow(r.Context(), "ratelimit:"+key)
			if err != nil {
				// Fail open or closed based on your needs
				log.Printf("rate limiter error: %v", err)
				next.ServeHTTP(w, r)
				return
			}
			if !allowed {
				w.Header().Set("Retry-After", "60")
				// BUG FIX: the original read limiter.limit, a field that does
				// not exist on RedisRateLimiter; for a token bucket the
				// advertised limit is its capacity.
				w.Header().Set("X-RateLimit-Limit", strconv.FormatInt(limiter.capacity, 10))
				w.Header().Set("X-RateLimit-Remaining", "0")
				http.Error(w, "Too Many Requests", http.StatusTooManyRequests)
				return
			}
			next.ServeHTTP(w, r)
		})
	}
}
func getClientIdentifier(r *http.Request) string {
// Prefer API key if available
if apiKey := r.Header.Get("X-API-Key"); apiKey != "" {
return "apikey:" + apiKey
}
// Fall back to IP
ip := r.Header.Get("X-Forwarded-For")
if ip == "" {
ip = r.RemoteAddr
}
return "ip:" + ip
}
Multi-Tier Rate Limiting #
Different limits for different scopes:
// MultiTierLimiter applies global, per-user, and per-IP limits in order;
// a request must pass every applicable tier.
type MultiTierLimiter struct {
	global  *RedisRateLimiter // 10000/min for all
	perUser *RedisRateLimiter // 100/min per user
	perIP   *RedisRateLimiter // 1000/min per IP
}

// Allow checks each tier in order of increasing specificity. BUG FIX: the
// original discarded every limiter error with `_`, silently turning a Redis
// outage into a blanket denial; errors are now propagated so the caller can
// choose between fail-open and fail-closed.
func (m *MultiTierLimiter) Allow(ctx context.Context, userID, ip string) (bool, error) {
	// Check global limit first — it is the cheapest way to shed load.
	allowed, err := m.global.Allow(ctx, "global")
	if err != nil || !allowed {
		return false, err
	}
	// Check the per-user limit when the caller is authenticated.
	if userID != "" {
		allowed, err = m.perUser.Allow(ctx, "user:"+userID)
		if err != nil || !allowed {
			return false, err
		}
	}
	// Check the per-IP limit last.
	allowed, err = m.perIP.Allow(ctx, "ip:"+ip)
	if err != nil || !allowed {
		return false, err
	}
	return true, nil
}
Handling Redis Failures #
Don’t let rate limiter failures break your service:
// AllowWithFallback wraps Allow with a degraded-mode policy so a Redis
// outage never turns into a hard failure for callers.
func (r *RedisRateLimiter) AllowWithFallback(ctx context.Context, key string) bool {
	allowed, err := r.Allow(ctx, key)
	if err == nil {
		return allowed
	}
	log.Printf("rate limiter error: %v", err)
	// Redis unavailable: failOpen=true permits the request (availability
	// over strictness), failOpen=false denies it (strictness over
	// availability).
	return r.failOpen
}
Key Takeaways #
- Token bucket is great for APIs with burst allowance
- Sliding window is more precise but uses more memory
- Use Lua scripts for atomic Redis operations
- Implement multi-tier limits for different scopes
- Decide on fail-open vs fail-closed behavior
- Always return rate limit headers to clients
Proper rate limiting protects your services and improves reliability for all users.