Building Production Microservices with Go
Practical patterns for Go microservices - project structure, error handling, graceful shutdown, health checks, and observability.
Table of Contents
Go is an excellent fit for microservices: fast builds, small static binaries, and first-class concurrency. Here are patterns I use in production.
Project Structure #
├── cmd/
│ └── server/
│ └── main.go # Entry point
├── internal/
│ ├── config/ # Configuration
│ ├── handler/ # HTTP handlers
│ ├── service/ # Business logic
│ ├── repository/ # Data access
│ └── domain/ # Domain models
├── pkg/ # Shared packages
├── api/ # API definitions (OpenAPI, proto)
└── deployments/ # Kubernetes manifests
Configuration #
Use environment variables with sensible defaults:
// Config holds the service's runtime configuration. Fields are populated
// from environment variables via the `env` struct tags (see LoadConfig);
// envDefault supplies the value when the variable is unset, and ",required"
// makes parsing fail when the variable is missing.
type Config struct {
Port int `env:"PORT" envDefault:"8080"` // HTTP listen port
DatabaseURL string `env:"DATABASE_URL,required"` // DSN; startup fails if unset
LogLevel string `env:"LOG_LEVEL" envDefault:"info"` // log verbosity
Timeout time.Duration `env:"TIMEOUT" envDefault:"30s"` // HTTP read/write timeout
}
// LoadConfig populates a Config from the environment and returns it,
// wrapping any parse/validation failure with context.
func LoadConfig() (*Config, error) {
	var cfg Config
	if err := env.Parse(&cfg); err != nil {
		return nil, fmt.Errorf("parsing config: %w", err)
	}
	return &cfg, nil
}
Graceful Shutdown #
Handle SIGTERM properly:
// main wires up dependencies, starts the HTTP server, and shuts it down
// gracefully on SIGTERM/SIGINT so in-flight requests can complete.
func main() {
	cfg, err := config.LoadConfig()
	if err != nil {
		log.Fatalf("loading config: %v", err)
	}

	// Initialize dependencies. Both errors were previously discarded with
	// `_`, which would let the service start with a nil config or database.
	db, err := repository.NewDatabase(cfg.DatabaseURL)
	if err != nil {
		log.Fatalf("connecting to database: %v", err)
	}
	defer db.Close()

	svc := service.NewOrderService(db)
	handler := handler.NewOrderHandler(svc)

	srv := &http.Server{
		Addr:         fmt.Sprintf(":%d", cfg.Port),
		Handler:      handler.Routes(),
		ReadTimeout:  cfg.Timeout,
		WriteTimeout: cfg.Timeout,
	}

	// Start the server in the background so main can block on signals.
	go func() {
		log.Printf("Starting server on port %d", cfg.Port)
		// ListenAndServe returns ErrServerClosed after Shutdown; anything
		// else is a genuine startup or runtime failure.
		if err := srv.ListenAndServe(); err != http.ErrServerClosed {
			log.Fatalf("Server error: %v", err)
		}
	}()

	// Wait for a shutdown signal.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGTERM, syscall.SIGINT)
	<-quit
	log.Println("Shutting down...")

	// Give in-flight requests up to 30 seconds to finish.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	if err := srv.Shutdown(ctx); err != nil {
		log.Fatalf("Shutdown error: %v", err)
	}
	log.Println("Server stopped")
}
Health Checks #
Kubernetes needs liveness and readiness probes:
type HealthChecker struct {
db *sql.DB
}
func (h *HealthChecker) Liveness(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write([]byte("OK"))
}
func (h *HealthChecker) Readiness(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
defer cancel()
if err := h.db.PingContext(ctx); err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
json.NewEncoder(w).Encode(map[string]string{"error": "database unavailable"})
return
}
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(map[string]string{"status": "ready"})
}
Error Handling #
Use domain errors with HTTP mapping:
// domain/errors.go
// Error is the domain error type: a stable machine-readable Code, a
// human-readable Message, and an optional wrapped cause.
type Error struct {
	Code    string // stable identifier; handlers map it to an HTTP status
	Message string // human-readable description
	Err     error  // underlying cause, if any
}

// Error implements the error interface, appending the cause when present.
func (e *Error) Error() string {
	if e.Err != nil {
		return fmt.Sprintf("%s: %v", e.Message, e.Err)
	}
	return e.Message
}

// Unwrap exposes the wrapped cause so errors.Is and errors.As can traverse
// the chain past this domain error. Without it, a cause stored in Err was
// invisible to the standard errors helpers.
func (e *Error) Unwrap() error {
	return e.Err
}

// Sentinel domain errors shared across the service.
var (
	ErrNotFound     = &Error{Code: "NOT_FOUND", Message: "resource not found"}
	ErrConflict     = &Error{Code: "CONFLICT", Message: "resource already exists"}
	ErrValidation   = &Error{Code: "VALIDATION", Message: "validation failed"}
	ErrUnauthorized = &Error{Code: "UNAUTHORIZED", Message: "unauthorized"}
)
// handler/middleware.go
// ErrorMiddleware wraps a handler with panic recovery: a panic anywhere
// downstream is logged and converted into a 500 response instead of
// killing the connection's goroutine.
func ErrorMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Wrap with error recovery
// recover only works when called directly inside a deferred function,
// so this closure cannot be factored out.
defer func() {
if err := recover(); err != nil {
log.Printf("panic recovered: %v", err)
// respondError is defined elsewhere in this package.
respondError(w, http.StatusInternalServerError, "internal error")
}
}()
next.ServeHTTP(w, r)
})
}
// mapErrorToStatus translates a domain error code into an HTTP status.
// Anything that is not a known *domain.Error maps to 500.
func mapErrorToStatus(err error) int {
	var domainErr *domain.Error
	if !errors.As(err, &domainErr) {
		return http.StatusInternalServerError
	}
	statusByCode := map[string]int{
		"NOT_FOUND":    http.StatusNotFound,
		"CONFLICT":     http.StatusConflict,
		"VALIDATION":   http.StatusBadRequest,
		"UNAUTHORIZED": http.StatusUnauthorized,
	}
	if status, ok := statusByCode[domainErr.Code]; ok {
		return status
	}
	return http.StatusInternalServerError
}
Structured Logging #
Use structured logging for observability:
import "log/slog"
func main() {
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: slog.LevelInfo,
}))
slog.SetDefault(logger)
}
// GetOrder handles GET /orders/{id}: logs the request with its trace ID,
// fetches the order through the service layer, and writes it as JSON.
func (h *OrderHandler) GetOrder(w http.ResponseWriter, r *http.Request) {
orderID := chi.URLParam(r, "id")
// Structured log with request-scoped identifiers for correlation.
slog.Info("fetching order",
slog.String("order_id", orderID),
slog.String("trace_id", r.Header.Get("X-Trace-ID")),
)
order, err := h.service.GetOrder(r.Context(), orderID)
if err != nil {
slog.Error("failed to fetch order",
slog.String("order_id", orderID),
slog.Any("error", err),
)
// NOTE(review): err.Error() is sent to the client verbatim; confirm it
// cannot leak internal details for non-domain errors.
respondError(w, mapErrorToStatus(err), err.Error())
return
}
respondJSON(w, http.StatusOK, order)
}
Request Context #
Pass context through the entire call chain:
// CreateOrder creates an order and reserves inventory inside a single
// database transaction, honoring the caller's context for cancellation
// and deadlines throughout.
func (s *OrderService) CreateOrder(ctx context.Context, req CreateOrderRequest) (*Order, error) {
// Context carries deadlines, cancellation, and request-scoped values
// Check if context is already cancelled
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
// Start transaction with context
tx, err := s.db.BeginTx(ctx, nil)
if err != nil {
return nil, fmt.Errorf("starting transaction: %w", err)
}
// Rollback after a successful Commit is a no-op (returns ErrTxDone),
// so its error is deliberately ignored.
defer tx.Rollback()
// All operations use context
order, err := s.repo.CreateOrder(ctx, tx, req)
if err != nil {
return nil, err
}
// NOTE(review): Reserve looks like an external call made while the DB
// transaction is open; confirm it is safe here, since tx.Rollback cannot
// undo a reservation if Commit later fails.
if err := s.inventory.Reserve(ctx, order.Items); err != nil {
return nil, fmt.Errorf("reserving inventory: %w", err)
}
if err := tx.Commit(); err != nil {
return nil, fmt.Errorf("committing transaction: %w", err)
}
return order, nil
}
HTTP Client with Retry #
type HTTPClient struct {
client *http.Client
retries int
}
func NewHTTPClient(timeout time.Duration, retries int) *HTTPClient {
return &HTTPClient{
client: &http.Client{
Timeout: timeout,
Transport: &http.Transport{
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
},
},
retries: retries,
}
}
func (c *HTTPClient) Do(ctx context.Context, req *http.Request) (*http.Response, error) {
var resp *http.Response
var err error
for attempt := 0; attempt <= c.retries; attempt++ {
req = req.WithContext(ctx)
resp, err = c.client.Do(req)
if err == nil && resp.StatusCode < 500 {
return resp, nil
}
if attempt < c.retries {
backoff := time.Duration(attempt+1) * 100 * time.Millisecond
select {
case <-time.After(backoff):
case <-ctx.Done():
return nil, ctx.Err()
}
}
}
return resp, err
}
Testing #
Use interfaces for testability:
// OrderRepository abstracts order persistence so the service layer can be
// exercised against an in-memory fake in tests. Kept small and defined at
// the consumer, per Go interface convention.
type OrderRepository interface {
Create(ctx context.Context, order *Order) error
GetByID(ctx context.Context, id string) (*Order, error)
}
// In tests
// mockOrderRepo is an in-memory stand-in for OrderRepository used in tests.
type mockOrderRepo struct {
	orders map[string]*Order
}

// GetByID looks up an order by ID, returning domain.ErrNotFound on a miss.
func (m *mockOrderRepo) GetByID(ctx context.Context, id string) (*Order, error) {
	if order, found := m.orders[id]; found {
		return order, nil
	}
	return nil, domain.ErrNotFound
}
// TestGetOrder verifies the service returns an order found in the repository.
// NOTE(review): mockOrderRepo must also implement Create to satisfy the
// OrderRepository interface passed to NewOrderService — confirm the full
// mock exists in the test package.
func TestGetOrder(t *testing.T) {
repo := &mockOrderRepo{
orders: map[string]*Order{"123": {ID: "123", Total: 100}},
}
svc := NewOrderService(repo)
order, err := svc.GetOrder(context.Background(), "123")
require.NoError(t, err)
assert.Equal(t, 100, order.Total)
}
Key Takeaways #
- Structure code with clear boundaries (handlers, services, repositories) and keep implementation details private under internal/
- Always handle graceful shutdown
- Use context for cancellation and timeouts
- Implement proper health checks
- Log structured data for observability
- Design for testability with interfaces
These patterns help build services that are maintainable and production-ready.