Claude
Skills
Sign in
โ† Back

golang-observability-opentelemetry

Included with Lifetime
$97 forever

Instrumenting Go applications with OpenTelemetry for distributed tracing, Prometheus for metrics, and structured logging with slog

toolchainobservabilitygolangopentelemetryprometheustracingmetricsloggingslog

What this skill does


# Go Observability with OpenTelemetry

## Overview

Modern Go applications require comprehensive observability through the three pillars: traces, metrics, and logs. OpenTelemetry provides vendor-neutral instrumentation for distributed tracing, Prometheus offers powerful metrics collection, and Go's slog package (1.21+) delivers structured logging with minimal overhead.

**Key Features:**
- ๐Ÿ” **OpenTelemetry**: Distributed tracing with context propagation
- ๐Ÿ“Š **Prometheus**: Metrics collection with /metrics endpoint
- ๐Ÿ“ **Structured Logging**: slog with JSON formatting and correlation IDs
- ๐ŸŽฏ **Auto-Instrumentation**: HTTP/gRPC middleware patterns
- ๐Ÿ’š **Health Checks**: Kubernetes-ready readiness/liveness probes
- ๐Ÿ”„ **Graceful Shutdown**: Clean exporter shutdown and signal handling

## When to Use This Skill

Activate this skill when:
- Instrumenting microservices for production observability
- Setting up distributed tracing across service boundaries
- Creating operational dashboards with Prometheus/Grafana
- Debugging production performance issues or bottlenecks
- Implementing SLOs and monitoring SLIs
- Adding observability to existing Go applications
- Correlating logs, traces, and metrics for debugging

## Core Observability Principles

### The Three Pillars

1. **Traces**: Understand request flow across distributed systems
2. **Metrics**: Measure system behavior and performance over time
3. **Logs**: Record discrete events for debugging and audit

### Correlation Strategy

All three pillars must share common identifiers:
- **Trace ID**: Links all operations in a request
- **Span ID**: Identifies specific operation within trace
- **Request ID**: Correlates logs with traces and metrics

## OpenTelemetry Integration

### Installation

```bash
go get go.opentelemetry.io/otel
go get go.opentelemetry.io/otel/sdk
go get go.opentelemetry.io/otel/exporters/jaeger
go get go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp
```

### Basic Setup

```go
package main

import (
    "context"
    "log"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/exporters/jaeger"
    "go.opentelemetry.io/otel/sdk/resource"
    sdktrace "go.opentelemetry.io/otel/sdk/trace"
    semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
)

func initTracer(serviceName string) (*sdktrace.TracerProvider, error) {
    // Create Jaeger exporter
    exporter, err := jaeger.New(jaeger.WithCollectorEndpoint(
        jaeger.WithEndpoint("http://localhost:14268/api/traces"),
    ))
    if err != nil {
        return nil, err
    }

    // Create resource with service name
    res, err := resource.Merge(
        resource.Default(),
        resource.NewWithAttributes(
            semconv.SchemaURL,
            semconv.ServiceName(serviceName),
            semconv.ServiceVersion("1.0.0"),
        ),
    )
    if err != nil {
        return nil, err
    }

    // Create tracer provider
    tp := sdktrace.NewTracerProvider(
        sdktrace.WithBatcher(exporter),
        sdktrace.WithResource(res),
        sdktrace.WithSampler(sdktrace.AlwaysSample()), // Use probability sampler in production
    )

    otel.SetTracerProvider(tp)
    return tp, nil
}

func main() {
    tp, err := initTracer("order-service")
    if err != nil {
        log.Fatal(err)
    }
    defer func() {
        if err := tp.Shutdown(context.Background()); err != nil {
            log.Printf("Error shutting down tracer: %v", err)
        }
    }()

    // Application code...
}
```

### Creating Spans

```go
import (
    "context"

    "go.opentelemetry.io/otel"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/codes"
    "go.opentelemetry.io/otel/trace"
)

func ProcessOrder(ctx context.Context, order Order) error {
    tracer := otel.Tracer("order-service")
    ctx, span := tracer.Start(ctx, "ProcessOrder")
    defer span.End()

    // Add attributes
    span.SetAttributes(
        attribute.String("order.id", order.ID),
        attribute.Int("order.items", len(order.Items)),
        attribute.Float64("order.total", order.Total),
    )

    // Validate order (creates child span)
    if err := validateOrder(ctx, order); err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, "validation failed")
        return err
    }

    // Fulfill order
    if err := fulfillOrder(ctx, order); err != nil {
        span.RecordError(err)
        span.SetStatus(codes.Error, "fulfillment failed")
        return err
    }

    span.SetStatus(codes.Ok, "order processed successfully")
    return nil
}

func validateOrder(ctx context.Context, order Order) error {
    _, span := otel.Tracer("order-service").Start(ctx, "validateOrder")
    defer span.End()

    // Validation logic...
    return nil
}
```

### HTTP Middleware Instrumentation

```go
import (
    "net/http"

    "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)

func main() {
    // Wrap handler with automatic tracing
    handler := http.HandlerFunc(orderHandler)
    wrappedHandler := otelhttp.NewHandler(handler, "order-handler")

    http.Handle("/orders", wrappedHandler)
    http.ListenAndServe(":8080", nil)
}

// Manual instrumentation for more control
func orderHandler(w http.ResponseWriter, r *http.Request) {
    ctx := r.Context()
    tracer := otel.Tracer("order-service")

    ctx, span := tracer.Start(ctx, "orderHandler")
    defer span.End()

    // Extract order ID from request
    orderID := r.URL.Query().Get("id")
    span.SetAttributes(attribute.String("order.id", orderID))

    // Process order with propagated context
    order, err := fetchOrder(ctx, orderID)
    if err != nil {
        span.RecordError(err)
        http.Error(w, "Order not found", http.StatusNotFound)
        return
    }

    // ... handle response
}
```

## Prometheus Metrics

### Installation

```bash
go get github.com/prometheus/client_golang/prometheus
go get github.com/prometheus/client_golang/prometheus/promhttp
```

### Metric Types and Patterns

```go
package metrics

import (
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promauto"
)

var (
    // Counter: Monotonically increasing value
    httpRequestsTotal = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "http_requests_total",
            Help: "Total number of HTTP requests",
        },
        []string{"method", "path", "status"},
    )

    // Gauge: Value that can go up or down
    activeConnections = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "active_connections",
            Help: "Number of active connections",
        },
    )

    // Histogram: Observations bucketed by value
    httpRequestDuration = promauto.NewHistogramVec(
        prometheus.HistogramOpts{
            Name:    "http_request_duration_seconds",
            Help:    "HTTP request duration in seconds",
            Buckets: prometheus.DefBuckets, // [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
        },
        []string{"method", "path"},
    )

    // Summary: Similar to histogram but calculates quantiles
    dbQueryDuration = promauto.NewSummaryVec(
        prometheus.SummaryOpts{
            Name:       "db_query_duration_seconds",
            Help:       "Database query duration",
            Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
        },
        []string{"query_type"},
    )
)
```

### Metrics Middleware

```go
import (
    "net/http"
    "strconv"
    "time"

    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// Metrics middleware that instruments all HTTP handlers
func MetricsMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        start := time.Now()

        // Track active connections
        activeConnections.Inc()
        defer activeConnections.Dec()

        // Wrap response writer to capture status code
        rw := &responseWriter{

Related in toolchain