# =============================================================================
# MONITORING & OBSERVABILITY SERVICES
# =============================================================================
# Usage: docker compose -f compose.yaml -f compose.monitoring.yaml up -d
#
# Stack: GlitchTip (error tracking), Prometheus (metrics), Grafana
# (dashboards), Loki + Promtail (logs), Alertmanager (alerting).
#
# The credential defaults in this file (SECRET_KEY, database password, Grafana
# admin/admin) are development placeholders. Override them through the
# corresponding environment variables before exposing any of these services.
# =============================================================================

# Environment shared by the GlitchTip web and worker containers. The worker is
# the Celery process (see its `command`), so the CELERY_* tuning must reach it;
# it presumably also sends notification e-mails, hence the e-mail settings.
x-glitchtip-environment: &glitchtip-environment
  DATABASE_URL: postgresql://glitchtip:glitchtip@glitchtip-db:5432/glitchtip
  SECRET_KEY: ${GLITCHTIP_SECRET_KEY:-change_me_in_production_very_secret_key}
  REDIS_URL: redis://glitchtip-redis:6379/0
  GLITCHTIP_DOMAIN: ${GLITCHTIP_DOMAIN:-http://localhost:8081}
  DEFAULT_FROM_EMAIL: ${DEFAULT_FROM_EMAIL:-glitchtip@classeo.local}
  EMAIL_URL: ${EMAIL_URL:-smtp://mailpit:1025}
  # Quoted so the values stay strings ("1,3" = autoscale bounds).
  CELERY_WORKER_AUTOSCALE: "1,3"
  CELERY_WORKER_MAX_TASKS_PER_CHILD: "10000"
  ENABLE_ORGANIZATION_CREATION: "true"
  ENABLE_USER_REGISTRATION: "true"

services:
  # ===========================================================================
  # ERROR TRACKING - GlitchTip (Sentry-compatible)
  # ===========================================================================
  # NOTE(review): no service in this file runs GlitchTip's database
  # migrations; confirm they are applied elsewhere (e.g. a one-off
  # `./bin/run-migrate.sh` run) before first start.
  glitchtip:
    image: glitchtip/glitchtip:v4.1
    container_name: classeo_glitchtip
    depends_on:
      glitchtip-db:
        condition: service_healthy
      glitchtip-redis:
        condition: service_healthy
    environment: *glitchtip-environment
    ports:
      # Host 8081 -> container 8080 (matches the GLITCHTIP_DOMAIN default).
      - "8081:8080"
    healthcheck:
      # Uses the Python interpreter instead of curl/wget to probe /_health/.
      test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/_health/')\""]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 60s
    restart: unless-stopped

  # Background worker: same image as the web service, started with the
  # Celery + beat entry point instead of the default command.
  glitchtip-worker:
    image: glitchtip/glitchtip:v4.1
    container_name: classeo_glitchtip_worker
    depends_on:
      glitchtip-db:
        condition: service_healthy
      glitchtip-redis:
        condition: service_healthy
    environment: *glitchtip-environment
    command: ./bin/run-celery-with-beat.sh
    restart: unless-stopped

  # Dedicated PostgreSQL instance for GlitchTip.
  glitchtip-db:
    image: postgres:18.1-alpine
    container_name: classeo_glitchtip_db
    environment:
      POSTGRES_DB: glitchtip
      POSTGRES_USER: glitchtip
      POSTGRES_PASSWORD: glitchtip
    volumes:
      # From PostgreSQL 18 the official image keeps its data under
      # /var/lib/postgresql/<major>/docker and declares the volume at
      # /var/lib/postgresql. Mounting the parent directory is what makes the
      # data land in the named volume; the pre-18 path
      # /var/lib/postgresql/data is no longer where PGDATA lives.
      - glitchtip_postgres_data:/var/lib/postgresql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U glitchtip -d glitchtip"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    restart: unless-stopped

  # Dedicated Redis instance for GlitchTip: append-only persistence, capped
  # at 128 MB with LRU eviction across all keys.
  glitchtip-redis:
    image: redis:7.4-alpine
    container_name: classeo_glitchtip_redis
    command: redis-server --appendonly yes --maxmemory 128mb --maxmemory-policy allkeys-lru
    volumes:
      - glitchtip_redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 5s
    restart: unless-stopped

  # ===========================================================================
  # METRICS - Prometheus
  # ===========================================================================
  prometheus:
    image: prom/prometheus:v3.2.0
    container_name: classeo_prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Keep 15 days of metrics.
      - '--storage.tsdb.retention.time=15d'
      # Enables the HTTP lifecycle endpoints (reload/quit).
      - '--web.enable-lifecycle'
    volumes:
      - ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./monitoring/prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
      - prometheus_data:/prometheus
    ports:
      - "9090:9090"
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    restart: unless-stopped

  # ===========================================================================
  # DASHBOARDS - Grafana
  # ===========================================================================
  grafana:
    image: grafana/grafana:11.4.0
    container_name: classeo_grafana
    environment:
      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_ROOT_URL: ${GRAFANA_ROOT_URL:-http://localhost:3001}
    volumes:
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
      - grafana_data:/var/lib/grafana
    ports:
      # Host 3001 -> container 3000 (matches the GF_SERVER_ROOT_URL default).
      - "3001:3000"
    depends_on:
      # Start only once both backing services report healthy.
      prometheus:
        condition: service_healthy
      loki:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/api/health"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    restart: unless-stopped

  # ===========================================================================
  # LOGS - Loki
  # ===========================================================================
  loki:
    image: grafana/loki:3.3.2
    container_name: classeo_loki
    command: -config.file=/etc/loki/config.yml
    volumes:
      - ./monitoring/loki/config.yml:/etc/loki/config.yml:ro
      - loki_data:/loki
    ports:
      - "3100:3100"
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3100/ready"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    restart: unless-stopped

  # ===========================================================================
  # LOG COLLECTOR - Promtail
  # ===========================================================================
  promtail:
    image: grafana/promtail:3.3.2
    container_name: classeo_promtail
    command: -config.file=/etc/promtail/config.yml
    volumes:
      - ./monitoring/promtail/config.yml:/etc/promtail/config.yml:ro
      # Read-only Docker socket; presumably used for container discovery by
      # the scrape configuration in config.yml - verify there.
      - /var/run/docker.sock:/var/run/docker.sock:ro
    depends_on:
      loki:
        condition: service_healthy
    restart: unless-stopped

  # ===========================================================================
  # ALERTING - Alertmanager
  # ===========================================================================
  alertmanager:
    image: prom/alertmanager:v0.28.0
    container_name: classeo_alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    volumes:
      - ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
    ports:
      - "9093:9093"
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9093/-/healthy"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    restart: unless-stopped

# =============================================================================
# PERSISTENT MONITORING VOLUMES
# =============================================================================
volumes:
  glitchtip_postgres_data:
  glitchtip_redis_data:
  prometheus_data:
  grafana_data:
  loki_data:
  alertmanager_data: