Implémentation complète de la stack d'observabilité pour le monitoring de la plateforme multi-tenant Classeo.

## Error Tracking (GlitchTip)
- Intégration Sentry SDK avec GlitchTip auto-hébergé
- Scrubber PII avant envoi (RGPD: emails, tokens JWT, NIR français)
- Contexte enrichi: tenant_id, user_id, correlation_id
- Configuration backend (sentry.yaml) et frontend (sentry.ts)

## Metrics (Prometheus)
- Endpoint /metrics avec restriction IP en production
- Métriques HTTP: requests_total, request_duration_seconds (histogramme)
- Métriques sécurité: login_failures_total par tenant
- Métriques santé: health_check_status (postgres, redis, rabbitmq)
- Storage Redis pour persistance entre requêtes

## Logs (Loki)
- Processors Monolog: CorrelationIdLogProcessor, PiiScrubberLogProcessor
- Détection PII: emails, téléphones FR, tokens JWT, NIR français
- Labels structurés: tenant_id, correlation_id, level

## Dashboards (Grafana)
- Dashboard principal: latence P50/P95/P99, error rate, RPS
- Dashboard par tenant: métriques isolées par sous-domaine
- Dashboard infrastructure: santé postgres/redis/rabbitmq
- Datasources avec UIDs fixes pour portabilité

## Alertes (Alertmanager)
- HighApiLatencyP95/P99: SLA monitoring (200ms/500ms)
- HighErrorRate: error rate > 1% pendant 2 min
- ExcessiveLoginFailures: détection brute force
- ApplicationUnhealthy: health check failures

## Infrastructure
- InfrastructureHealthChecker: service partagé (DRY)
- HealthCheckController: endpoint /health pour load balancers
- Pre-push hook: make ci && make e2e avant push
73 lines
2.0 KiB
YAML
73 lines
2.0 KiB
YAML
# Promtail configuration for Classeo.
# Collects logs from Docker containers and ships them to Loki.
|
|
|
|
# Listener ports for Promtail's own embedded server.
# NOTE(review): a gRPC port of 0 conventionally means "let the OS pick a free
# port" - confirm against the Promtail version in use.
server:
  grpc_listen_port: 0
  http_listen_port: 9080
|
|
|
|
# File in which Promtail records its read offsets.
# NOTE(review): /tmp is usually not persisted when a container is re-created;
# confirm a volume backs this path if offsets must survive restarts.
positions:
  filename: '/tmp/positions.yaml'
|
|
|
|
# Loki push endpoint(s) that receive the collected log entries.
clients:
  - url: 'http://loki:3100/loki/api/v1/push'
|
|
|
|
# Scrape jobs: which containers are tailed, which labels they receive, and how
# each log line is processed before being pushed to Loki.
scrape_configs:
  # Docker container logs, discovered through the Docker socket.
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s

    relabel_configs:
      # Keep only containers whose name starts with "/classeo_".
      - source_labels: ['__meta_docker_container_name']
        regex: '/classeo_.*'
        action: keep
      # Expose the part of the container name after "/classeo_" as `service`.
      - source_labels: ['__meta_docker_container_name']
        regex: '/classeo_(.*)'
        target_label: service
      # Static `environment` label.
      # NOTE(review): the ${VAR:-default} form is only substituted when
      # Promtail runs with -config.expand-env=true; otherwise the literal text
      # becomes the label value - confirm the launch flags.
      # Quoted so that whatever the variable expands to stays a YAML string.
      - source_labels: []
        target_label: environment
        replacement: '${ENVIRONMENT:-development}'
      # Static `project` label.
      - source_labels: []
        target_label: project
        replacement: classeo

    pipeline_stages:
      # Parse JSON logs from the PHP backend.
      # No `source:` is set on purpose: Docker service discovery hands the raw
      # container line to the pipeline, so the stage must parse the log line
      # itself. Pointing `source` at an extracted key that was never populated
      # (such as `log`) makes Promtail skip the stage, leaving every value
      # below - and therefore the labels and the timestamp - unset.
      - json:
          expressions:
            level: level
            message: message
            channel: channel
            correlation_id: extra.correlation_id
            tenant_id: extra.tenant_id
            user_id: context.user_id
            timestamp: datetime

      # Promote extracted values to Loki labels. An empty value means the
      # extracted key of the same name is used.
      # NOTE(review): correlation_id is presumably unique per request; Loki's
      # guidance is to avoid high-cardinality labels because every distinct
      # value creates a new stream. Consider structured metadata or querying
      # it from the JSON body instead - confirm dashboards before changing.
      - labels:
          level:
          channel:
          correlation_id:
          tenant_id:

      # Use the timestamp carried by the log entry (Go reference-time layout,
      # microsecond precision first, then coarser fallbacks).
      - timestamp:
          source: timestamp
          format: "2006-01-02T15:04:05.000000Z07:00"
          fallback_formats:
            - "2006-01-02T15:04:05Z07:00"
            - RFC3339

      # Drop health-check and metrics-scrape noise from the `php` service only.
      - match:
          selector: '{service="php"}'
          stages:
            - drop:
                expression: '.*GET /health.*'
                drop_counter_reason: health_check_noise
            - drop:
                expression: '.*GET /metrics.*'
                drop_counter_reason: metrics_endpoint_noise