feat: Observabilité et monitoring complet
Implémentation complète de la stack d'observabilité pour le monitoring de la plateforme multi-tenant Classeo.

## Error Tracking (GlitchTip)
- Intégration Sentry SDK avec GlitchTip auto-hébergé
- Scrubber PII avant envoi (RGPD: emails, tokens JWT, NIR français)
- Contexte enrichi: tenant_id, user_id, correlation_id
- Configuration backend (sentry.yaml) et frontend (sentry.ts)

## Metrics (Prometheus)
- Endpoint /metrics avec restriction IP en production
- Métriques HTTP: requests_total, request_duration_seconds (histogramme)
- Métriques sécurité: login_failures_total par tenant
- Métriques santé: health_check_status (postgres, redis, rabbitmq)
- Storage Redis pour persistance entre requêtes

## Logs (Loki)
- Processors Monolog: CorrelationIdLogProcessor, PiiScrubberLogProcessor
- Détection PII: emails, téléphones FR, tokens JWT, NIR français
- Labels structurés: tenant_id, correlation_id, level

## Dashboards (Grafana)
- Dashboard principal: latence P50/P95/P99, error rate, RPS
- Dashboard par tenant: métriques isolées par sous-domaine
- Dashboard infrastructure: santé postgres/redis/rabbitmq
- Datasources avec UIDs fixes pour portabilité

## Alertes (Alertmanager)
- HighApiLatencyP95/P99: SLA monitoring (200ms/500ms)
- HighErrorRate: error rate > 1% pendant 2 min
- ExcessiveLoginFailures: détection brute force
- ApplicationUnhealthy: health check failures

## Infrastructure
- InfrastructureHealthChecker: service partagé (DRY)
- HealthCheckController: endpoint /health pour load balancers
- Pre-push hook: make ci && make e2e avant push
This commit is contained in:
72
monitoring/promtail/config.yml
Normal file
72
monitoring/promtail/config.yml
Normal file
@@ -0,0 +1,72 @@
|
||||
# Promtail Configuration for Classeo
|
||||
# Collects logs from Docker containers and ships to Loki
|
||||
|
||||
# Promtail's own listeners.
# Per the Promtail configuration reference, a gRPC port of 0 means
# "pick a random port"; HTTP stays on the fixed port 9080.
server:
  grpc_listen_port: 0
  http_listen_port: 9080
|
||||
|
||||
# File in which Promtail records how far it has read each target.
# NOTE(review): /tmp is presumably not on a persistent volume, so offsets
# may be lost when the container is recreated — confirm this is acceptable.
positions:
  filename: '/tmp/positions.yaml'
|
||||
|
||||
# Loki push endpoint that receives every collected log entry.
clients:
  - url: 'http://loki:3100/loki/api/v1/push'
|
||||
|
||||
# Single scrape job: discovers containers through the Docker socket, keeps the
# classeo_* ones, and runs their log lines through the pipeline below.
scrape_configs:
  # Docker container logs via Docker socket
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s

    relabel_configs:
      # Only scrape classeo containers (the regex expects the leading "/"
      # that precedes the container name in this meta label).
      - source_labels: ['__meta_docker_container_name']
        regex: '/classeo_.*'
        action: keep
      # Expose whatever follows "classeo_" in the container name as `service`.
      - source_labels: ['__meta_docker_container_name']
        regex: '/classeo_(.*)'
        target_label: service
      # Add environment label.
      # NOTE(review): the ${VAR:-default} form is only substituted when
      # Promtail is started with -config.expand-env=true — confirm the
      # container passes that flag, otherwise the literal text becomes the
      # label value.
      - source_labels: []
        target_label: environment
        replacement: '${ENVIRONMENT:-development}'
      # Add project label
      - source_labels: []
        target_label: project
        replacement: classeo

    pipeline_stages:
      # Parse JSON logs from PHP backend.
      # No `source` is set on purpose: with docker_sd_configs the entry handed
      # to the pipeline is the raw log line, and there is no extracted `log`
      # key to read from. Pointing `source` at a missing key makes the stage
      # skip the entry, so nothing below would ever be extracted.
      - json:
          expressions:
            level: level
            message: message
            channel: channel
            correlation_id: extra.correlation_id
            tenant_id: extra.tenant_id
            user_id: context.user_id
            timestamp: datetime

      # Promote low-cardinality fields to Loki labels (an empty value means
      # "use the extracted field of the same name").
      # correlation_id is deliberately NOT a label: it is presumably unique per
      # request, and every distinct label value creates a separate Loki stream.
      # It remains in the JSON line, so filter at query time instead, e.g.
      #   {service="php"} | json | extra_correlation_id="<id>"
      # NOTE(review): tenant_id cardinality grows with the number of tenants —
      # confirm it stays bounded.
      - labels:
          level:
          channel:
          tenant_id:

      # Set timestamp from log entry (microsecond precision first, then the
      # second-precision and RFC3339 fallbacks).
      - timestamp:
          source: timestamp
          format: "2006-01-02T15:04:05.000000Z07:00"
          fallback_formats:
            - "2006-01-02T15:04:05Z07:00"
            - RFC3339

      # Filter out health check noise: for the php service only, drop lines
      # that mention the /health or /metrics probes.
      - match:
          selector: '{service="php"}'
          stages:
            - drop:
                expression: '.*GET /health.*'
                drop_counter_reason: health_check_noise
            - drop:
                expression: '.*GET /metrics.*'
                drop_counter_reason: metrics_endpoint_noise
|
||||
Reference in New Issue
Block a user