turf_saas/infra/grafana/dashboards/turf-saas-overview.json

{
  "title": "Turf SaaS — Overview",
  "uid": "turf-saas-overview",
  "schemaVersion": 38,
  "version": 1,
  "refresh": "30s",
  "time": { "from": "now-6h", "to": "now" },
  "tags": ["turf-saas"],
  "panels": [
    {
      "id": 1,
      "type": "stat",
      "title": "Request Rate (req/s)",
      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "sum(rate(http_requests_total[5m]))",
          "legendFormat": "req/s"
        }
      ],
      "options": { "colorMode": "background", "graphMode": "area" }
    },
    {
      "id": 2,
      "type": "stat",
      "title": "Error Rate (5xx)",
      "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
          "legendFormat": "error %"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 0.5 },
              { "color": "red", "value": 1 }
            ]
          }
        }
      }
    },
    {
      "id": 3,
      "type": "stat",
      "title": "p95 Latency",
      "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
          "legendFormat": "p95"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 1 },
              { "color": "red", "value": 2 }
            ]
          }
        }
      }
    },
    {
      "id": 4,
      "type": "stat",
      "title": "ML Top-1 Accuracy",
      "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "ml_prediction_accuracy_ratio{accuracy_type=\"top1\"} * 100",
          "legendFormat": "top-1 %"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "red", "value": null },
              { "color": "yellow", "value": 25 },
              { "color": "green", "value": 35 }
            ]
          }
        }
      }
    },
    {
      "id": 5,
      "type": "timeseries",
      "title": "HTTP Requests by Service",
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "sum(rate(http_requests_total[5m])) by (service)",
          "legendFormat": "{{ service }}"
        }
      ],
      "fieldConfig": {
        "defaults": { "unit": "reqps" }
      }
    },
    {
      "id": 6,
      "type": "timeseries",
      "title": "Request Duration p50/p95/p99",
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
          "legendFormat": "p50"
        },
        {
          "datasource": "Prometheus",
          "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
          "legendFormat": "p95"
        },
        {
          "datasource": "Prometheus",
          "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
          "legendFormat": "p99"
        }
      ],
      "fieldConfig": {
        "defaults": { "unit": "s" }
      }
    },
    {
      "id": 7,
      "type": "timeseries",
      "title": "ML Predictions per Hour",
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "sum(increase(ml_predictions_total[1h])) by (model_type)",
          "legendFormat": "{{ model_type }}"
        }
      ]
    },
    {
      "id": 8,
      "type": "timeseries",
      "title": "DB Query Duration",
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
      "targets": [
        {
          "datasource": "Prometheus",
          "expr": "histogram_quantile(0.95, sum(rate(db_query_duration_seconds_bucket[5m])) by (le, operation))",
          "legendFormat": "{{ operation }} p95"
        }
      ],
      "fieldConfig": {
        "defaults": { "unit": "s" }
      }
    }
  ]
}