# turf_saas/metrics.py

#!/usr/bin/env python3
"""
Prometheus metrics instrumentation for Turf SaaS.
Import this module in Flask apps to expose /metrics endpoint.
"""

import time
import functools
import logging
from typing import Callable, Any

try:
    from prometheus_client import (
        Counter,
        Histogram,
        Gauge,
        Summary,
        generate_latest,
        CONTENT_TYPE_LATEST,
        CollectorRegistry,
        multiprocess,
        REGISTRY,
    )

    PROMETHEUS_AVAILABLE = True
except ImportError:
    PROMETHEUS_AVAILABLE = False

logger = logging.getLogger(__name__)

# ============================================================
# Metric definitions
# ============================================================

if PROMETHEUS_AVAILABLE:
    # The collectors below exist only when prometheus_client imported
    # successfully; every helper in this module checks PROMETHEUS_AVAILABLE
    # before touching them.

    # ---- HTTP traffic ------------------------------------------------
    HTTP_REQUESTS_TOTAL = Counter(
        "http_requests_total",
        documentation="Total number of HTTP requests",
        labelnames=("method", "endpoint", "status_code", "service"),
    )
    HTTP_REQUEST_DURATION = Histogram(
        "http_request_duration_seconds",
        documentation="HTTP request duration in seconds",
        labelnames=("method", "endpoint", "service"),
        buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0),
    )
    HTTP_REQUESTS_IN_PROGRESS = Gauge(
        "http_requests_in_progress",
        documentation="Number of HTTP requests currently being processed",
        labelnames=("method", "endpoint", "service"),
    )

    # ---- ML predictions ----------------------------------------------
    ML_PREDICTIONS_TOTAL = Counter(
        "ml_predictions_total",
        documentation="Total ML prediction requests",
        labelnames=("model_type", "race_type"),
    )
    ML_PREDICTION_DURATION = Histogram(
        "ml_prediction_duration_seconds",
        documentation="ML prediction duration in seconds",
        labelnames=("model_type",),
        buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0),
    )
    ML_PREDICTION_ACCURACY = Gauge(
        "ml_prediction_accuracy_ratio",
        documentation="Rolling ML prediction accuracy (top-1, top-3)",
        labelnames=("accuracy_type",),
    )
    ML_PREDICTION_DRIFT = Gauge(
        "ml_prediction_drift_score",
        documentation="Feature drift score for ML models (0=no drift, 1=full drift)",
        labelnames=("feature_group",),
    )

    # ---- Database ----------------------------------------------------
    DB_QUERIES_TOTAL = Counter(
        "db_queries_total",
        documentation="Total database queries",
        labelnames=("operation", "table"),
    )
    DB_QUERY_DURATION = Histogram(
        "db_query_duration_seconds",
        documentation="Database query duration",
        labelnames=("operation",),
        buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0),
    )
    # Unlabelled gauge: a single value per process.
    DB_CONNECTION_POOL_SIZE = Gauge(
        "db_connection_pool_size",
        documentation="Current database connection pool size",
    )

    # ---- Business ----------------------------------------------------
    RACES_SCRAPED_TOTAL = Counter(
        "races_scraped_total",
        documentation="Total number of races scraped",
        labelnames=("source", "discipline"),
    )
    PREDICTIONS_ACCURACY_DAILY = Gauge(
        "predictions_accuracy_daily_ratio",
        documentation="Daily prediction accuracy ratio",
        labelnames=("date", "race_type"),
    )
    ACTIVE_SUBSCRIPTIONS = Gauge(
        "active_subscriptions_total",
        documentation="Number of active SaaS subscriptions",
        labelnames=("plan",),
    )

    # ---- App health --------------------------------------------------
    # Build information carried in the labels; init_metrics sets it to 1.
    APP_INFO = Gauge(
        "app_info",
        documentation="Application build information",
        labelnames=("version", "service", "env"),
    )


# ============================================================
# Flask integration
# ============================================================


def init_metrics(app, service_name: str = "unknown"):
    """
    Register Prometheus metrics middleware on a Flask app.

    Installs request hooks that record the request count, the request
    latency and the number of in-flight requests, and adds two routes:
    ``/metrics`` (Prometheus scrape endpoint) and ``/health`` (JSON health
    check). When ``prometheus_client`` is not installed, a warning is logged
    and the app is left untouched.

    Args:
        app: The Flask application to instrument.
        service_name: Value of the ``service`` label attached to the HTTP
            metrics and to ``app_info``; also returned by ``/health``.

    Returns:
        None.

    Usage:
        from metrics import init_metrics
        init_metrics(app, service_name="combined-api")
    """
    if not PROMETHEUS_AVAILABLE:
        logger.warning("prometheus_client not installed — metrics disabled")
        return

    from flask import Response, jsonify, request

    # Set app info gauge
    APP_INFO.labels(
        version=app.config.get("VERSION", "unknown"),
        service=service_name,
        env=app.config.get("ENV", "unknown"),
    ).set(1)

    def _endpoint_label() -> str:
        """Return a bounded-cardinality value for the ``endpoint`` label."""
        # The matched route template (e.g. "/races/<int:race_id>") is used
        # instead of the raw path: labelling by raw path creates one time
        # series per distinct URL, including every 404 a scanner probes.
        rule = request.url_rule
        return rule.rule if rule is not None else "<unmatched>"

    @app.before_request
    def before_request():
        endpoint = _endpoint_label()
        request._metrics_endpoint = endpoint
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed (or go negative) when the system clock is adjusted.
        request._metrics_start = time.perf_counter()
        HTTP_REQUESTS_IN_PROGRESS.labels(
            method=request.method, endpoint=endpoint, service=service_name
        ).inc()
        # Remember that this request was counted as in-flight so that the
        # teardown hook decrements the gauge exactly once.
        request._metrics_in_flight = True

    @app.after_request
    def after_request(response):
        # before_request may not have run (an earlier hook can short-circuit
        # the request), so fall back to computing the label here.
        endpoint = getattr(request, "_metrics_endpoint", None) or _endpoint_label()

        HTTP_REQUESTS_TOTAL.labels(
            method=request.method,
            endpoint=endpoint,
            status_code=str(response.status_code),
            service=service_name,
        ).inc()

        started = getattr(request, "_metrics_start", None)
        if started is not None:
            # Without a recorded start time the latency is unknown; skip the
            # observation rather than record a misleading ~0s sample.
            HTTP_REQUEST_DURATION.labels(
                method=request.method, endpoint=endpoint, service=service_name
            ).observe(time.perf_counter() - started)

        return response

    @app.teardown_request
    def teardown_request(exc):
        # The decrement lives in teardown (which runs even when an exception
        # propagates) instead of after_request, and is guarded by the flag
        # set in before_request, so the gauge neither leaks nor goes negative.
        if getattr(request, "_metrics_in_flight", False):
            request._metrics_in_flight = False
            HTTP_REQUESTS_IN_PROGRESS.labels(
                method=request.method,
                endpoint=request._metrics_endpoint,
                service=service_name,
            ).dec()

    @app.route("/metrics")
    def metrics_endpoint():
        """Prometheus metrics scrape endpoint."""
        # NOTE(review): generate_latest() without arguments serializes the
        # default registry of the current process only. If the app is served
        # by several worker processes, confirm whether prometheus_client's
        # multiprocess mode is required.
        return Response(generate_latest(), mimetype=CONTENT_TYPE_LATEST)

    @app.route("/health")
    def health_endpoint():
        """Docker / load-balancer health check endpoint."""
        return jsonify({"status": "ok", "service": service_name})

    logger.info("Prometheus metrics initialized for service: %s", service_name)


# ============================================================
# Decorator helpers
# ============================================================


def track_ml_prediction(model_type: str = "xgboost", race_type: str = "flat"):
    """Decorator to track ML prediction calls.

    The wrapped function's duration is observed on
    ``ml_prediction_duration_seconds`` for every call, including calls that
    raise. ``ml_predictions_total`` is incremented only when the wrapped
    function returns normally. When ``prometheus_client`` is unavailable the
    wrapped function is called without any instrumentation.

    Args:
        model_type: Value of the ``model_type`` label on both metrics.
        race_type: Value of the ``race_type`` label on the counter.

    Returns:
        A decorator that wraps a callable and preserves its return value and
        any exception it raises.
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if not PROMETHEUS_AVAILABLE:
                return func(*args, **kwargs)
            # perf_counter is monotonic; time.time() can jump when the system
            # clock is adjusted and would then record a wrong duration.
            started = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            finally:
                ML_PREDICTION_DURATION.labels(model_type=model_type).observe(
                    time.perf_counter() - started
                )
            # Reached only when func returned without raising.
            ML_PREDICTIONS_TOTAL.labels(
                model_type=model_type, race_type=race_type
            ).inc()
            return result

        return wrapper

    return decorator


def track_db_query(operation: str = "select", table: str = "unknown"):
    """Decorator to track DB query calls.

    The wrapped function's duration is observed on
    ``db_query_duration_seconds`` for every call, including calls that raise.
    ``db_queries_total`` is incremented only when the wrapped function
    returns normally. When ``prometheus_client`` is unavailable the wrapped
    function is called without any instrumentation.

    Args:
        operation: Value of the ``operation`` label on both metrics.
        table: Value of the ``table`` label on the counter.

    Returns:
        A decorator that wraps a callable and preserves its return value and
        any exception it raises.
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if not PROMETHEUS_AVAILABLE:
                return func(*args, **kwargs)
            # perf_counter is monotonic; time.time() can jump when the system
            # clock is adjusted and would then record a wrong duration.
            started = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            finally:
                DB_QUERY_DURATION.labels(operation=operation).observe(
                    time.perf_counter() - started
                )
            # Reached only when func returned without raising.
            DB_QUERIES_TOTAL.labels(operation=operation, table=table).inc()
            return result

        return wrapper

    return decorator


def update_ml_accuracy(top1_accuracy: float, top3_accuracy: float):
    """Publish the latest top-1 and top-3 accuracy values (call from scheduler).

    Writes each value to ``ML_PREDICTION_ACCURACY`` under the
    ``accuracy_type`` labels ``"top1"`` and ``"top3"``. Does nothing when
    ``prometheus_client`` is unavailable.
    """
    if not PROMETHEUS_AVAILABLE:
        return
    readings = (("top1", top1_accuracy), ("top3", top3_accuracy))
    for kind, ratio in readings:
        ML_PREDICTION_ACCURACY.labels(accuracy_type=kind).set(ratio)


def update_subscription_count(plan_counts: dict):
    """Set the active-subscription gauge for each plan in ``plan_counts``.

    Each key of ``plan_counts`` is used as the ``plan`` label and its value
    as the gauge value. Does nothing when ``prometheus_client`` is
    unavailable.
    """
    if PROMETHEUS_AVAILABLE:
        for plan_name, active in plan_counts.items():
            plan_gauge = ACTIVE_SUBSCRIPTIONS.labels(plan=plan_name)
            plan_gauge.set(active)