turf_saas/dashboard_api.py

#!/usr/bin/env python3
print("STARTING API...")
"""
API server - Turf Dashboard Data
Includes ML predictions using XGBoost models
"""

from flask import Flask, jsonify, send_file, send_from_directory, request
import sqlite3
from datetime import datetime, timedelta
import pickle
import os

try:
    import pandas as pd
    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    ML_AVAILABLE = True
except ImportError:
    ML_AVAILABLE = False
    pd = None
    np = None
    LabelEncoder = None

# Flask application object; every @app.route decorator in this module registers on it.
app = Flask(__name__)
# Absolute path of the SQLite database opened by get_db().
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
# Absolute path of the pickle file read by load_models().
MODEL_PATH = "/home/h3r7/turf_saas/xgboost_models.pkl"

# Populated lazily by load_models(): None until a load has been attempted,
# False after a failed load, otherwise the unpickled object.
ml_models = None


def load_models():
    """Return the unpickled model bundle, loading it from MODEL_PATH on first use.

    Returns:
        The cached object when a previous call already populated it, ``None``
        when the pickle file does not exist, or ``False`` when reading or
        unpickling failed (the failure is remembered, so it is not retried).
    """
    global ml_models

    # Anything other than None means a load was already attempted.
    if ml_models is not None:
        return ml_models
    if not os.path.exists(MODEL_PATH):
        return ml_models

    try:
        # NOTE: pickle executes arbitrary code on load; MODEL_PATH must only
        # ever point at a trusted, locally produced file.
        with open(MODEL_PATH, "rb") as handle:
            ml_models = pickle.load(handle)
        print("✅ XGBoost models loaded")
    except Exception as exc:
        print(f"⚠️  Failed to load models: {exc}")
        ml_models = False
    return ml_models


def get_db():
    """Open a new SQLite connection to DB_PATH.

    The connection uses ``sqlite3.Row`` as row factory, so columns can be read
    by name as well as by index. The caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection


def ensure_ml_cache_table(conn):
    """Create the ml_predictions_cache table if needed and add missing columns.

    The table and its date index are created when absent. Tables created
    before the ``risque_label`` / ``risque_score`` columns existed are migrated
    by adding whichever of the two is missing. The function is idempotent and
    commits before returning.

    Args:
        conn: An open sqlite3 connection.

    Raises:
        sqlite3.Error: If the schema statements fail for any reason other than
            the column having been added concurrently.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS ml_predictions_cache (
            id             INTEGER PRIMARY KEY AUTOINCREMENT,
            date           TEXT NOT NULL,
            num_reunion    INTEGER,
            num_course     INTEGER,
            horse_name     TEXT,
            horse_number   INTEGER,
            odds           REAL,
            prob_top1      REAL,
            prob_top3      REAL,
            ml_score       REAL,
            recommendation TEXT,
            is_value_bet   INTEGER DEFAULT 0,
            is_outlier     INTEGER DEFAULT 0,
            race_label     TEXT,
            race_name      TEXT,
            hippodrome     TEXT,
            discipline     TEXT,
            distance       REAL,
            heure          TEXT,
            risque_label   TEXT DEFAULT 'neutral',
            risque_score   INTEGER DEFAULT 50,
            model_version  TEXT DEFAULT 'xgboost_v1',
            created_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(date, num_reunion, num_course, horse_name)
        )
    """)
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_ml_cache_date
        ON ml_predictions_cache(date)
    """)

    # Migration: inspect the actual schema instead of issuing ALTER TABLE
    # blindly and swallowing every error. Index 1 of a table_info row is the
    # column name.
    existing_columns = {
        row[1] for row in conn.execute("PRAGMA table_info(ml_predictions_cache)")
    }
    migrations = (
        ("risque_label", "TEXT DEFAULT 'neutral'"),
        ("risque_score", "INTEGER DEFAULT 50"),
    )
    for column, definition in migrations:
        if column in existing_columns:
            continue
        try:
            # Column name and definition are fixed constants, never user input.
            conn.execute(
                f"ALTER TABLE ml_predictions_cache ADD COLUMN {column} {definition}"
            )
        except sqlite3.OperationalError as exc:
            # Another connection may have added the column between the check
            # and the ALTER; anything else is a real failure.
            if "duplicate column name" not in str(exc).lower():
                raise
    conn.commit()


def get_ml_from_cache(conn, date):
    """Read the cached ML predictions stored for ``date``.

    Args:
        conn: An open sqlite3 connection whose rows can be converted to dicts.
        date: Value matched against the ``date`` column of the cache table.

    Returns:
        ``(predictions, course_info)`` where ``predictions`` is a list of dicts
        ordered by descending ``ml_score`` and ``course_info`` maps
        ``"<num_reunion>_<num_course>"`` to the race metadata of the first row
        seen for that race; ``(None, None)`` when nothing is cached.
    """
    ensure_ml_cache_table(conn)
    rows = conn.execute(
        """SELECT * FROM ml_predictions_cache WHERE date = ? ORDER BY ml_score DESC""",
        (date,)
    ).fetchall()
    if not rows:
        return None, None

    # Columns copied verbatim into each prediction, in output order.
    copied_fields = (
        "horse_name",
        "horse_number",
        "odds",
        "prob_top1",
        "prob_top3",
        "ml_score",
        "recommendation",
        "is_value_bet",
        "is_outlier",
        "num_reunion",
        "num_course",
        "race_label",
        "race_name",
        "hippodrome",
        "discipline",
        "distance",
        "heure",
    )

    predictions = []
    course_info = {}
    for raw in rows:
        record = dict(raw)

        entry = {field: record[field] for field in copied_fields}
        # Fall back to the column defaults if the risk columns are absent.
        entry["risque_label"] = record.get("risque_label", "neutral")
        entry["risque_score"] = record.get("risque_score", 50)
        predictions.append(entry)

        race_key = f"{record['num_reunion']}_{record['num_course']}"
        if race_key in course_info:
            continue
        course_info[race_key] = {
            "libelle":          record["race_name"],
            "libelle_court":    record["hippodrome"],
            "discipline":       record["discipline"],
            "distance":         record["distance"],
            "heure_depart_str": record["heure"],
        }
    return predictions, course_info


def save_ml_to_cache(conn, date, predictions, model_version="xgboost_v1"):
    """Replace the cached ML predictions for ``date`` with ``predictions``.

    Existing rows for the date are deleted, then one row per prediction is
    written with INSERT OR REPLACE, so a duplicate
    (date, num_reunion, num_course, horse_name) entry overwrites the earlier
    one instead of aborting the whole save with an IntegrityError. A risk
    label and score are computed once per race with calculate_risque() and
    stored on every row of that race.

    Args:
        conn: An open sqlite3 connection.
        date: Value stored in the ``date`` column.
        predictions: Iterable of prediction dicts; missing keys are stored as
            NULL (``is_value_bet`` / ``is_outlier`` default to 0).
        model_version: Value stored in the ``model_version`` column.

    Raises:
        sqlite3.Error: If writing fails; the pending delete and inserts are
            rolled back first so the previous cache content is kept.
    """
    from collections import defaultdict

    ensure_ml_cache_table(conn)

    # Group the horses of each race so the risk is computed on the full field.
    race_horses = defaultdict(list)
    for p in predictions:
        key = (p.get("num_reunion"), p.get("num_course"))
        race_horses[key].append({
            "odds":      p.get("odds", 999),
            "ml_score":  p.get("ml_score", 0),
            "prob_top1": p.get("prob_top1", 0),
            "prob_top3": p.get("prob_top3", 0),
        })

    race_risque = {}
    for key, partants in race_horses.items():
        label, score = calculate_risque(partants)
        race_risque[key] = (label or "neutral", score or 50)

    rows = []
    for p in predictions:
        rkey = (p.get("num_reunion"), p.get("num_course"))
        rl, rs = race_risque.get(rkey, ("neutral", 50))
        rows.append((
            date,
            p.get("num_reunion"),
            p.get("num_course"),
            p.get("horse_name"),
            p.get("horse_number"),
            p.get("odds"),
            p.get("prob_top1"),
            p.get("prob_top3"),
            p.get("ml_score"),
            p.get("recommendation"),
            p.get("is_value_bet", 0),
            p.get("is_outlier", 0),
            p.get("race_label"),
            p.get("race_name"),
            p.get("hippodrome"),
            p.get("discipline"),
            p.get("distance"),
            p.get("heure"),
            rl,
            rs,
            model_version,
        ))

    try:
        # Drop the previous entries for the date so a refresh fully replaces them.
        conn.execute("DELETE FROM ml_predictions_cache WHERE date = ?", (date,))
        conn.executemany("""
            INSERT OR REPLACE INTO ml_predictions_cache
                (date, num_reunion, num_course, horse_name, horse_number, odds,
                 prob_top1, prob_top3, ml_score, recommendation, is_value_bet, is_outlier,
                 race_label, race_name, hippodrome, discipline, distance, heure,
                 risque_label, risque_score, model_version)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
        """, rows)
    except Exception:
        # Do not leave a half-applied delete/insert pending on the connection.
        conn.rollback()
        raise
    conn.commit()


def calculate_risque(partants):
    """Classify how risky a race is from its runners' ML scores and odds.

    Each runner's strength is its ``ml_score``, falling back to ``prob_top1``
    and then 0 when the value is missing or zero. The rules are evaluated in
    this order and the first match wins:

      1. safe:    best strength >= 65 and it leads the second best by >= 20.
                  Score = min(100, int(50 + gap * 1.5)).
      2. trap:    the runner with the lowest odds has odds < 5 but a strength
                  < 25. Score = max(10, int(35 - (25 - strength))).
      3. trap:    at least 4 runners have ``ml_score`` > 40 and the best
                  strength is < 70. Score = max(10, int(40 - 2 * count)).
      4. trap:    the two best are within 8 points and the second best is
                  > 45. Score = max(15, int(30 + gap)).
      5. neutral: everything else. Score = int(35 + gap * 1.2) clamped to
                  the range 35..64.

    Args:
        partants: List of dicts with optional ``ml_score``, ``prob_top1`` and
            ``odds`` keys. Missing or zero odds are treated as 999.

    Returns:
        ``(label, score)`` with ``label`` in {"safe", "trap", "neutral"} and
        an integer ``score``, or ``(None, None)`` when ``partants`` is empty.
    """
    if not partants:
        return None, None

    def strength(runner):
        return runner.get("ml_score") or runner.get("prob_top1") or 0

    ranked = sorted(partants, key=strength, reverse=True)
    top1_score = strength(ranked[0])
    top2_score = strength(ranked[1]) if len(ranked) > 1 else 0
    gap_1_2 = top1_score - top2_score

    # Runners the model itself rates above 40 (prob_top1 is not a fallback here).
    nb_dangerous = sum(1 for p in ranked if (p.get("ml_score") or 0) > 40)

    # Market favourite = lowest odds; min() keeps the first runner on ties.
    favourite = min(partants, key=lambda p: p.get("odds") or 999)
    fav_odds = favourite.get("odds") or 999
    fav_ml = strength(favourite)
    fav_surprise = fav_odds < 5 and fav_ml < 25

    # --- SAFE: one runner clearly dominates ---
    if top1_score >= 65 and gap_1_2 >= 20:
        return "safe", min(100, int(50 + gap_1_2 * 1.5))

    # --- TRAP: market favourite not backed by the model, or a very open race ---
    if fav_surprise:
        return "trap", max(10, int(35 - (25 - fav_ml)))
    if nb_dangerous >= 4 and top1_score < 70:
        return "trap", max(10, int(40 - nb_dangerous * 2))
    if gap_1_2 < 8 and top2_score > 45:
        return "trap", max(15, int(30 + gap_1_2))

    # --- NEUTRAL: intermediate cases, scored by the leader's advantage ---
    return "neutral", min(64, max(35, int(35 + gap_1_2 * 1.2)))


def table_exists(conn, table_name):
    """Tell whether ``sqlite_master`` lists a table named ``table_name``.

    Only entries of type ``'table'`` are considered, so views and indexes with
    that name do not count.
    """
    lookup = "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?"
    match = conn.execute(lookup, (table_name,)).fetchone()
    if match is None:
        return False
    return True


# Shared head of the pmu_partants queries: maps the PMU column names onto the
# feature names used by the ML code and fills columns the PMU tables do not
# provide with constant placeholders.
_PMU_PARTANTS_SELECT = """
    SELECT
        p.date_programme AS date,
        p.num_reunion,
        p.num_course,
        p.num_pmu AS horse_number,
        p.nom AS horse_name,
        p.age,
        p.sexe,
        p.musique,
        p.nombre_courses AS nb_courses,
        p.nombre_victoires AS nb_victoires,
        p.nombre_places AS nb_places,
        p.gains_annee_en_cours AS gains_annee,
        COALESCE(p.cote_direct, 0) AS cote_directe,
        COALESCE(c.distance, 0) AS distance,
        COALESCE(c.discipline, 'PLAT') AS discipline,
        COALESCE(c.nb_declares_partants, 0) AS nb_partants,
        COALESCE(p.oeilleres, 'SANS_OEILLERES') AS oeilleres,
        COALESCE(p.tx_victoire, 0) AS tx_victoire,
        COALESCE(p.tx_place, 0) AS tx_place,
        COALESCE(p.forme_recente, 0) AS forme_recente,
        0 AS reduction_km,
        'NEUTRE' AS avis_entraineur,
        'NON' AS deferre,
        0 AS rang_cote,
        0 AS ratio_cote_field
    FROM pmu_partants p
    LEFT JOIN pmu_courses c
        ON c.date_programme = p.date_programme
       AND c.num_reunion = p.num_reunion
       AND c.num_course = p.num_course
"""


def _fetch_course_info(conn, date):
    """Return pmu_courses rows for ``date`` keyed by "<num_reunion>_<num_course>"."""
    cursor = conn.execute(
        """
        SELECT num_reunion, num_course, libelle, libelle_court, discipline, distance, heure_depart_str
        FROM pmu_courses
        WHERE date_programme = ?
        ORDER BY num_reunion, num_course
    """,
        (date,),
    )
    return {
        f"{row['num_reunion']}_{row['num_course']}": dict(row)
        for row in cursor.fetchall()
    }


def _fetch_pmu_partants(conn, date, french_only):
    """Return pmu_partants rows for ``date`` as dicts, optionally only pays_code 'FRA'."""
    sql = _PMU_PARTANTS_SELECT
    if french_only:
        sql += """
    INNER JOIN pmu_reunions r
        ON r.date_programme = p.date_programme
       AND r.num_reunion = p.num_reunion
    WHERE p.date_programme = ? AND r.pays_code = 'FRA'
"""
    else:
        sql += """
    WHERE p.date_programme = ?
"""
    sql += "    ORDER BY p.num_reunion, p.num_course, p.num_pmu"
    return [dict(row) for row in conn.execute(sql, (date,)).fetchall()]


def load_ml_horses(conn, today):
    """Load the runners to score, together with the matching race metadata.

    Sources are tried in this order:

      1. If a ``historical_data`` table exists: today's
         ``canalturf_partants`` rows of ``predictions`` with positive odds,
         left-joined to ``historical_data`` on the horse name. This result is
         returned even when it is empty.
      2. Otherwise today's ``pmu_partants`` rows belonging to a meeting whose
         ``pays_code`` is 'FRA'.
      3. Otherwise all ``pmu_partants`` rows of the most recent
         ``date_programme`` present in the table.

    Args:
        conn: An open sqlite3 connection whose rows support access by column
            name (e.g. ``sqlite3.Row``).
        today: Programme date to load.

    Returns:
        ``(date_used, horses, course_info)``: the date the rows belong to, a
        list of row dicts, and the ``pmu_courses`` metadata of ``today`` keyed
        by "<num_reunion>_<num_course>". ``course_info`` is empty for the
        fallback date and when nothing was found.
    """
    course_info = _fetch_course_info(conn, today)

    if table_exists(conn, "historical_data"):
        c = conn.execute(
            """
            SELECT DISTINCT p.horse_name, p.horse_number, p.odds,
                   h.age, h.sexe, h.nb_courses, h.nb_victoires, h.nb_places,
                   h.tx_victoire, h.tx_place, h.forme_recente, h.reduction_km,
                   h.gains_annee, h.cote_directe, h.distance, h.discipline,
                   h.avis_entraineur, h.oeilleres, h.deferre, h.nb_partants,
                   h.rang_cote, h.ratio_cote_field, h.musique
            FROM predictions p
            LEFT JOIN historical_data h ON h.horse_name = p.horse_name
            WHERE p.date = ? AND p.source = 'canalturf_partants' AND p.odds > 0
        """,
            (today,),
        )
        horses = [dict(row) for row in c.fetchall()]
        return today, horses, course_info

    horses = _fetch_pmu_partants(conn, today, french_only=True)
    if horses:
        return today, horses, course_info

    # Nothing for today: fall back to the latest programme stored.
    fallback_date = conn.execute(
        "SELECT MAX(date_programme) FROM pmu_partants"
    ).fetchone()[0]
    if fallback_date:
        # NOTE(review): no course metadata is loaded for the fallback date and
        # the 'FRA' filter is not applied here - confirm both are intentional.
        return fallback_date, _fetch_pmu_partants(conn, fallback_date, french_only=False), {}

    return today, [], {}


def enrich_ml_horses(horses):
    """Normalise odds and fill in missing per-horse fields, in place.

    Horses are grouped per race by (date or date_programme, num_reunion,
    num_course). Within each race:

      * ``odds`` is coerced to float (0.0 when missing or not numeric), read
        from ``odds`` or, if that key is absent, from ``cote_directe``;
      * ``cote_directe`` is coerced to float and falls back to ``odds``;
      * absent keys receive neutral defaults;
      * ``nb_partants``, ``rang_cote`` and ``ratio_cote_field`` are computed
        from the race whenever they are absent or falsy. The PMU queries emit
        0 as a placeholder for them, which must not be kept as a real value.
        ``rang_cote`` is the 1-based rank by ascending odds (unknown odds
        last) and ``ratio_cote_field`` is odds divided by the mean positive
        odds of the race.

    Args:
        horses: List of mutable dicts, one per horse.

    Returns:
        The same ``horses`` list, with every dict updated in place.
    """

    def as_float(value):
        # Tolerate None, empty strings and non-numeric text coming from the DB.
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    races = {}
    for horse in horses:
        race_key = (
            horse.get("date") or horse.get("date_programme"),
            horse.get("num_reunion"),
            horse.get("num_course"),
        )
        races.setdefault(race_key, []).append(horse)

    for group in races.values():
        odds_values = []
        for horse in group:
            odds = as_float(horse.get("odds", horse.get("cote_directe", 0)))
            horse["odds"] = odds
            horse["cote_directe"] = as_float(horse.get("cote_directe", odds)) or odds
            if odds > 0:
                odds_values.append(odds)

        avg_odds = sum(odds_values) / len(odds_values) if odds_values else 0
        # Horses without usable odds (0) are ranked after every priced horse.
        ranked = sorted(group, key=lambda h: h["odds"] or 999999)

        for idx, horse in enumerate(ranked, start=1):
            horse.setdefault("horse_number", horse.get("num_pmu"))
            horse.setdefault("horse_name", horse.get("nom"))
            horse.setdefault("age", 0)
            horse.setdefault("sexe", "U")
            horse.setdefault("nb_courses", 0)
            horse.setdefault("nb_victoires", 0)
            horse.setdefault("nb_places", 0)
            horse.setdefault("tx_victoire", 0)
            horse.setdefault("tx_place", 0)
            horse.setdefault("forme_recente", 0)
            horse.setdefault("reduction_km", 0)
            horse.setdefault("gains_annee", 0)
            horse.setdefault("distance", 0)
            horse.setdefault("discipline", "PLAT")
            horse.setdefault("avis_entraineur", "NEUTRE")
            horse.setdefault("oeilleres", "SANS")
            horse.setdefault("deferre", "NON")
            horse.setdefault("musique", "")

            # Race-derived fields: replace absent values and 0/None placeholders.
            if not horse.get("nb_partants"):
                horse["nb_partants"] = len(group)
            if not horse.get("rang_cote"):
                horse["rang_cote"] = idx
            if not horse.get("ratio_cote_field"):
                horse["ratio_cote_field"] = (
                    round(horse["odds"] / avg_odds, 3) if avg_odds > 0 else 0
                )

    return horses


def prepare_features_from_db(horse_data):
    """Wrap one horse record in a single-row DataFrame.

    Missing values in the categorical columns that are present
    (discipline, sexe, avis_entraineur, oeilleres, deferre) are replaced by
    the string "UNKNOWN"; all other columns are left untouched.
    """
    frame = pd.DataFrame([horse_data])

    categorical = ("discipline", "sexe", "avis_entraineur", "oeilleres", "deferre")
    present = [name for name in categorical if name in frame.columns]
    for name in present:
        frame[name] = frame[name].fillna("UNKNOWN")

    return frame


@app.route("/")
def index():
    """Serve the dashboard HTML page at the site root."""
    dashboard_page = "/home/h3r7/turf_saas/dashboard.html"
    return send_file(dashboard_page)


@app.route("/turf/")
@app.route("/turf")
def turf_index():
    """Serve the dashboard HTML page under the /turf prefix (with or without slash)."""
    dashboard_page = "/home/h3r7/turf_saas/dashboard.html"
    return send_file(dashboard_page)


@app.route("/turf/<path:filename>")
def turf_static(filename):
    """Serve a file from the application directory under /turf/.

    The directory served is the one that also holds the SQLite database
    (DB_PATH), the pickled models (MODEL_PATH) and the Python sources, so
    requests for those kinds of files, and for hidden files or directories,
    are answered with 404 instead of being handed out.

    Args:
        filename: Path below /turf/ taken from the URL (untrusted).
    """
    from flask import abort

    # Database files (including SQLite sidecar files), pickles, source code
    # and environment files must never be downloadable.
    blocked_suffixes = (
        ".db",
        ".db-wal",
        ".db-shm",
        ".db-journal",
        ".sqlite",
        ".sqlite3",
        ".pkl",
        ".pickle",
        ".py",
        ".pyc",
        ".env",
    )
    lowered = filename.lower()
    is_hidden = any(part.startswith(".") for part in lowered.split("/"))
    if is_hidden or lowered.endswith(blocked_suffixes):
        abort(404)

    # send_from_directory itself rejects paths that escape the directory.
    return send_from_directory("/home/h3r7/turf_saas", filename)


@app.route("/api/today")
@app.route("/turf/api")
def api_today():
    """Return today's dashboard payload as JSON.

    Query parameters:
        race: Optional exact ``race_name``; when given, the race list,
            the runners and the tips are restricted to that race.

    The payload contains: ``date``, ``races`` (today's races), ``race``
    (summary of the first race), ``predictions`` (``partants`` plus the
    ``bases`` / ``chances`` / ``outsiders`` tips), ``results`` (first five of
    today), ``weather`` (latest row), ``scores`` (yesterday's ``bases`` tips
    found in the top 3) and, when runners exist, ``risque_label`` /
    ``risque_course`` from calculate_risque().

    Failures of the race, runner and tip queries are printed and leave the
    corresponding section empty; failures of the remaining queries propagate.
    The database connection is closed in every case.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    race_filter = request.args.get("race", "")

    data = {
        "date": today,
        "races": [],
        "race": {},
        "predictions": {},
        "results": [],
        "weather": {},
        "scores": {},
    }

    # Only this fixed SQL fragment is interpolated into the queries; the
    # user-supplied race name is always passed as a bound parameter.
    if race_filter:
        race_condition = "AND race_name = ?"
        race_params = (race_filter,)
    else:
        race_condition = ""
        race_params = ()

    conn = get_db()
    try:
        # All races of the day
        try:
            c = conn.execute(
                f"""
                SELECT DISTINCT race_name, race_hippodrome, race_time
                FROM predictions
                WHERE date=? AND source='canalturf_partants' {race_condition}
                ORDER BY race_time ASC
            """,
                (today,) + race_params,
            )
            races = c.fetchall()

            data["races"] = [
                {"name": r[0], "hippodrome": r[1], "time": r[2]} for r in races
            ]

            if races:
                name, hippodrome, time_str = races[0][0], races[0][1], races[0][2]
                data["race"] = {
                    "name": f"{hippodrome} - {time_str} {name}",
                    "hippodrome": hippodrome,
                    "time": time_str,
                }
        except Exception as e:
            print(f"Erreur races: {e}")

        # Today's runners, only those with positive odds
        try:
            c = conn.execute(
                f"""
                SELECT horse_name, horse_number, AVG(odds) as odds, prediction_rank, source, jockey
                FROM predictions
                WHERE date = ? AND source = 'canalturf_partants' AND odds > 0 {race_condition}
                GROUP BY horse_name
                ORDER BY odds ASC
            """,
                (today,) + race_params,
            )
            data["predictions"]["partants"] = [dict(row) for row in c.fetchall()]
        except Exception as e:
            print(f"Erreur partants: {e}")
            data["predictions"]["partants"] = []

        # Tips (bases, chances, outsiders)
        for cat, src in [
            ("bases", "canalturf_prono_bases"),
            ("chances", "canalturf_prono_chances"),
            ("outsiders", "canalturf_prono_outsiders"),
        ]:
            try:
                c = conn.execute(
                    f"""
                    SELECT DISTINCT horse_name, horse_number, prediction_rank
                    FROM predictions WHERE date = ? AND source = ? {race_condition}
                    ORDER BY prediction_rank
                """,
                    (today, src) + race_params,
                )
                data["predictions"][cat] = [dict(row) for row in c.fetchall()]
            except Exception as e:
                print(f"Erreur {cat}: {e}")
                data["predictions"][cat] = []

        # Today's results
        c = conn.execute(
            "SELECT horse_name, position, odds FROM results WHERE date = ? ORDER BY position LIMIT 5",
            (today,),
        )
        data["results"] = [dict(row) for row in c.fetchall()]

        # Weather: most recent row
        row = conn.execute("SELECT * FROM weather ORDER BY id DESC LIMIT 1").fetchone()
        if row:
            data["weather"] = dict(row)

        # NOTE(review): these runner rows carry odds but no ml_score/prob_top1,
        # so calculate_risque() sees a strength of 0 for every horse here.
        partants_list = data["predictions"]["partants"]
        if partants_list:
            risque_label, risque_course = calculate_risque(partants_list)
            data["risque_label"] = risque_label
            data["risque_course"] = risque_course

        # Yesterday's score: how many "bases" tips finished in the top 3
        yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        data["scores"]["date"] = yesterday
        c = conn.execute(
            "SELECT horse_name FROM results WHERE date = ? AND position <= 3", (yesterday,)
        )
        placed_names = {r[0] for r in c.fetchall()}
        c = conn.execute(
            "SELECT DISTINCT horse_name FROM predictions WHERE date = ? AND source='canalturf_prono_bases'",
            (yesterday,),
        )
        our_preds = [r[0] for r in c.fetchall()]
        our_score = sum(1 for p in our_preds if p in placed_names)
        data["scores"]["bases"] = f"{our_score}/{len(our_preds)}" if our_preds else "-"

        return jsonify(data)
    finally:
        conn.close()


@app.route("/api/odds_history")
@app.route("/turf/api/odds_history")
def api_odds_history():
    """Return today's odds snapshots per horse, with the evolution since the first one.

    For every horse found in ``odds_history`` for today the response lists
    its snapshots in ``scraped_at`` order (``time`` is characters 11-15 of
    ``scraped_at``, presumably HH:MM of an ISO-like timestamp), the first and
    latest odds, the percentage change between them and a ``tendance`` of
    "baisse" (< -5 %), "hausse" (> +5 %) or "stable". Horses are sorted by
    their latest odds, horses without a latest odds value last.
    """
    today = datetime.now().strftime("%Y-%m-%d")

    conn = get_db()
    try:
        rows = conn.execute(
            """
            SELECT horse_name, horse_number, odds, scraped_at
            FROM odds_history
            WHERE date = ?
            ORDER BY horse_name, scraped_at ASC
        """,
            (today,),
        ).fetchall()
    finally:
        conn.close()

    horses = {}
    for row in rows:
        name = row["horse_name"]
        entry = horses.setdefault(
            name,
            {
                "horse_name": name,
                "horse_number": row["horse_number"],
                "snapshots": [],
            },
        )
        # A NULL timestamp yields an empty time instead of a TypeError.
        scraped_at = row["scraped_at"] or ""
        entry["snapshots"].append({"odds": row["odds"], "time": scraped_at[11:16]})

    result = []
    for name, info in horses.items():
        snaps = info["snapshots"]  # never empty: created on the first row seen
        debut = snaps[0]["odds"]
        actuel = snaps[-1]["odds"]
        # The evolution needs two snapshots and usable (non-NULL, positive) odds.
        if len(snaps) > 1 and debut is not None and actuel is not None and debut > 0:
            evol_pct = round(((actuel - debut) / debut) * 100, 1)
        else:
            evol_pct = 0

        if evol_pct < -5:
            tendance = "baisse"
        elif evol_pct > 5:
            tendance = "hausse"
        else:
            tendance = "stable"

        result.append(
            {
                "horse_name": name,
                "horse_number": info["horse_number"],
                "odds_debut": debut,
                "odds_actuel": actuel,
                "evol_pct": evol_pct,
                "nb_snapshots": len(snaps),
                "snapshots": snaps,
                "tendance": tendance,
            }
        )

    result.sort(
        key=lambda x: x["odds_actuel"] if x["odds_actuel"] is not None else float("inf")
    )
    return jsonify({"date": today, "horses": result})


@app.route("/api/weather")
def api_weather():
    """Return the four most recent rows of the ``weather`` table as a JSON list."""
    conn = get_db()
    try:
        cursor = conn.execute("SELECT * FROM weather ORDER BY id DESC LIMIT 4")
        weather = [dict(row) for row in cursor.fetchall()]
    finally:
        # Close the connection even when the query fails.
        conn.close()
    return jsonify(weather)


@app.route("/api/ml_predictions")
@app.route("/turf/api/ml_predictions")
def api_ml_predictions():
    """Return ML predictions for today's runners, served from the DB cache when possible.

    Query parameters:
        refresh: "1" bypasses the cache and forces a recomputation.

    Flow:
      1. Unless a refresh is forced, return the rows cached for today.
      2. Load the model bundle (``model_top1`` / ``model_top3`` entries are
         used) and the runners via load_ml_horses() / enrich_ml_horses().
      3. Build the feature row of each horse and score it; a horse whose
         scoring raises is reported with an ``error`` field instead.
      4. Sort by ``ml_score``, attach race metadata and the per-race risk
         from calculate_risque().
      5. Store the result in the cache (best effort) when every horse was
         scored successfully.

    Error conditions (ML libraries missing, models not loaded, no runners)
    are reported as JSON bodies with the default status code. The database
    connection is closed in every case.
    """
    import re
    from collections import defaultdict

    if not ML_AVAILABLE:
        return jsonify({"error": "ML libraries not available"})

    today = datetime.now().strftime("%Y-%m-%d")
    force_refresh = request.args.get("refresh", "0") == "1"

    conn = get_db()
    try:
        # --- CACHE READ ---
        if not force_refresh:
            cached_preds, cached_courses = get_ml_from_cache(conn, today)
            if cached_preds:
                return jsonify({
                    "date": today,
                    "model_version": "xgboost_v1",
                    "predictions": cached_preds,
                    "courses": cached_courses,
                    "from_cache": True,
                })

        # --- ML COMPUTATION ---
        models = load_models()
        if not models or models is True:
            return jsonify(
                {
                    "error": "Models not loaded",
                    "message": "Run train_xgboost.py first to train the models",
                }
            )

        date_used, horses, course_info = load_ml_horses(conn, today)
        horses = enrich_ml_horses(horses)
        if not horses:
            return jsonify(
                {
                    "date": date_used,
                    "predictions": [],
                    "message": "No predictions available",
                }
            )

        # Column order handed to the models.
        # NOTE(review): presumably this must match the training script exactly - confirm.
        feature_cols = [
            "age",
            "sexe_enc",
            "nb_courses",
            "nb_victoires",
            "nb_places",
            "tx_victoire",
            "tx_place",
            "forme_recente",
            "reduction_km",
            "gains_annee",
            "cote_directe",
            "distance",
            "nb_partants",
            "discipline_enc",
            "avis_enc",
            "oeilleres_enc",
            "deferre_enc",
            "form_1",
            "form_2",
            "form_3",
            "form_4",
            "form_5",
            "form_avg",
            "win_rate_adj",
            "place_rate_adj",
            "implied_prob",
            "victories_per_race",
            "places_per_race",
            "earnings_per_race",
            "age_win_interact",
            "distance_cat",
            "is_favorite",
            "rang_cote",
            "ratio_cote_field",
        ]
        numeric_cols = [
            "age",
            "nb_courses",
            "nb_victoires",
            "nb_places",
            "tx_victoire",
            "tx_place",
            "forme_recente",
            "reduction_km",
            "gains_annee",
            "cote_directe",
            "distance",
            "nb_partants",
            "rang_cote",
            "ratio_cote_field",
        ]
        # (encoded feature name, source field, default category)
        categorical = (
            ("sexe_enc", "sexe", "U"),
            ("avis_enc", "avis_entraineur", "NEUTRE"),
            ("oeilleres_enc", "oeilleres", "SANS"),
            ("deferre_enc", "deferre", "NON"),
            ("discipline_enc", "discipline", "PLAT"),
        )

        # NOTE(review): the encoders are fitted on the values present in this
        # request, so the integer codes depend on today's data rather than on
        # the encoding used at training time - confirm this is intended.
        encoders = {}
        for enc_name, field, default in categorical:
            seen = {h.get(field, default) or default for h in horses}
            encoder = LabelEncoder()
            encoder.fit(list(seen) + [default])
            encoders[enc_name] = encoder

        digits = re.compile(r"\d+")
        predictions = []

        for horse in horses:
            # Numeric features (None / missing -> 0.0)
            features = {col: float(horse.get(col, 0) or 0) for col in numeric_cols}

            # Encoded categorical features
            for enc_name, field, default in categorical:
                value = horse.get(field, default) or default
                features[enc_name] = encoders[enc_name].transform([value])[0]

            # Form features: the first five numbers found in "musique", zero-padded
            form_values = [float(n) for n in digits.findall(str(horse.get("musique", "")))[:5]]
            form_values += [0.0] * (5 - len(form_values))
            for i, value in enumerate(form_values, start=1):
                features[f"form_{i}"] = value
            features["form_avg"] = sum(form_values) / 5

            # Derived features
            nb_courses_floor = max(features["nb_courses"], 1)
            features["implied_prob"] = (
                1 / features["cote_directe"] if features["cote_directe"] > 0 else 0
            )
            features["win_rate_adj"] = features["tx_victoire"] * np.log1p(
                features["nb_courses"]
            )
            features["place_rate_adj"] = features["tx_place"] * np.log1p(
                features["nb_courses"]
            )
            features["victories_per_race"] = features["nb_victoires"] / nb_courses_floor
            features["places_per_race"] = features["nb_places"] / nb_courses_floor
            features["earnings_per_race"] = features["gains_annee"] / nb_courses_floor
            features["age_win_interact"] = features["age"] * features["tx_victoire"]
            features["distance_cat"] = (
                2.0
                if 1500 < features["distance"] <= 2000
                else (3.0 if 2000 < features["distance"] <= 2500 else 1.0)
            )
            features["is_favorite"] = 1 if features["cote_directe"] < 5 else 0

            # Score the horse
            try:
                X = pd.DataFrame([features])[feature_cols]
                X = X.fillna(0)

                prob_top1 = float(models["model_top1"].predict_proba(X)[0][1])
                prob_top3 = float(models["model_top3"].predict_proba(X)[0][1])
                odds = float(horse.get("odds", 0))

                if prob_top1 > 0.15:
                    recommendation = "top1"
                elif prob_top3 > 0.35:
                    recommendation = "top3"
                else:
                    recommendation = "pass"

                predictions.append(
                    {
                        "horse_name": horse["horse_name"],
                        "horse_number": horse["horse_number"],
                        "odds": float(horse["odds"]),
                        "prob_top1": round(prob_top1 * 100, 1),
                        "prob_top3": round(prob_top3 * 100, 1),
                        "ml_score": round((prob_top1 * 0.6 + prob_top3 * 0.4) * 100, 1),
                        "recommendation": recommendation,
                        # Long odds but a good top-3 probability
                        "is_value_bet": 1 if (prob_top3 > 0.35 and odds > 10) else 0,
                        # Short odds but the model does not believe in the horse
                        "is_outlier": 1
                        if (odds <= 5 and (prob_top1 < 0.1 and prob_top3 < 0.25))
                        else 0,
                        "num_reunion": horse.get("num_reunion"),
                        "num_course": horse.get("num_course"),
                    }
                )
            except Exception as e:
                predictions.append(
                    {
                        "horse_name": horse["horse_name"],
                        "horse_number": horse["horse_number"],
                        "odds": horse["odds"],
                        "error": str(e),
                    }
                )

        # Best ML score first (failed rows have no score and sort as 0)
        predictions.sort(key=lambda x: x.get("ml_score", 0), reverse=True)

        # Attach race metadata when it is known
        for pred in predictions:
            course_key = f"{pred.get('num_reunion')}_{pred.get('num_course')}"
            if course_key in course_info:
                cinfo = course_info[course_key]
                pred["race_label"] = f"R{pred.get('num_reunion')}C{pred.get('num_course')}"
                pred["race_name"] = cinfo.get("libelle", "")
                pred["hippodrome"] = cinfo.get("libelle_court", "")
                pred["discipline"] = cinfo.get("discipline", "")
                pred["distance"] = cinfo.get("distance", 0)
                pred["heure"] = cinfo.get("heure_depart_str", "")

        # --- PER-RACE RISK, injected into every prediction of the race ---
        race_horses = defaultdict(list)
        for p in predictions:
            race_horses[(p.get("num_reunion"), p.get("num_course"))].append({
                "odds":      p.get("odds", 999),
                "ml_score":  p.get("ml_score", 0),
                "prob_top1": p.get("prob_top1", 0),
                "prob_top3": p.get("prob_top3", 0),
            })
        race_risque = {}
        for key, partants in race_horses.items():
            label, score = calculate_risque(partants)
            race_risque[key] = (label or "neutral", score or 50)
        for p in predictions:
            rkey = (p.get("num_reunion"), p.get("num_course"))
            p["risque_label"], p["risque_score"] = race_risque.get(rkey, ("neutral", 50))

        # --- CACHE WRITE (never blocks the response) ---
        # Rows that failed to score are not cached: the cache has no error
        # column, so they would later be served as predictions with NULL scores.
        # NOTE(review): the cache key is `today` even when `date_used` is an
        # older fallback date - confirm this is intended.
        if any("error" in p for p in predictions):
            print("⚠️  ML cache not updated: some predictions failed")
        else:
            try:
                save_ml_to_cache(conn, today, predictions)
            except Exception as e_cache:
                print(f"⚠️  Failed to save ML cache: {e_cache}")

        return jsonify(
            {
                "date": date_used,
                "model_version": "xgboost_v1",
                "predictions": predictions,
                "courses": course_info,
                "from_cache": False,
            }
        )
    finally:
        conn.close()


@app.route("/api/ml_predictions/refresh")
@app.route("/turf/api/ml_predictions/refresh")
def api_ml_predictions_refresh():
    """Force a recomputation of today's ML predictions.

    Deletes every row of ``ml_predictions_cache`` dated today (creating the
    table first if needed), then redirects the client to the main
    ``api_ml_predictions`` endpoint with ``?refresh=1``.

    Returns:
        A Flask redirect response to the main predictions endpoint.
    """
    from flask import redirect, url_for

    today = datetime.now().strftime("%Y-%m-%d")
    conn = get_db()
    try:
        ensure_ml_cache_table(conn)
        conn.execute("DELETE FROM ml_predictions_cache WHERE date = ?", (today,))
        conn.commit()
    finally:
        # Always release the connection, even if the purge fails.
        conn.close()
    # Delegate the actual recomputation to the main endpoint.
    return redirect(url_for("api_ml_predictions") + "?refresh=1")


@app.route("/api/scoring")
@app.route("/turf/api/scoring")
def api_scoring():
    """Return today's scoring rows for the dashboard.

    Query parameters:
        race: optional substring; when given, only rows whose ``race_name``
            contains it (SQL ``LIKE``) are returned.

    Returns:
        JSON ``{"scores": [...], "recommendations": {}}`` where ``scores`` is
        ordered by ascending ``rang_scoring``.
    """
    race = request.args.get("race", "")
    today = datetime.now().strftime("%Y-%m-%d")
    query = """
        SELECT date, race_name, horse_name, horse_number, score,
               score_cote, score_forme, score_victoire, score_place,
               cote, forme_recente, tx_victoire, tx_place,
               rang_scoring, avis_entraineur, musique
        FROM scoring
        WHERE date = ?
    """
    params = [today]
    if race:
        # The value is bound as a parameter, never interpolated into the SQL.
        query += " AND race_name LIKE ?"
        params.append(f"%{race}%")
    query += " ORDER BY rang_scoring ASC"

    conn = get_db()
    try:
        scores = [dict(row) for row in conn.execute(query, params).fetchall()]
    finally:
        # Close the connection even when the query raises.
        conn.close()
    return jsonify({"scores": scores, "recommendations": {}})


# === AUTOMATED REPORTS ===
# The analytics_reports module is optional: when it cannot be imported,
# HAS_ANALYTICS is set to False and the report endpoints below answer with
# an error payload instead of calling the (undefined) report functions.
try:
    from analytics_reports import (
        get_daily_report,
        get_weekly_report,
        get_monthly_report,
    )

    HAS_ANALYTICS = True
except ImportError:
    HAS_ANALYTICS = False


@app.route("/turf/api/report/daily")
def api_report_daily():
    """Serve the daily report as JSON.

    The optional ``date`` query parameter is forwarded unchanged to
    ``get_daily_report`` (``None`` when absent). Responds with HTTP 500 and
    an error payload when the analytics module could not be imported.
    """
    if HAS_ANALYTICS:
        report = get_daily_report(request.args.get("date"))
        return jsonify(report)
    return jsonify({"error": "analytics module not available"}), 500


@app.route("/turf/api/report/weekly")
def api_report_weekly():
    """Serve the weekly report as JSON.

    The optional ``start`` and ``end`` query parameters are forwarded
    unchanged to ``get_weekly_report`` (``None`` when absent). Responds with
    HTTP 500 and an error payload when the analytics module could not be
    imported.
    """
    if HAS_ANALYTICS:
        args = request.args
        report = get_weekly_report(args.get("start"), args.get("end"))
        return jsonify(report)
    return jsonify({"error": "analytics module not available"}), 500


@app.route("/turf/api/report/monthly")
def api_report_monthly():
    """Serve the monthly report as JSON.

    The optional ``year`` and ``month`` query parameters are parsed as
    integers and forwarded to ``get_monthly_report``. Responds with HTTP 500
    and an error payload when the analytics module could not be imported.
    """
    if HAS_ANALYTICS:
        args = request.args
        report = get_monthly_report(
            args.get("year", type=int),
            args.get("month", type=int),
        )
        return jsonify(report)
    return jsonify({"error": "analytics module not available"}), 500


@app.route("/turf/api/suggestions")
def api_suggestions():
    """Return a list of suggested questions as JSON.

    The list depends on whether ``pmu_partants`` holds rows from the last
    seven days. If the database lookup fails, a generic fallback list is
    returned instead of an error.

    Returns:
        JSON ``{"suggestions": [...]}``.
    """
    conn = get_db()
    try:
        recent = conn.execute(
            "SELECT COUNT(*) as cnt FROM pmu_partants WHERE date_programme >= date('now', '-7 days')"
        ).fetchone()[0]

        if recent > 0:
            suggestions = [
                "Quel est mon taux de réussite cette semaine?",
                "Liste les 5 meilleurs jockeys",
                "Quel est le ROI du mois?",
                "Résultats d'hier",
                "Programme du jour",
            ]
        else:
            suggestions = [
                "Derniers gagnants",
                "Meilleurs entraîneurs",
                "Performances à Vincennes",
                "Évolution des cotes",
            ]
    except sqlite3.Error:
        # Best-effort endpoint: a missing table or locked database must not
        # fail the request. Only database errors are swallowed, so that
        # KeyboardInterrupt/SystemExit still propagate.
        suggestions = [
            "Quel est le taux de réussite des favoris?",
            "Liste les meilleurs jockeys",
            "Résultats d'hier",
        ]
    finally:
        conn.close()

    return jsonify({"suggestions": suggestions})


@app.route("/turf/api/metrics/summary")
@app.route("/turf/api/metrics/summary/")
def metrics_summary():
    """Return per-source aggregates of ``prediction_metrics`` as JSON.

    Query parameters:
        days: size of the look-back window in days (default 30, clamped to
            the range 1..365).

    Returns:
        JSON ``{"summary": [...]}`` with one object per source, ordered by
        descending average place rate. On any failure, JSON
        ``{"error": True, "message": ...}`` is returned instead.
    """
    days = min(max(request.args.get("days", 30, type=int), 1), 365)
    conn = None
    try:
        conn = get_db()
        # Passed as a bound parameter to SQLite's date() modifier.
        date_filter = f"-{days} days"
        cur = conn.execute(
            "SELECT source, COUNT(*) as nb_courses, SUM(nb_predictions) as total_predictions, "
            "SUM(nb_gagnants) as total_gagnants, SUM(nb_places) as total_places, SUM(nb_top5) as total_top5, "
            "ROUND(AVG(taux_gagnant), 2) as moy_taux_gagnant, ROUND(AVG(taux_place), 2) as moy_taux_place, "
            "ROUND(AVG(taux_top5), 2) as moy_taux_top5, ROUND(SUM(roi_sg_net), 3) as roi_sg_cumul, "
            "ROUND(SUM(roi_sp_net), 3) as roi_sp_cumul, ROUND(AVG(ecart_rang_moyen), 2) as moy_ecart_rang, "
            "SUM(quinte_5sur5) as nb_5sur5, SUM(quinte_4sur5) as nb_4sur5, SUM(quinte_3sur5) as nb_3sur5 "
            "FROM prediction_metrics WHERE date >= date('now', ?) GROUP BY source ORDER BY moy_taux_place DESC",
            (date_filter,))
        cols = [d[0] for d in cur.description]
        rows = [dict(zip(cols, row)) for row in cur.fetchall()]
        return jsonify({"summary": rows})
    except Exception as e:
        # Errors are reported in the payload rather than raised.
        return jsonify({"error": True, "message": str(e)})
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()

@app.route("/turf/api/metrics/daily")
@app.route("/turf/api/metrics/daily/")
def metrics_daily():
    """Return per-day, per-source aggregates of ``prediction_metrics`` as JSON.

    Query parameters:
        days: size of the look-back window in days (default 30, clamped to
            the range 1..365).

    Returns:
        JSON ``{"daily": [...]}`` with one object per (date, source) pair,
        most recent date first. On any failure, JSON
        ``{"error": True, "message": ...}`` is returned instead.
    """
    days = min(max(request.args.get("days", 30, type=int), 1), 365)
    conn = None
    try:
        conn = get_db()
        # Passed as a bound parameter to SQLite's date() modifier.
        date_filter = f"-{days} days"
        cur = conn.execute(
            "SELECT date, source, SUM(nb_predictions) as predictions, SUM(nb_gagnants) as gagnants, "
            "SUM(nb_places) as places, SUM(nb_top5) as top5, ROUND(AVG(taux_gagnant), 2) as taux_gagnant, "
            "ROUND(AVG(taux_place), 2) as taux_place, ROUND(AVG(roi_sg_net), 3) as roi_sg, "
            "ROUND(AVG(roi_sp_net), 3) as roi_sp, SUM(quinte_5sur5) as quinte_5sur5, "
            "SUM(quinte_4sur5) as quinte_4sur5 "
            "FROM prediction_metrics WHERE date >= date('now', ?) GROUP BY date, source ORDER BY date DESC",
            (date_filter,))
        cols = [d[0] for d in cur.description]
        rows = [dict(zip(cols, row)) for row in cur.fetchall()]
        return jsonify({"daily": rows})
    except Exception as e:
        # Errors are reported in the payload rather than raised.
        return jsonify({"error": True, "message": str(e)})
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()

if __name__ == "__main__":
    # Attempt to load the pickled models once, before serving requests.
    load_models()
    # Listen on all interfaces, port 8791, with the Flask debugger disabled.
    app.run(debug=False, port=8791, host="0.0.0.0")