Files
turf_saas/dashboard_api.py
DevOps Engineer 793ee82c29 fix(qa): add /health endpoints to Flask apps for Docker healthchecks
Docker compose healthchecks target /health on combined-api, dashboard-api
and portal, but these endpoints did not exist (returned 404). This caused
all dependent services (condition: service_healthy) to fail startup.

- combined_api.py: GET /health + /turf/health with DB connectivity check
- dashboard_api.py: GET /health + /turf/health with DB connectivity check
- portal_server.py: GET /health (lightweight, no DB)

QA Finding 1 from HRT-34 review of HRT-33 branch feature/devops-cicd.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-25 17:44:21 +02:00

1217 lines
40 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Turf Dashboard data API server.

Includes ML predictions using XGBoost models.
"""
# The docstring must be the first statement of the module to be picked up as
# the module docstring, so the startup marker is printed after it.
print("STARTING API...")
from flask import Flask, jsonify, send_file, send_from_directory, request
import sqlite3
from datetime import datetime, timedelta
import pickle
import os
# Optional ML stack: the module still imports without it, and ML_AVAILABLE
# records whether pandas, numpy and scikit-learn could be loaded.
try:
    import pandas as pd
    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    ML_AVAILABLE = True
except ImportError:
    # Keep the module-level names defined when the import fails.
    ML_AVAILABLE = False
    pd = None
    np = None
    LabelEncoder = None
# Flask application object on which every route of this module is registered.
app = Flask(__name__)
# Absolute path of the SQLite database opened by get_db().
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
# Absolute path of the pickled models read by load_models().
MODEL_PATH = "/home/h3r7/turf_saas/xgboost_models.pkl"
# Model cache used by load_models(): None = not loaded yet, False = loading
# failed, anything else = the unpickled object.
ml_models = None
def load_models():
    """Return the cached model object, unpickling MODEL_PATH on first use.

    The module-level ``ml_models`` acts as the cache: ``None`` means no load
    was attempted yet, ``False`` means a previous attempt failed.  While the
    file does not exist the cache stays ``None``, so a later call retries.

    Returns:
        The unpickled object, ``False`` after a failed load, or ``None`` when
        the model file is absent.
    """
    global ml_models
    load_already_attempted = ml_models is not None
    if load_already_attempted or not os.path.exists(MODEL_PATH):
        return ml_models
    try:
        # NOTE(review): pickle runs arbitrary code while loading; MODEL_PATH
        # must only ever point at a trusted file.
        with open(MODEL_PATH, "rb") as model_file:
            ml_models = pickle.load(model_file)
        print("✅ XGBoost models loaded")
    except Exception as exc:
        print(f"⚠️ Failed to load models: {exc}")
        ml_models = False
    return ml_models
def get_db():
    """Open a new SQLite connection to DB_PATH that yields sqlite3.Row rows.

    The caller owns the returned connection and is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    # Row objects allow access by column name and conversion with dict(row).
    connection.row_factory = sqlite3.Row
    return connection
def ensure_ml_cache_table(conn):
    """Create the ml_predictions_cache table if needed and migrate old schemas.

    The table and its date index are created when missing.  Tables created
    before the risk columns existed get ``risque_label`` and ``risque_score``
    added.  The work is committed before returning.

    Args:
        conn: Open sqlite3 connection.
    """
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS ml_predictions_cache (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            date TEXT NOT NULL,
            num_reunion INTEGER,
            num_course INTEGER,
            horse_name TEXT,
            horse_number INTEGER,
            odds REAL,
            prob_top1 REAL,
            prob_top3 REAL,
            ml_score REAL,
            recommendation TEXT,
            is_value_bet INTEGER DEFAULT 0,
            is_outlier INTEGER DEFAULT 0,
            race_label TEXT,
            race_name TEXT,
            hippodrome TEXT,
            discipline TEXT,
            distance REAL,
            heure TEXT,
            risque_label TEXT DEFAULT 'neutral',
            risque_score INTEGER DEFAULT 50,
            model_version TEXT DEFAULT 'xgboost_v1',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(date, num_reunion, num_course, horse_name)
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_ml_cache_date
        ON ml_predictions_cache(date)
        """
    )

    # Migration: add the risk columns to a pre-existing table that lacks them.
    # The existing columns are inspected first so that genuine failures are no
    # longer hidden behind a blanket "except Exception: pass".
    existing_columns = {
        info[1] for info in conn.execute("PRAGMA table_info(ml_predictions_cache)")
    }
    migrations = (
        (
            "risque_label",
            "ALTER TABLE ml_predictions_cache ADD COLUMN risque_label TEXT DEFAULT 'neutral'",
        ),
        (
            "risque_score",
            "ALTER TABLE ml_predictions_cache ADD COLUMN risque_score INTEGER DEFAULT 50",
        ),
    )
    for column, statement in migrations:
        if column in existing_columns:
            continue
        try:
            conn.execute(statement)
        except sqlite3.OperationalError:
            # Another connection may have added the column between the check
            # and the ALTER; the migration stays best-effort in that case.
            pass
    conn.commit()
def get_ml_from_cache(conn, date):
    """Read the cached ML predictions stored for *date*.

    Args:
        conn: sqlite3 connection whose rows can be converted with ``dict(row)``.
        date: Date string used as the cache key.

    Returns:
        ``(predictions, course_info)``: predictions ordered by descending
        ``ml_score`` and a dict of race details keyed by
        ``"<num_reunion>_<num_course>"``, or ``(None, None)`` when nothing is
        cached for that date.
    """
    ensure_ml_cache_table(conn)
    cached_rows = conn.execute(
        """SELECT * FROM ml_predictions_cache WHERE date = ? ORDER BY ml_score DESC""",
        (date,),
    ).fetchall()
    if not cached_rows:
        return None, None

    # Columns copied as-is from the cache row into each prediction.
    copied_fields = (
        "horse_name",
        "horse_number",
        "odds",
        "prob_top1",
        "prob_top3",
        "ml_score",
        "recommendation",
        "is_value_bet",
        "is_outlier",
        "num_reunion",
        "num_course",
        "race_label",
        "race_name",
        "hippodrome",
        "discipline",
        "distance",
        "heure",
    )
    predictions = []
    course_info = {}
    for cached in cached_rows:
        record = dict(cached)
        entry = {field: record[field] for field in copied_fields}
        # The risk columns fall back to neutral defaults if the row lacks them.
        entry["risque_label"] = record.get("risque_label", "neutral")
        entry["risque_score"] = record.get("risque_score", 50)
        predictions.append(entry)

        # The first row seen for a race supplies that race's details.
        course_key = f"{record['num_reunion']}_{record['num_course']}"
        course_info.setdefault(
            course_key,
            {
                "libelle": record["race_name"],
                "libelle_court": record["hippodrome"],
                "discipline": record["discipline"],
                "distance": record["distance"],
                "heure_depart_str": record["heure"],
            },
        )
    return predictions, course_info
def save_ml_to_cache(conn, date, predictions, model_version="xgboost_v1"):
    """Replace the cached ML predictions of *date* with *predictions*.

    The per-race risk (label, score) is computed from all runners of each race
    and stored on every row of that race.  The old rows of the day are deleted
    and the new ones inserted in one transaction, so a failure leaves the
    previous cache untouched.  Rows are written with INSERT OR REPLACE so a
    runner listed twice no longer aborts the save with an IntegrityError.

    Args:
        conn: Open sqlite3 connection.
        date: Date string used as the cache key.
        predictions: Iterable of prediction dicts as built by the ML endpoint.
        model_version: Label stored in the ``model_version`` column.
    """
    from collections import defaultdict

    ensure_ml_cache_table(conn)

    # Group the runners by race so the risk is computed once per race.
    runners_by_race = defaultdict(list)
    for p in predictions:
        runners_by_race[(p.get("num_reunion"), p.get("num_course"))].append(
            {
                "odds": p.get("odds", 999),
                "ml_score": p.get("ml_score", 0),
                "prob_top1": p.get("prob_top1", 0),
                "prob_top3": p.get("prob_top3", 0),
            }
        )
    race_risque = {}
    for key, partants in runners_by_race.items():
        label, score = calculate_risque(partants)
        race_risque[key] = (label or "neutral", score or 50)

    # Build every row before touching the table.
    rows = []
    for p in predictions:
        rl, rs = race_risque.get(
            (p.get("num_reunion"), p.get("num_course")), ("neutral", 50)
        )
        rows.append(
            (
                date,
                p.get("num_reunion"),
                p.get("num_course"),
                p.get("horse_name"),
                p.get("horse_number"),
                p.get("odds"),
                p.get("prob_top1"),
                p.get("prob_top3"),
                p.get("ml_score"),
                p.get("recommendation"),
                p.get("is_value_bet", 0),
                p.get("is_outlier", 0),
                p.get("race_label"),
                p.get("race_name"),
                p.get("hippodrome"),
                p.get("discipline"),
                p.get("distance"),
                p.get("heure"),
                rl,
                rs,
                model_version,
            )
        )

    # "with conn" commits on success and rolls back if any statement fails.
    with conn:
        # Drop the day's previous entries so a refresh fully replaces them.
        conn.execute("DELETE FROM ml_predictions_cache WHERE date = ?", (date,))
        conn.executemany(
            """
            INSERT OR REPLACE INTO ml_predictions_cache
            (date, num_reunion, num_course, horse_name, horse_number, odds,
            prob_top1, prob_top3, ml_score, recommendation, is_value_bet, is_outlier,
            race_label, race_name, hippodrome, discipline, distance, heure,
            risque_label, risque_score, model_version)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            """,
            rows,
        )
def calculate_risque(partants):
    """Rate how risky a race is from its runners' ML scores and odds.

    Each runner is a dict that may hold ``ml_score``, ``prob_top1`` and
    ``odds``.  A runner's strength is its ``ml_score``, or ``prob_top1`` when
    the ML score is missing or zero, else 0.  Rules are checked in this order:

    - ``safe``: the strongest runner scores >= 65 and leads the second by at
      least 20 points.
    - ``trap``: the shortest-priced runner has odds < 5 but a strength < 25;
      or at least 4 runners have ``ml_score`` > 40 while the leader is < 70;
      or the leader's margin is < 8 with the second runner above 45.
    - ``neutral``: everything else.

    Args:
        partants: List of runner dicts for one race.

    Returns:
        ``(label, score)`` where score is an int (higher means safer; safe
        50-100, trap at most 37, neutral 35-64), or ``(None, None)`` when
        *partants* is empty.
    """
    if not partants:
        return None, None

    def strength(runner):
        # ML score first, win probability as fallback, 0 when both are absent.
        return runner.get("ml_score") or runner.get("prob_top1") or 0

    ranked = sorted((strength(p) for p in partants), reverse=True)
    top1_score = ranked[0]
    top2_score = ranked[1] if len(ranked) > 1 else 0
    gap_1_2 = top1_score - top2_score  # lead of the best runner over the second

    # Number of runners the model rates as dangerous (ml_score > 40).
    nb_dangerous = sum(1 for p in partants if (p.get("ml_score") or 0) > 40)

    # Market favourite: shortest odds, missing odds counting as 999.
    favourite = min(partants, key=lambda x: x.get("odds") or 999)
    fav_odds = favourite.get("odds") or 999
    fav_ml = strength(favourite)
    fav_surprise = fav_odds < 5 and fav_ml < 25  # favourite ignored by the model

    # --- SAFE: clear domination ---
    if top1_score >= 65 and gap_1_2 >= 20:
        return "safe", min(100, int(50 + gap_1_2 * 1.5))

    # --- TRAP: very open race or trapped favourite ---
    if fav_surprise:
        return "trap", max(10, int(35 - (25 - fav_ml)))
    if nb_dangerous >= 4 and top1_score < 70:
        return "trap", max(10, int(40 - nb_dangerous * 2))
    if gap_1_2 < 8 and top2_score > 45:
        return "trap", max(15, int(30 + gap_1_2))

    # --- NEUTRAL: score 35-64 depending on the leader's margin ---
    return "neutral", min(64, max(35, int(35 + gap_1_2 * 1.2)))
def table_exists(conn, table_name):
    """Return True when sqlite_master lists a table named *table_name*.

    Only entries of type ``table`` are matched.
    """
    lookup = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", (table_name,)
    )
    match = lookup.fetchone()
    return match is not None
def load_ml_horses(conn, today):
    """Load the runners to score for *today*, with a fallback date.

    Sources are tried in this order:

    1. If a ``historical_data`` table exists: today's ``canalturf_partants``
       rows of ``predictions`` with positive odds, left-joined with
       ``historical_data`` on the horse name.
    2. Otherwise today's ``pmu_partants`` rows whose meeting has
       ``pays_code = 'FRA'``.
    3. If that is empty: every ``pmu_partants`` row of the most recent
       ``date_programme``, returned without course details.

    Args:
        conn: sqlite3 connection whose rows can be converted with ``dict(row)``.
        today: Date string of the day to load.

    Returns:
        ``(date_used, horses, course_info)``: the date the runners belong to,
        a list of runner dicts, and race details keyed by
        ``"<num_reunion>_<num_course>"`` (empty for the fallback date).
    """

    def read_course_info():
        # Race details of the requested day, keyed by "<reunion>_<course>".
        cursor = conn.execute(
            """
            SELECT num_reunion, num_course, libelle, libelle_court, discipline, distance, heure_depart_str
            FROM pmu_courses
            WHERE date_programme = ?
            ORDER BY num_reunion, num_course
            """,
            (today,),
        )
        return {
            f"{row['num_reunion']}_{row['num_course']}": dict(row)
            for row in cursor.fetchall()
        }

    # The course query always runs first, whichever runner source is used.
    course_info = read_course_info()

    if table_exists(conn, "historical_data"):
        enriched_rows = conn.execute(
            """
            SELECT DISTINCT p.horse_name, p.horse_number, p.odds,
                h.age, h.sexe, h.nb_courses, h.nb_victoires, h.nb_places,
                h.tx_victoire, h.tx_place, h.forme_recente, h.reduction_km,
                h.gains_annee, h.cote_directe, h.distance, h.discipline,
                h.avis_entraineur, h.oeilleres, h.deferre, h.nb_partants,
                h.rang_cote, h.ratio_cote_field, h.musique
            FROM predictions p
            LEFT JOIN historical_data h ON h.horse_name = p.horse_name
            WHERE p.date = ? AND p.source = 'canalturf_partants' AND p.odds > 0
            """,
            (today,),
        ).fetchall()
        return today, [dict(row) for row in enriched_rows], course_info

    # No historical table: read today's French runners from the PMU tables.
    # Columns the PMU tables do not provide are selected as constants.
    todays_rows = conn.execute(
        """
        SELECT
            p.date_programme AS date,
            p.num_reunion,
            p.num_course,
            p.num_pmu AS horse_number,
            p.nom AS horse_name,
            p.age,
            p.sexe,
            p.musique,
            p.nombre_courses AS nb_courses,
            p.nombre_victoires AS nb_victoires,
            p.nombre_places AS nb_places,
            p.gains_annee_en_cours AS gains_annee,
            COALESCE(p.cote_direct, 0) AS cote_directe,
            COALESCE(c.distance, 0) AS distance,
            COALESCE(c.discipline, 'PLAT') AS discipline,
            COALESCE(c.nb_declares_partants, 0) AS nb_partants,
            COALESCE(p.oeilleres, 'SANS_OEILLERES') AS oeilleres,
            COALESCE(p.tx_victoire, 0) AS tx_victoire,
            COALESCE(p.tx_place, 0) AS tx_place,
            COALESCE(p.forme_recente, 0) AS forme_recente,
            0 AS reduction_km,
            'NEUTRE' AS avis_entraineur,
            'NON' AS deferre,
            0 AS rang_cote,
            0 AS ratio_cote_field
        FROM pmu_partants p
        LEFT JOIN pmu_courses c
            ON c.date_programme = p.date_programme
            AND c.num_reunion = p.num_reunion
            AND c.num_course = p.num_course
        INNER JOIN pmu_reunions r
            ON r.date_programme = p.date_programme
            AND r.num_reunion = p.num_reunion
        WHERE p.date_programme = ? AND r.pays_code = 'FRA'
        ORDER BY p.num_reunion, p.num_course, p.num_pmu
        """,
        (today,),
    ).fetchall()
    todays_runners = [dict(row) for row in todays_rows]
    if todays_runners:
        return today, todays_runners, read_course_info()

    # Nothing for today: fall back to the most recent programme date.
    fallback_date = conn.execute(
        "SELECT MAX(date_programme) FROM pmu_partants"
    ).fetchone()[0]
    if not fallback_date:
        return today, [], {}

    fallback_rows = conn.execute(
        """
        SELECT
            p.date_programme AS date,
            p.num_reunion,
            p.num_course,
            p.num_pmu AS horse_number,
            p.nom AS horse_name,
            p.age,
            p.sexe,
            p.musique,
            p.nombre_courses AS nb_courses,
            p.nombre_victoires AS nb_victoires,
            p.nombre_places AS nb_places,
            p.gains_annee_en_cours AS gains_annee,
            COALESCE(p.cote_direct, 0) AS cote_directe,
            COALESCE(c.distance, 0) AS distance,
            COALESCE(c.discipline, 'PLAT') AS discipline,
            COALESCE(c.nb_declares_partants, 0) AS nb_partants,
            COALESCE(p.oeilleres, 'SANS_OEILLERES') AS oeilleres,
            COALESCE(p.tx_victoire, 0) AS tx_victoire,
            COALESCE(p.tx_place, 0) AS tx_place,
            COALESCE(p.forme_recente, 0) AS forme_recente,
            0 AS reduction_km,
            'NEUTRE' AS avis_entraineur,
            'NON' AS deferre,
            0 AS rang_cote,
            0 AS ratio_cote_field
        FROM pmu_partants p
        LEFT JOIN pmu_courses c
            ON c.date_programme = p.date_programme
            AND c.num_reunion = p.num_reunion
            AND c.num_course = p.num_course
        WHERE p.date_programme = ?
        ORDER BY p.num_reunion, p.num_course, p.num_pmu
        """,
        (fallback_date,),
    ).fetchall()
    return fallback_date, [dict(row) for row in fallback_rows], {}
def enrich_ml_horses(horses):
    """Normalise runner dicts in place and fill the fields the model needs.

    Runners are grouped by (date, num_reunion, num_course).  For every runner:

    - ``odds`` and ``cote_directe`` are coerced to floats; values that cannot
      be converted become 0.0 (for ``cote_directe``, the runner's odds).
    - missing fields receive neutral defaults;
    - ``nb_partants``, ``rang_cote`` and ``ratio_cote_field`` are computed
      from the group whenever they are missing, ``None`` or 0.  The loaders
      select ``0`` as a placeholder for these columns, so a plain
      ``setdefault`` would leave that placeholder in place.

    Args:
        horses: List of runner dicts; the dicts are modified in place.

    Returns:
        The same *horses* list.
    """

    def as_float(value, fallback=0.0):
        # Falsy input counts as 0; unparsable input yields the fallback.
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return fallback

    races = {}
    for horse in horses:
        race_key = (
            horse.get("date") or horse.get("date_programme"),
            horse.get("num_reunion"),
            horse.get("num_course"),
        )
        races.setdefault(race_key, []).append(horse)

    # Neutral value given to each field that is absent from a runner.
    field_defaults = (
        ("age", 0),
        ("sexe", "U"),
        ("nb_courses", 0),
        ("nb_victoires", 0),
        ("nb_places", 0),
        ("tx_victoire", 0),
        ("tx_place", 0),
        ("forme_recente", 0),
        ("reduction_km", 0),
        ("gains_annee", 0),
        ("distance", 0),
        ("discipline", "PLAT"),
        ("avis_entraineur", "NEUTRE"),
        ("oeilleres", "SANS"),
        ("deferre", "NON"),
        ("musique", ""),
    )

    for group in races.values():
        odds_values = []
        for horse in group:
            odds = as_float(horse.get("odds", horse.get("cote_directe", 0)))
            horse["odds"] = odds
            horse["cote_directe"] = as_float(
                horse.get("cote_directe", odds) or odds, fallback=odds
            )
            if odds > 0:
                odds_values.append(odds)
        avg_odds = sum(odds_values) / len(odds_values) if odds_values else 0

        # Shortest odds first; runners without usable odds are ranked last.
        ranked = sorted(group, key=lambda h: h["odds"] or 999999)
        for idx, horse in enumerate(ranked, start=1):
            horse.setdefault("horse_number", horse.get("num_pmu"))
            horse.setdefault("horse_name", horse.get("nom"))
            for field, default in field_defaults:
                horse.setdefault(field, default)
            if not horse.get("nb_partants"):
                horse["nb_partants"] = len(group)
            if not horse.get("rang_cote"):
                horse["rang_cote"] = idx
            if not horse.get("ratio_cote_field"):
                horse["ratio_cote_field"] = (
                    round(horse["odds"] / avg_odds, 3) if avg_odds > 0 else 0
                )
    return horses
def prepare_features_from_db(horse_data):
    """Wrap one runner's mapping in a single-row DataFrame.

    Missing values in the categorical columns that are present are replaced by
    the string "UNKNOWN"; no numeric encoding is performed here.
    """
    frame = pd.DataFrame([horse_data])
    categorical_columns = (
        "discipline",
        "sexe",
        "avis_entraineur",
        "oeilleres",
        "deferre",
    )
    for name in categorical_columns:
        if name not in frame.columns:
            continue
        frame[name] = frame[name].fillna("UNKNOWN")
    return frame
@app.route("/health")
@app.route("/turf/health")
def health():
    """Health check endpoint for Docker / load balancers.

    Runs ``SELECT 1`` against the SQLite database at DB_PATH with a 2 second
    timeout.

    Returns:
        A JSON body ``{"status", "service", "db"}`` with HTTP 200 when the
        database answered, or HTTP 503 and status ``"degraded"`` otherwise.
    """
    db_ok = True
    conn = None
    try:
        # The previous `"DB_FILE" in dir()` test only inspected this
        # function's locals, so it always fell back to the literal path;
        # DB_PATH holds that same path.
        conn = sqlite3.connect(DB_PATH, timeout=2)
        conn.execute("SELECT 1")
    except Exception:
        db_ok = False
    finally:
        # Close the probe connection even when the query failed.
        if conn is not None:
            conn.close()
    status = "ok" if db_ok else "degraded"
    return {"status": status, "service": "dashboard-api", "db": db_ok}, (
        200 if db_ok else 503
    )
@app.route("/")
def index():
    """Serve the dashboard HTML page at the site root."""
    dashboard_page = "/home/h3r7/turf_saas/dashboard.html"
    return send_file(dashboard_page)
@app.route("/turf/")
@app.route("/turf")
def turf_index():
    """Serve the dashboard HTML page under the /turf prefix."""
    dashboard_page = "/home/h3r7/turf_saas/dashboard.html"
    return send_file(dashboard_page)
@app.route("/turf/<path:filename>")
def turf_static(filename):
    """Serve a static asset of the dashboard from the application directory.

    The served directory also holds the SQLite database, the pickled models
    and the application source.  Requests for those file types, and for any
    dot-file or dot-directory, are answered with 404 instead of being served.

    Args:
        filename: Path below /turf/ requested by the client.
    """
    from flask import abort

    # File types that must never leave the server.
    blocked_suffixes = (
        ".db",
        ".db-wal",
        ".db-shm",
        ".db-journal",
        ".sqlite",
        ".sqlite3",
        ".pkl",
        ".pickle",
        ".py",
        ".pyc",
        ".env",
    )
    segments = filename.lower().split("/")
    is_hidden = any(segment.startswith(".") for segment in segments)
    if is_hidden or segments[-1].endswith(blocked_suffixes):
        abort(404)
    return send_from_directory("/home/h3r7/turf_saas", filename)
@app.route("/api/today")
@app.route("/turf/api")
def api_today():
    """Return today's dashboard payload as JSON.

    The payload holds the day's races, the runners with odds, the
    bases/chances/outsiders picks, the first five results, the latest weather
    row, a risk estimate for the listed runners and yesterday's "bases" hit
    count.  The optional ``race`` query parameter restricts races, runners and
    picks to one race name.

    Failures while reading races, runners or picks are printed and leave the
    matching section empty; errors from the results, weather and score queries
    propagate.  The database connection is closed in every case.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    race_filter = request.args.get("race", "")
    data = {
        "date": today,
        "races": [],
        "race": {},
        "predictions": {},
        "results": [],
        "weather": {},
        "scores": {},
    }

    # Optional race filter: a fixed SQL fragment plus a bound parameter, so the
    # user-supplied value never becomes part of the SQL text.
    if race_filter:
        race_condition = "AND race_name = ?"
        race_params = (race_filter,)
    else:
        race_condition = ""
        race_params = ()

    conn = get_db()
    try:
        # All races of the day.
        try:
            c = conn.execute(
                f"""
                SELECT DISTINCT race_name, race_hippodrome, race_time
                FROM predictions
                WHERE date=? AND source='canalturf_partants' {race_condition}
                ORDER BY race_time ASC
                """,
                (today,) + race_params,
            )
            races = c.fetchall()
            data["races"] = [
                {"name": r[0], "hippodrome": r[1], "time": r[2]} for r in races
            ]
            if races:
                first = races[0]
                data["race"] = {
                    "name": f"{first[1]} - {first[2]} {first[0]}",
                    "hippodrome": first[1],
                    "time": first[2],
                }
        except Exception as e:
            print(f"Erreur races: {e}")

        # Today's runners: only those with positive odds.
        try:
            c = conn.execute(
                f"""
                SELECT horse_name, horse_number, AVG(odds) as odds, prediction_rank, source, jockey
                FROM predictions
                WHERE date = ? AND source = 'canalturf_partants' AND odds > 0 {race_condition}
                GROUP BY horse_name
                ORDER BY odds ASC
                """,
                (today,) + race_params,
            )
            data["predictions"]["partants"] = [dict(row) for row in c.fetchall()]
        except Exception as e:
            print(f"Erreur partants: {e}")
            data["predictions"]["partants"] = []

        # Tipster picks (bases, chances, outsiders).
        for cat, src in [
            ("bases", "canalturf_prono_bases"),
            ("chances", "canalturf_prono_chances"),
            ("outsiders", "canalturf_prono_outsiders"),
        ]:
            try:
                c = conn.execute(
                    f"""
                    SELECT DISTINCT horse_name, horse_number, prediction_rank
                    FROM predictions WHERE date = ? AND source = ? {race_condition}
                    ORDER BY prediction_rank
                    """,
                    (today, src) + race_params,
                )
                data["predictions"][cat] = [dict(row) for row in c.fetchall()]
            except Exception as e:
                # Was a bare "except:", which also trapped KeyboardInterrupt
                # and SystemExit and hid the cause.
                print(f"Erreur {cat}: {e}")
                data["predictions"][cat] = []

        # Results of the day.
        c = conn.execute(
            "SELECT horse_name, position, odds FROM results WHERE date = ? ORDER BY position LIMIT 5",
            (today,),
        )
        data["results"] = [dict(row) for row in c.fetchall()]

        # Latest weather row.
        c = conn.execute("SELECT * FROM weather ORDER BY id DESC LIMIT 1")
        row = c.fetchone()
        if row:
            data["weather"] = dict(row)

        partants_list = data["predictions"].get("partants", [])
        if partants_list:
            # NOTE(review): these rows carry odds but no ml_score/prob_top1,
            # so calculate_risque() sees every strength as 0 - confirm that an
            # odds-only estimate is what the dashboard expects here.
            risque_label, risque_course = calculate_risque(partants_list)
            data["risque_label"] = risque_label
            data["risque_course"] = risque_course

        # Yesterday's score: how many "bases" picks finished in the top 3.
        yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        data["scores"]["date"] = yesterday
        c = conn.execute(
            "SELECT horse_name FROM results WHERE date = ? AND position <= 3",
            (yesterday,),
        )
        result_names = [r[0] for r in c.fetchall()]
        c = conn.execute(
            "SELECT DISTINCT horse_name FROM predictions WHERE date = ? AND source='canalturf_prono_bases'",
            (yesterday,),
        )
        our_preds = [r[0] for r in c.fetchall()]
        our_score = sum(1 for p in our_preds if p in result_names)
        data["scores"]["bases"] = (
            f"{our_score}/{len(our_preds)}" if our_preds else "-"
        )
    finally:
        # Release the connection even when one of the unguarded queries fails.
        conn.close()
    return jsonify(data)
@app.route("/api/odds_history")
@app.route("/turf/api/odds_history")
def api_odds_history():
    """Return today's odds snapshots per horse, with the drift since the first.

    For every horse the response gives the first and latest odds, the
    percentage change between them (0 with fewer than two snapshots or a
    non-positive first value), a trend label (``baisse`` below -5 %,
    ``hausse`` above +5 %, else ``stable``) and the list of snapshots.  Horses
    are ordered by their latest odds.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    conn = get_db()
    try:
        rows = conn.execute(
            """
            SELECT horse_name, horse_number, odds, scraped_at
            FROM odds_history
            WHERE date = ?
            ORDER BY horse_name, scraped_at ASC
            """,
            (today,),
        ).fetchall()
    finally:
        # Close the connection even if the query raises.
        conn.close()

    horses = {}
    for row in rows:
        name = row["horse_name"]
        entry = horses.setdefault(
            name,
            {
                "horse_name": name,
                "horse_number": row["horse_number"],
                "snapshots": [],
            },
        )
        entry["snapshots"].append(
            {
                "odds": row["odds"],
                # Characters 11-16 of the timestamp are kept as the time; a
                # NULL timestamp gives an empty string instead of a TypeError.
                "time": (row["scraped_at"] or "")[11:16],
            }
        )

    result = []
    for name, entry in horses.items():
        snaps = entry["snapshots"]
        # NULL odds count as 0 so comparisons and sorting cannot fail.
        debut = (snaps[0]["odds"] or 0) if snaps else 0
        actuel = (snaps[-1]["odds"] or 0) if snaps else 0
        if debut > 0 and len(snaps) > 1:
            evol_pct = round(((actuel - debut) / debut) * 100, 1)
        else:
            evol_pct = 0
        if evol_pct < -5:
            tendance = "baisse"
        elif evol_pct > 5:
            tendance = "hausse"
        else:
            tendance = "stable"
        result.append(
            {
                "horse_name": name,
                "horse_number": entry["horse_number"],
                "odds_debut": debut,
                "odds_actuel": actuel,
                "evol_pct": evol_pct,
                "nb_snapshots": len(snaps),
                "snapshots": snaps,
                "tendance": tendance,
            }
        )
    result.sort(key=lambda item: item["odds_actuel"])
    return jsonify({"date": today, "horses": result})
@app.route("/api/weather")
def api_weather():
    """Return the four most recent rows of the weather table as a JSON list."""
    conn = get_db()
    try:
        c = conn.execute("SELECT * FROM weather ORDER BY id DESC LIMIT 4")
        weather = [dict(row) for row in c.fetchall()]
    finally:
        # Close the connection even if the query raises.
        conn.close()
    return jsonify(weather)
@app.route("/api/ml_predictions")
@app.route("/turf/api/ml_predictions")
def api_ml_predictions():
    """Return ML predictions for today's runners, served from the DB cache.

    Unless ``?refresh=1`` is given, predictions cached for today are returned
    directly.  Otherwise the runners are loaded, a feature row is built for
    each one, the ``model_top1`` / ``model_top3`` models are applied, a risk
    label is computed per race, and the result is written to the cache (a
    cache failure is printed but does not fail the request).

    Error conditions (ML libraries missing, models not loaded, no runners)
    are reported in the JSON body with the default HTTP status.  The database
    connection is closed in every case.
    """
    import re
    from collections import defaultdict

    if not ML_AVAILABLE:
        return jsonify({"error": "ML libraries not available"})

    today = datetime.now().strftime("%Y-%m-%d")
    force_refresh = request.args.get("refresh", "0") == "1"

    # Exact feature columns, in the order listed at training time.
    feature_cols = [
        "age",
        "sexe_enc",
        "nb_courses",
        "nb_victoires",
        "nb_places",
        "tx_victoire",
        "tx_place",
        "forme_recente",
        "reduction_km",
        "gains_annee",
        "cote_directe",
        "distance",
        "nb_partants",
        "discipline_enc",
        "avis_enc",
        "oeilleres_enc",
        "deferre_enc",
        "form_1",
        "form_2",
        "form_3",
        "form_4",
        "form_5",
        "form_avg",
        "win_rate_adj",
        "place_rate_adj",
        "implied_prob",
        "victories_per_race",
        "places_per_race",
        "earnings_per_race",
        "age_win_interact",
        "distance_cat",
        "is_favorite",
        "rang_cote",
        "ratio_cote_field",
    ]
    numeric_cols = (
        "age",
        "nb_courses",
        "nb_victoires",
        "nb_places",
        "tx_victoire",
        "tx_place",
        "forme_recente",
        "reduction_km",
        "gains_annee",
        "cote_directe",
        "distance",
        "nb_partants",
        "rang_cote",
        "ratio_cote_field",
    )
    # (encoded feature name, source column, value used when missing/empty)
    categorical_specs = (
        ("sexe_enc", "sexe", "U"),
        ("avis_enc", "avis_entraineur", "NEUTRE"),
        ("oeilleres_enc", "oeilleres", "SANS"),
        ("deferre_enc", "deferre", "NON"),
        ("discipline_enc", "discipline", "PLAT"),
    )
    # Compiled once instead of re-importing "re" for every horse.
    form_pattern = re.compile(r"\d+")

    conn = get_db()
    try:
        # --- CACHE READ ---
        if not force_refresh:
            cached_preds, cached_courses = get_ml_from_cache(conn, today)
            if cached_preds:
                return jsonify(
                    {
                        "date": today,
                        "model_version": "xgboost_v1",
                        "predictions": cached_preds,
                        "courses": cached_courses,
                        "from_cache": True,
                    }
                )

        # --- ML COMPUTATION ---
        models = load_models()
        if not models or models is True:
            return jsonify(
                {
                    "error": "Models not loaded",
                    "message": "Run train_xgboost.py first to train the models",
                }
            )

        date_used, horses, course_info = load_ml_horses(conn, today)
        horses = enrich_ml_horses(horses)
        if not horses:
            return jsonify(
                {
                    "date": date_used,
                    "predictions": [],
                    "message": "No predictions available",
                }
            )

        # One label encoder per categorical column, fitted on the values seen
        # in this request plus the column's default.
        # NOTE(review): the encoders are refitted per request, so the integer
        # codes depend on today's values - confirm this matches the encoding
        # used when the models were trained.
        encoders = {}
        for feature_name, column, default in categorical_specs:
            seen = {h.get(column, default) or default for h in horses}
            encoder = LabelEncoder()
            encoder.fit(list(seen) + [default])
            encoders[feature_name] = encoder

        predictions = []
        for horse in horses:
            features = {}
            # Numeric features: missing or NULL values become 0.
            for col in numeric_cols:
                features[col] = float(horse.get(col, 0) or 0)

            # Encoded categorical features.
            for feature_name, column, default in categorical_specs:
                value = horse.get(column, default) or default
                features[feature_name] = encoders[feature_name].transform([value])[0]

            # Form features: the first five numbers found in "musique",
            # padded with zeros.
            form_nums = form_pattern.findall(str(horse.get("musique", "")))[:5]
            forms = [float(n) for n in form_nums] + [0.0] * (5 - len(form_nums))
            for position, form_value in enumerate(forms, start=1):
                features[f"form_{position}"] = form_value
            features["form_avg"] = sum(forms) / 5

            # Derived features.
            features["implied_prob"] = (
                1 / features["cote_directe"] if features["cote_directe"] > 0 else 0
            )
            features["win_rate_adj"] = features["tx_victoire"] * np.log1p(
                features["nb_courses"]
            )
            features["place_rate_adj"] = features["tx_place"] * np.log1p(
                features["nb_courses"]
            )
            races_run = max(features["nb_courses"], 1)  # avoids division by zero
            features["victories_per_race"] = features["nb_victoires"] / races_run
            features["places_per_race"] = features["nb_places"] / races_run
            features["earnings_per_race"] = features["gains_annee"] / races_run
            features["age_win_interact"] = features["age"] * features["tx_victoire"]
            features["distance_cat"] = (
                2.0
                if 1500 < features["distance"] <= 2000
                else (3.0 if 2000 < features["distance"] <= 2500 else 1.0)
            )
            # NOTE(review): an unknown price (cote_directe == 0) also counts
            # as favourite here - confirm against the training pipeline.
            features["is_favorite"] = 1 if features["cote_directe"] < 5 else 0

            # Prediction; a failure is reported on that horse's entry only.
            try:
                X = pd.DataFrame([features])[feature_cols]
                X = X.fillna(0)
                prob_top1 = float(models["model_top1"].predict_proba(X)[0][1])
                prob_top3 = float(models["model_top3"].predict_proba(X)[0][1])
                odds = float(horse["odds"])
                if prob_top1 > 0.15:
                    recommendation = "top1"
                elif prob_top3 > 0.35:
                    recommendation = "top3"
                else:
                    recommendation = "pass"
                predictions.append(
                    {
                        "horse_name": horse["horse_name"],
                        "horse_number": horse["horse_number"],
                        "odds": odds,
                        "prob_top1": round(prob_top1 * 100, 1),
                        "prob_top3": round(prob_top3 * 100, 1),
                        "ml_score": round((prob_top1 * 0.6 + prob_top3 * 0.4) * 100, 1),
                        "recommendation": recommendation,
                        "is_value_bet": 1 if (prob_top3 > 0.35 and odds > 10) else 0,
                        "is_outlier": 1
                        if (odds <= 5 and (prob_top1 < 0.1 and prob_top3 < 0.25))
                        else 0,
                        "num_reunion": horse.get("num_reunion"),
                        "num_course": horse.get("num_course"),
                    }
                )
            except Exception as e:
                predictions.append(
                    {
                        "horse_name": horse["horse_name"],
                        "horse_number": horse["horse_number"],
                        "odds": horse["odds"],
                        "error": str(e),
                    }
                )

        # Best ML score first (entries without a score sort as 0).
        predictions.sort(key=lambda x: x.get("ml_score", 0), reverse=True)

        # Attach race details to the predictions whose race is known.
        for pred in predictions:
            course_key = f"{pred.get('num_reunion')}_{pred.get('num_course')}"
            if course_key in course_info:
                cinfo = course_info[course_key]
                pred["race_label"] = (
                    f"R{pred.get('num_reunion')}C{pred.get('num_course')}"
                )
                pred["race_name"] = cinfo.get("libelle", "")
                pred["hippodrome"] = cinfo.get("libelle_court", "")
                pred["discipline"] = cinfo.get("discipline", "")
                pred["distance"] = cinfo.get("distance", 0)
                pred["heure"] = cinfo.get("heure_depart_str", "")

        # --- RISK PER RACE, COPIED ONTO EVERY PREDICTION OF THE RACE ---
        runners_by_race = defaultdict(list)
        for p in predictions:
            runners_by_race[(p.get("num_reunion"), p.get("num_course"))].append(
                {
                    "odds": p.get("odds", 999),
                    "ml_score": p.get("ml_score", 0),
                    "prob_top1": p.get("prob_top1", 0),
                    "prob_top3": p.get("prob_top3", 0),
                }
            )
        race_risque_map = {}
        for key, partants in runners_by_race.items():
            label, score = calculate_risque(partants)
            race_risque_map[key] = (label or "neutral", score or 50)
        for p in predictions:
            rkey = (p.get("num_reunion"), p.get("num_course"))
            rl, rs = race_risque_map.get(rkey, ("neutral", 50))
            p["risque_label"] = rl
            p["risque_score"] = rs

        # --- CACHE WRITE (non-blocking) ---
        try:
            save_ml_to_cache(conn, today, predictions)
        except Exception as e_cache:
            # The response is still served, but the failure is no longer silent.
            print(f"⚠️ ML cache save failed: {e_cache}")

        return jsonify(
            {
                "date": date_used,
                "model_version": "xgboost_v1",
                "predictions": predictions,
                "courses": course_info,
                "from_cache": False,
            }
        )
    finally:
        # Every exit path, including exceptions, releases the connection.
        conn.close()
@app.route("/api/ml_predictions/refresh")
@app.route("/turf/api/ml_predictions/refresh")
def api_ml_predictions_refresh():
    """Drop today's cached ML predictions and redirect to a forced recompute.

    The cache rows of the day are deleted, then the client is redirected to
    the main ML predictions endpoint with ``?refresh=1``.
    """
    from flask import redirect, url_for

    today = datetime.now().strftime("%Y-%m-%d")
    conn = get_db()
    try:
        ensure_ml_cache_table(conn)
        conn.execute("DELETE FROM ml_predictions_cache WHERE date = ?", (today,))
        conn.commit()
    finally:
        # Close the connection even if the delete fails.
        conn.close()
    # Delegate to the main endpoint with the refresh flag set.
    return redirect(url_for("api_ml_predictions") + "?refresh=1")
@app.route("/api/scoring")
@app.route("/turf/api/scoring")
def api_scoring():
    """Return today's scoring rows, optionally filtered by race name.

    The optional ``race`` query parameter is matched as a substring of
    ``race_name`` (bound as a LIKE parameter).  Rows are ordered by
    ``rang_scoring``; ``recommendations`` is always an empty object.
    """
    race = request.args.get("race", "")
    today = datetime.now().strftime("%Y-%m-%d")
    query = """
        SELECT date, race_name, horse_name, horse_number, score,
            score_cote, score_forme, score_victoire, score_place,
            cote, forme_recente, tx_victoire, tx_place,
            rang_scoring, avis_entraineur, musique
        FROM scoring
        WHERE date = ?
    """
    params = [today]
    if race:
        query += " AND race_name LIKE ?"
        params.append(f"%{race}%")
    query += " ORDER BY rang_scoring ASC"

    conn = get_db()
    try:
        c = conn.execute(query, params)
        scores = [dict(row) for row in c.fetchall()]
    finally:
        # Close the connection even if the query raises.
        conn.close()
    return jsonify({"scores": scores, "recommendations": {}})
# === AUTOMATED REPORTS ===
# The analytics module is optional: HAS_ANALYTICS records whether its report
# functions could be imported, and the report routes check it before use.
try:
    from analytics_reports import (
        get_daily_report,
        get_weekly_report,
        get_monthly_report,
    )

    HAS_ANALYTICS = True
except ImportError:
    HAS_ANALYTICS = False
@app.route("/turf/api/report/daily")
def api_report_daily():
    """Return the daily report for the optional ``date`` query parameter.

    Responds with HTTP 500 when the analytics module is not available.
    """
    if HAS_ANALYTICS:
        requested_date = request.args.get("date")
        return jsonify(get_daily_report(requested_date))
    return jsonify({"error": "analytics module not available"}), 500
@app.route("/turf/api/report/weekly")
def api_report_weekly():
    """Return the weekly report for the optional ``start`` / ``end`` parameters.

    Responds with HTTP 500 when the analytics module is not available.
    """
    if HAS_ANALYTICS:
        period_start = request.args.get("start")
        period_end = request.args.get("end")
        return jsonify(get_weekly_report(period_start, period_end))
    return jsonify({"error": "analytics module not available"}), 500
@app.route("/turf/api/report/monthly")
def api_report_monthly():
    """Return the monthly report for the optional ``year`` / ``month`` parameters.

    Both parameters are parsed as integers.  Responds with HTTP 500 when the
    analytics module is not available.
    """
    if HAS_ANALYTICS:
        report_year = request.args.get("year", type=int)
        report_month = request.args.get("month", type=int)
        return jsonify(get_monthly_report(report_year, report_month))
    return jsonify({"error": "analytics module not available"}), 500
@app.route("/turf/api/suggestions")
def api_suggestions():
    """Return a list of suggested questions for the dashboard.

    The list depends on whether ``pmu_partants`` has rows from the last seven
    days.  If that lookup fails, a generic list is returned instead.
    """
    conn = get_db()
    c = conn.cursor()
    suggestions = []
    try:
        c.execute(
            "SELECT COUNT(*) as cnt FROM pmu_partants WHERE date_programme >= date('now', '-7 days')"
        )
        recent = c.fetchone()[0]
        if recent > 0:
            suggestions = [
                "Quel est mon taux de réussite cette semaine?",
                "Liste les 5 meilleurs jockeys",
                "Quel est le ROI du mois?",
                "Résultats d'hier",
                "Programme du jour",
            ]
        else:
            suggestions = [
                "Derniers gagnants",
                "Meilleurs entraîneurs",
                "Performances à Vincennes",
                "Évolution des cotes",
            ]
    except Exception:
        # Best-effort fallback.  Was a bare "except:", which also trapped
        # KeyboardInterrupt and SystemExit.
        suggestions = [
            "Quel est le taux de réussite des favoris?",
            "Liste les meilleurs jockeys",
            "Résultats d'hier",
        ]
    finally:
        conn.close()
    return jsonify({"suggestions": suggestions})
@app.route("/turf/api/metrics/summary")
@app.route("/turf/api/metrics/summary/")
def metrics_summary():
    """Return prediction metrics aggregated per source over the last N days.

    ``days`` (query parameter, default 30) is clamped to the range 1-365.
    On failure the body is ``{"error": true, "message": ...}`` with the
    default HTTP status.  The connection is closed on success and on failure.
    """
    days = min(max(request.args.get("days", 30, type=int), 1), 365)
    conn = None
    try:
        conn = get_db()
        # SQLite date modifier such as "-30 days", passed as a bound parameter.
        date_filter = f"-{int(days)} days"
        cur = conn.execute(
            "SELECT source, COUNT(*) as nb_courses, SUM(nb_predictions) as total_predictions, "
            "SUM(nb_gagnants) as total_gagnants, SUM(nb_places) as total_places, SUM(nb_top5) as total_top5, "
            "ROUND(AVG(taux_gagnant), 2) as moy_taux_gagnant, ROUND(AVG(taux_place), 2) as moy_taux_place, "
            "ROUND(AVG(taux_top5), 2) as moy_taux_top5, ROUND(SUM(roi_sg_net), 3) as roi_sg_cumul, "
            "ROUND(SUM(roi_sp_net), 3) as roi_sp_cumul, ROUND(AVG(ecart_rang_moyen), 2) as moy_ecart_rang, "
            "SUM(quinte_5sur5) as nb_5sur5, SUM(quinte_4sur5) as nb_4sur5, SUM(quinte_3sur5) as nb_3sur5 "
            "FROM prediction_metrics WHERE date >= date('now', ?) GROUP BY source ORDER BY moy_taux_place DESC",
            (date_filter,),
        )
        cols = [d[0] for d in cur.description]
        rows = [dict(zip(cols, row)) for row in cur.fetchall()]
        return jsonify({"summary": rows})
    except Exception as e:
        return jsonify({"error": True, "message": str(e)})
    finally:
        # Previously the connection stayed open whenever the query raised.
        if conn is not None:
            conn.close()
@app.route("/turf/api/metrics/daily")
@app.route("/turf/api/metrics/daily/")
def metrics_daily():
    """Return prediction metrics per date and source over the last N days.

    ``days`` (query parameter, default 30) is clamped to the range 1-365.
    On failure the body is ``{"error": true, "message": ...}`` with the
    default HTTP status.  The connection is closed on success and on failure.
    """
    days = min(max(request.args.get("days", 30, type=int), 1), 365)
    conn = None
    try:
        conn = get_db()
        # SQLite date modifier such as "-30 days", passed as a bound parameter.
        date_filter = f"-{int(days)} days"
        cur = conn.execute(
            "SELECT date, source, SUM(nb_predictions) as predictions, SUM(nb_gagnants) as gagnants, "
            "SUM(nb_places) as places, SUM(nb_top5) as top5, ROUND(AVG(taux_gagnant), 2) as taux_gagnant, "
            "ROUND(AVG(taux_place), 2) as taux_place, ROUND(AVG(roi_sg_net), 3) as roi_sg, "
            "ROUND(AVG(roi_sp_net), 3) as roi_sp, SUM(quinte_5sur5) as quinte_5sur5, "
            "SUM(quinte_4sur5) as quinte_4sur5 "
            "FROM prediction_metrics WHERE date >= date('now', ?) GROUP BY date, source ORDER BY date DESC",
            (date_filter,),
        )
        cols = [d[0] for d in cur.description]
        rows = [dict(zip(cols, row)) for row in cur.fetchall()]
        return jsonify({"daily": rows})
    except Exception as e:
        return jsonify({"error": True, "message": str(e)})
    finally:
        # Previously the connection stayed open whenever the query raised.
        if conn is not None:
            conn.close()
# Script entry point: load the models into the module cache, then start the
# Flask server on all interfaces (0.0.0.0), port 8791, with debug mode off.
if __name__ == "__main__":
    load_models()
    app.run(host="0.0.0.0", port=8791, debug=False)