1160 lines
40 KiB
Python
Executable File
1160 lines
40 KiB
Python
Executable File
#!/usr/bin/env python3
"""
API server - Turf Dashboard Data

Includes ML predictions using XGBoost models
"""
# The docstring must be the first statement of the module to be picked up as
# its __doc__; the startup banner therefore comes after it.
print("STARTING API...")
|
|
|
|
from flask import Flask, jsonify, send_file, send_from_directory, request
|
|
import sqlite3
|
|
from datetime import datetime, timedelta
|
|
import pickle
|
|
import os
|
|
|
|
try:
|
|
import pandas as pd
|
|
import numpy as np
|
|
from sklearn.preprocessing import LabelEncoder
|
|
|
|
ML_AVAILABLE = True
|
|
except ImportError:
|
|
ML_AVAILABLE = False
|
|
pd = None
|
|
np = None
|
|
LabelEncoder = None
|
|
|
|
app = Flask(__name__)

# Filesystem locations. The defaults are the original deployment paths; the
# environment variables allow running the API from another checkout or host
# without editing the source.
DB_PATH = os.environ.get("TURF_DB_PATH", "/home/h3r7/turf_saas/turf_saas.db")
MODEL_PATH = os.environ.get(
    "TURF_MODEL_PATH", "/home/h3r7/turf_saas/xgboost_models.pkl"
)

# Lazily populated by load_models(): None = not loaded yet, False = a load
# attempt failed, anything else = the unpickled model bundle.
ml_models = None
|
|
|
|
|
|
def load_models():
    """Return the cached model bundle, unpickling MODEL_PATH on first use.

    The module-level ``ml_models`` acts as the cache: it stays ``None`` while
    the model file does not exist, becomes ``False`` after a failed load
    attempt (which is then not retried), and otherwise holds the unpickled
    object.
    """
    global ml_models

    # Already resolved (loaded object, or False after a failed attempt).
    if ml_models is not None:
        return ml_models

    # No model file yet: leave the cache unset so a later call can retry.
    if not os.path.exists(MODEL_PATH):
        return ml_models

    try:
        with open(MODEL_PATH, "rb") as model_file:
            # NOTE: pickle can execute arbitrary code while loading; the file
            # at MODEL_PATH must come from a trusted source.
            loaded = pickle.load(model_file)
        ml_models = loaded
        print("✅ XGBoost models loaded")
    except Exception as exc:
        print(f"⚠️ Failed to load models: {exc}")
        ml_models = False

    return ml_models
|
|
|
|
|
|
def get_db():
    """Open a new SQLite connection to DB_PATH whose rows are sqlite3.Row objects.

    The caller owns the connection and is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row allows both index access and column-name access.
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
def ensure_ml_cache_table(conn):
    """Create the ml_predictions_cache table and its date index if missing.

    Also migrates tables created before the risk columns existed by adding
    ``risque_label`` and ``risque_score``. The existing columns are inspected
    first, so a genuine database error is no longer hidden behind a blanket
    ``except``. The function commits before returning and is safe to call
    repeatedly.

    Args:
        conn: an open sqlite3 connection.

    Raises:
        sqlite3.Error: if any DDL statement fails for a reason other than the
            column having been added concurrently.
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS ml_predictions_cache (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            date TEXT NOT NULL,
            num_reunion INTEGER,
            num_course INTEGER,
            horse_name TEXT,
            horse_number INTEGER,
            odds REAL,
            prob_top1 REAL,
            prob_top3 REAL,
            ml_score REAL,
            recommendation TEXT,
            is_value_bet INTEGER DEFAULT 0,
            is_outlier INTEGER DEFAULT 0,
            race_label TEXT,
            race_name TEXT,
            hippodrome TEXT,
            discipline TEXT,
            distance REAL,
            heure TEXT,
            risque_label TEXT DEFAULT 'neutral',
            risque_score INTEGER DEFAULT 50,
            model_version TEXT DEFAULT 'xgboost_v1',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            UNIQUE(date, num_reunion, num_course, horse_name)
        )
    """)
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_ml_cache_date
        ON ml_predictions_cache(date)
    """)

    def _existing_columns():
        # Column 1 of each PRAGMA table_info row is the column name.
        return {
            info[1]
            for info in conn.execute("PRAGMA table_info(ml_predictions_cache)")
        }

    # Migration: add the risk columns to tables created without them.
    migrations = (
        ("risque_label", "TEXT DEFAULT 'neutral'"),
        ("risque_score", "INTEGER DEFAULT 50"),
    )
    present = _existing_columns()
    for column, definition in migrations:
        if column in present:
            continue
        try:
            conn.execute(
                f"ALTER TABLE ml_predictions_cache ADD COLUMN {column} {definition}"
            )
        except sqlite3.OperationalError:
            # Another connection may have added the column between the check
            # and the ALTER; anything else is a real failure.
            if column not in _existing_columns():
                raise

    conn.commit()
|
|
|
|
|
|
def get_ml_from_cache(conn, date):
    """Read the cached ML predictions for ``date`` from the database.

    Returns:
        ``(predictions, course_info)`` where ``predictions`` is a list of
        dicts ordered by descending ``ml_score`` and ``course_info`` maps
        ``"<num_reunion>_<num_course>"`` to the race details taken from the
        first cached row of that race; ``(None, None)`` when no row is cached
        for that date.
    """
    ensure_ml_cache_table(conn)
    cached_rows = conn.execute(
        """SELECT * FROM ml_predictions_cache WHERE date = ? ORDER BY ml_score DESC""",
        (date,),
    ).fetchall()
    if not cached_rows:
        return None, None

    # Columns copied verbatim from the cache row into each prediction.
    copied_fields = (
        "horse_name",
        "horse_number",
        "odds",
        "prob_top1",
        "prob_top3",
        "ml_score",
        "recommendation",
        "is_value_bet",
        "is_outlier",
        "num_reunion",
        "num_course",
        "race_label",
        "race_name",
        "hippodrome",
        "discipline",
        "distance",
        "heure",
    )

    predictions = []
    course_info = {}
    for cached in cached_rows:
        record = dict(cached)

        entry = {field: record[field] for field in copied_fields}
        # The risk columns fall back to defaults if the row lacks them.
        entry["risque_label"] = record.get("risque_label", "neutral")
        entry["risque_score"] = record.get("risque_score", 50)
        predictions.append(entry)

        course_key = f"{record['num_reunion']}_{record['num_course']}"
        if course_key in course_info:
            continue
        course_info[course_key] = {
            "libelle": record["race_name"],
            "libelle_court": record["hippodrome"],
            "discipline": record["discipline"],
            "distance": record["distance"],
            "heure_depart_str": record["heure"],
        }

    return predictions, course_info
|
|
|
|
|
|
def save_ml_to_cache(conn, date, predictions, model_version="xgboost_v1"):
    """Replace the cached ML predictions of ``date`` with ``predictions``.

    A per-race risk label and score are computed with calculate_risque() from
    all runners of each (num_reunion, num_course) group and stored on every
    row of that race.

    All rows are prepared before the database is touched. The existing rows
    for ``date`` are then deleted and the new ones written with INSERT OR
    REPLACE, so a duplicated (date, reunion, course, horse) entry overwrites
    the earlier one instead of aborting the whole save. On a database error
    the transaction is rolled back and the error re-raised.

    Args:
        conn: an open sqlite3 connection.
        date: cache key, stored in the ``date`` column.
        predictions: list of prediction dicts; missing keys are stored as NULL.
        model_version: value stored in the ``model_version`` column.

    Raises:
        sqlite3.Error: if the delete or the inserts fail.
    """
    from collections import defaultdict

    ensure_ml_cache_table(conn)

    # Group the runners by race so the risk uses every ML score of the race.
    race_horses = defaultdict(list)
    for p in predictions:
        race_horses[(p.get("num_reunion"), p.get("num_course"))].append({
            "odds": p.get("odds", 999),
            "ml_score": p.get("ml_score", 0),
            "prob_top1": p.get("prob_top1", 0),
            "prob_top3": p.get("prob_top3", 0),
        })

    race_risque = {}
    for key, partants in race_horses.items():
        label, score = calculate_risque(partants)
        race_risque[key] = (label or "neutral", score or 50)

    rows = []
    for p in predictions:
        rl, rs = race_risque.get(
            (p.get("num_reunion"), p.get("num_course")), ("neutral", 50)
        )
        rows.append((
            date,
            p.get("num_reunion"),
            p.get("num_course"),
            p.get("horse_name"),
            p.get("horse_number"),
            p.get("odds"),
            p.get("prob_top1"),
            p.get("prob_top3"),
            p.get("ml_score"),
            p.get("recommendation"),
            p.get("is_value_bet", 0),
            p.get("is_outlier", 0),
            p.get("race_label"),
            p.get("race_name"),
            p.get("hippodrome"),
            p.get("discipline"),
            p.get("distance"),
            p.get("heure"),
            rl,
            rs,
            model_version,
        ))

    try:
        # Drop the day's previous entries so a refresh fully replaces them.
        conn.execute("DELETE FROM ml_predictions_cache WHERE date = ?", (date,))
        conn.executemany(
            """
            INSERT OR REPLACE INTO ml_predictions_cache
            (date, num_reunion, num_course, horse_name, horse_number, odds,
             prob_top1, prob_top3, ml_score, recommendation, is_value_bet, is_outlier,
             race_label, race_name, hippodrome, discipline, distance, heure,
             risque_label, risque_score, model_version)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            """,
            rows,
        )
        conn.commit()
    except sqlite3.Error:
        conn.rollback()
        raise
|
|
|
|
|
|
def calculate_risque(partants):
    """
    Classify how risky a race is from its runners' ML scores and odds.

    Each runner is a dict. The keys read are "ml_score", "prob_top1" (used in
    place of "ml_score" when that one is missing or 0) and "odds" (missing or
    0 odds count as 999).

    Rules, evaluated in this order:
      - "safe": the ML leader scores >= 65 and leads the 2nd by >= 20 points
      - "trap": the odds favourite (odds < 5) has an ML score < 25,
                or 4+ runners have ml_score > 40 while the leader is < 70,
                or the leader leads by < 8 while the 2nd scores > 45
      - "neutral": everything else

    Returns (label, score) where score is an int (higher = safer), or
    (None, None) when ``partants`` is empty.

    NOTE(review): runners that carry no ML fields at all score 0, so a race
    whose favourite has odds < 5 is then always reported as "trap" - confirm
    that callers passing odds-only runners expect this.
    """
    if not partants:
        return None, None

    def _ml(runner):
        # ML score of a runner, falling back to prob_top1, then 0.
        return runner.get("ml_score") or runner.get("prob_top1") or 0

    ranked = sorted(partants, key=_ml, reverse=True)
    top1_score = _ml(ranked[0])
    top2_score = _ml(ranked[1]) if len(ranked) > 1 else 0
    gap_1_2 = top1_score - top2_score  # lead of the ML leader over the 2nd

    # Number of runners with ml_score > 40 (dangerous)
    nb_dangerous = sum(1 for p in ranked if (p.get("ml_score") or 0) > 40)

    # Odds favourite that the ML does not back. min() returns the first
    # runner with the lowest odds, like taking the head of a stable sort.
    favourite = min(partants, key=lambda p: p.get("odds") or 999)
    fav_odds = favourite.get("odds") or 999
    fav_ml = _ml(favourite)
    fav_surprise = fav_odds < 5 and fav_ml < 25

    # --- SAFE: clear domination ---
    if top1_score >= 65 and gap_1_2 >= 20:
        return "safe", min(100, int(50 + gap_1_2 * 1.5))

    # --- TRAP: very open race or trapped favourite ---
    if fav_surprise:
        return "trap", max(10, int(35 - (25 - fav_ml)))
    if nb_dangerous >= 4 and top1_score < 70:
        return "trap", max(10, int(40 - nb_dangerous * 2))
    if gap_1_2 < 8 and top2_score > 45:
        return "trap", max(15, int(30 + gap_1_2))

    # --- NEUTRAL: intermediate cases, score 35-64 by the leader's lead ---
    return "neutral", min(64, max(35, int(35 + gap_1_2 * 1.2)))
|
|
|
|
|
|
|
|
def table_exists(conn, table_name):
    """Return True when sqlite_master lists a table named ``table_name``."""
    lookup = "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?"
    match = conn.execute(lookup, (table_name,)).fetchone()
    return match is not None
|
|
|
|
|
|
def _pmu_partants_sql(french_only):
    """Build the SELECT mapping pmu_partants rows onto the ML input columns.

    With ``french_only`` the query additionally joins pmu_reunions and keeps
    only meetings whose ``pays_code`` is 'FRA'. The statement takes a single
    bound parameter: the programme date.
    """
    reunion_join = ""
    country_filter = ""
    if french_only:
        reunion_join = """
        INNER JOIN pmu_reunions r
            ON r.date_programme = p.date_programme
            AND r.num_reunion = p.num_reunion"""
        country_filter = " AND r.pays_code = 'FRA'"

    # Only the two constant fragments above are interpolated; the date is
    # always passed as a bound parameter.
    return f"""
        SELECT
            p.date_programme AS date,
            p.num_reunion,
            p.num_course,
            p.num_pmu AS horse_number,
            p.nom AS horse_name,
            p.age,
            p.sexe,
            p.musique,
            p.nombre_courses AS nb_courses,
            p.nombre_victoires AS nb_victoires,
            p.nombre_places AS nb_places,
            p.gains_annee_en_cours AS gains_annee,
            COALESCE(p.cote_direct, 0) AS cote_directe,
            COALESCE(c.distance, 0) AS distance,
            COALESCE(c.discipline, 'PLAT') AS discipline,
            COALESCE(c.nb_declares_partants, 0) AS nb_partants,
            COALESCE(p.oeilleres, 'SANS_OEILLERES') AS oeilleres,
            COALESCE(p.tx_victoire, 0) AS tx_victoire,
            COALESCE(p.tx_place, 0) AS tx_place,
            COALESCE(p.forme_recente, 0) AS forme_recente,
            0 AS reduction_km,
            'NEUTRE' AS avis_entraineur,
            'NON' AS deferre,
            0 AS rang_cote,
            0 AS ratio_cote_field
        FROM pmu_partants p
        LEFT JOIN pmu_courses c
            ON c.date_programme = p.date_programme
            AND c.num_reunion = p.num_reunion
            AND c.num_course = p.num_course{reunion_join}
        WHERE p.date_programme = ?{country_filter}
        ORDER BY p.num_reunion, p.num_course, p.num_pmu
    """


def load_ml_horses(conn, today):
    """Load the runners to score, with the race details of ``today``.

    Sources, tried in this order:
      1. If a ``historical_data`` table exists: today's
         'canalturf_partants' predictions with positive odds, left-joined to
         historical_data on the horse name. This result is returned even when
         it is empty.
      2. Today's pmu_partants rows of French meetings.
      3. The pmu_partants rows of the most recent programme date.

    Args:
        conn: sqlite3 connection whose rows support ``dict(row)``.
        today: programme date string to look up.

    Returns:
        ``(date_used, horses, course_info)``. ``course_info`` maps
        ``"<num_reunion>_<num_course>"`` to the pmu_courses row of ``today``;
        it is empty for source 3 and when nothing is found.
    """
    # Race details of the day, keyed by "<reunion>_<course>".
    course_info = {}
    c = conn.execute(
        """
        SELECT num_reunion, num_course, libelle, libelle_court, discipline, distance, heure_depart_str
        FROM pmu_courses
        WHERE date_programme = ?
        ORDER BY num_reunion, num_course
        """,
        (today,),
    )
    for row in c.fetchall():
        course_info[f"{row['num_reunion']}_{row['num_course']}"] = dict(row)

    if table_exists(conn, "historical_data"):
        # NOTE(review): these rows carry no num_reunion/num_course, and the
        # join on horse_name alone can yield several rows per horse - confirm
        # this is intended.
        c = conn.execute(
            """
            SELECT DISTINCT p.horse_name, p.horse_number, p.odds,
                h.age, h.sexe, h.nb_courses, h.nb_victoires, h.nb_places,
                h.tx_victoire, h.tx_place, h.forme_recente, h.reduction_km,
                h.gains_annee, h.cote_directe, h.distance, h.discipline,
                h.avis_entraineur, h.oeilleres, h.deferre, h.nb_partants,
                h.rang_cote, h.ratio_cote_field, h.musique
            FROM predictions p
            LEFT JOIN historical_data h ON h.horse_name = p.horse_name
            WHERE p.date = ? AND p.source = 'canalturf_partants' AND p.odds > 0
            """,
            (today,),
        )
        horses = [dict(row) for row in c.fetchall()]
        return today, horses, course_info

    # Today's French runners. The race details loaded above are reused
    # instead of running the identical pmu_courses query a second time.
    c = conn.execute(_pmu_partants_sql(french_only=True), (today,))
    horses = [dict(row) for row in c.fetchall()]
    if horses:
        return today, horses, course_info

    # Nothing for today: fall back to the latest programme date available.
    c = conn.execute("SELECT MAX(date_programme) FROM pmu_partants")
    fallback_date = c.fetchone()[0]
    if fallback_date:
        c = conn.execute(_pmu_partants_sql(french_only=False), (fallback_date,))
        # NOTE(review): no race details are returned for the fallback date.
        return fallback_date, [dict(row) for row in c.fetchall()], {}

    return today, [], {}
|
|
|
|
|
|
def enrich_ml_horses(horses):
    """Normalise odds and fill in the per-race fields the ML features need.

    The horse dicts are modified in place. Horses are grouped into races by
    (date or date_programme, num_reunion, num_course). For every horse:

      - "odds" becomes a float taken from "odds" (or "cote_directe" when
        there is no "odds" key); unparsable values become 0.0;
      - "cote_directe" becomes a float, falling back to the odds;
      - missing keys receive neutral defaults;
      - "nb_partants", "rang_cote" (1 = shortest odds in the race, horses
        without odds last) and "ratio_cote_field" (odds divided by the race's
        mean positive odds) are computed when absent or falsy. Testing for
        falsy rather than missing matters because load_ml_horses() selects
        ``0`` placeholders for these columns, which setdefault() would keep.

    Args:
        horses: list of horse dicts.

    Returns:
        The same ``horses`` list.
    """

    def _as_float(value):
        # Tolerant float conversion: None, "" and unparsable values give 0.0.
        try:
            return float(value or 0)
        except (TypeError, ValueError):
            return 0.0

    # Defaults applied only to keys that are missing altogether.
    defaults = {
        "age": 0,
        "sexe": "U",
        "nb_courses": 0,
        "nb_victoires": 0,
        "nb_places": 0,
        "tx_victoire": 0,
        "tx_place": 0,
        "forme_recente": 0,
        "reduction_km": 0,
        "gains_annee": 0,
        "distance": 0,
        "discipline": "PLAT",
        "avis_entraineur": "NEUTRE",
        "oeilleres": "SANS",
        "deferre": "NON",
        "musique": "",
    }

    races = {}
    for horse in horses:
        race_key = (
            horse.get("date") or horse.get("date_programme"),
            horse.get("num_reunion"),
            horse.get("num_course"),
        )
        races.setdefault(race_key, []).append(horse)

    for group in races.values():
        for horse in group:
            odds = _as_float(horse.get("odds", horse.get("cote_directe", 0)))
            horse["odds"] = odds
            horse["cote_directe"] = _as_float(horse.get("cote_directe", odds)) or odds

        positive_odds = [h["odds"] for h in group if h["odds"] > 0]
        avg_odds = sum(positive_odds) / len(positive_odds) if positive_odds else 0

        # Horses without odds (0) sort after every priced horse.
        ranked = sorted(group, key=lambda h: h["odds"] or 999999)

        for idx, horse in enumerate(ranked, start=1):
            horse.setdefault("horse_number", horse.get("num_pmu"))
            horse.setdefault("horse_name", horse.get("nom"))
            for key, value in defaults.items():
                horse.setdefault(key, value)

            if not horse.get("nb_partants"):
                horse["nb_partants"] = len(group)
            if not horse.get("rang_cote"):
                horse["rang_cote"] = idx
            if not horse.get("ratio_cote_field"):
                horse["ratio_cote_field"] = (
                    round(horse["odds"] / avg_odds, 3) if avg_odds > 0 else 0
                )

    return horses
|
|
|
|
|
|
def prepare_features_from_db(horse_data):
    """Wrap one horse record in a single-row DataFrame.

    Missing values in the categorical columns that are present
    (discipline, sexe, avis_entraineur, oeilleres, deferre) are replaced by
    the string "UNKNOWN"; no other encoding is applied.
    """
    frame = pd.DataFrame([horse_data])

    categorical = ("discipline", "sexe", "avis_entraineur", "oeilleres", "deferre")
    present = [name for name in categorical if name in frame.columns]
    for name in present:
        frame[name] = frame[name].fillna("UNKNOWN")

    return frame
|
|
|
|
|
|
@app.route("/")
def index():
    """Serve the dashboard page at the site root."""
    dashboard_page = "/home/h3r7/turf_saas/dashboard.html"
    return send_file(dashboard_page)
|
|
|
|
|
|
@app.route("/turf/")
@app.route("/turf")
def turf_index():
    """Serve the dashboard page under the /turf prefix (with or without slash)."""
    dashboard_page = "/home/h3r7/turf_saas/dashboard.html"
    return send_file(dashboard_page)
|
|
|
|
|
|
@app.route("/turf/<path:filename>")
def turf_static(filename):
    """Serve ``filename`` from the application directory under /turf/."""
    static_root = "/home/h3r7/turf_saas"
    return send_from_directory(static_root, filename)
|
|
|
|
|
|
@app.route("/api/today")
@app.route("/turf/api")
def api_today():
    """Return today's dashboard payload as JSON.

    Query parameters:
        race: optional exact ``race_name``; when given, the race list, the
            runners and the tips are restricted to that race.

    The payload holds the day's races, the first race as "race", runners with
    odds ("predictions.partants"), the tips by category (bases, chances,
    outsiders), the top 5 results, the latest weather row, a risk label for
    the runners and yesterday's score of the "bases" tips.

    The race, runner and tip queries are best-effort: a failure is printed
    and leaves that section empty. Errors in the remaining queries propagate,
    but the connection is now always closed.
    """
    today = datetime.now().strftime("%Y-%m-%d")
    race_filter = request.args.get("race", "")

    data = {
        "date": today,
        "races": [],
        "race": {},
        "predictions": {},
        "results": [],
        "weather": {},
        "scores": {},
    }

    # Optional race filter shared by the queries below. Only this constant
    # fragment is interpolated; the value is always a bound parameter.
    race_condition = " AND race_name = ?" if race_filter else ""
    race_params = (race_filter,) if race_filter else ()

    conn = get_db()
    try:
        # All races of the day
        try:
            races = conn.execute(
                f"""
                SELECT DISTINCT race_name, race_hippodrome, race_time
                FROM predictions
                WHERE date = ? AND source = 'canalturf_partants'{race_condition}
                ORDER BY race_time ASC
                """,
                (today, *race_params),
            ).fetchall()

            data["races"] = [
                {"name": r[0], "hippodrome": r[1], "time": r[2]} for r in races
            ]
            if races:
                race_name, hippodrome, race_time = races[0]
                data["race"] = {
                    "name": f"{hippodrome} - {race_time} {race_name}",
                    "hippodrome": hippodrome,
                    "time": race_time,
                }
        except Exception as e:
            print(f"Erreur races: {e}")

        # Today's runners - only those with odds
        try:
            c = conn.execute(
                f"""
                SELECT horse_name, horse_number, AVG(odds) as odds, prediction_rank, source, jockey
                FROM predictions
                WHERE date = ? AND source = 'canalturf_partants' AND odds > 0{race_condition}
                GROUP BY horse_name
                ORDER BY odds ASC
                """,
                (today, *race_params),
            )
            data["predictions"]["partants"] = [dict(row) for row in c.fetchall()]
        except Exception as e:
            print(f"Erreur partants: {e}")
            data["predictions"]["partants"] = []

        # Tips (bases, chances, outsiders)
        for cat, src in [
            ("bases", "canalturf_prono_bases"),
            ("chances", "canalturf_prono_chances"),
            ("outsiders", "canalturf_prono_outsiders"),
        ]:
            try:
                c = conn.execute(
                    f"""
                    SELECT DISTINCT horse_name, horse_number, prediction_rank
                    FROM predictions WHERE date = ? AND source = ?{race_condition}
                    ORDER BY prediction_rank
                    """,
                    (today, src, *race_params),
                )
                data["predictions"][cat] = [dict(row) for row in c.fetchall()]
            except Exception as e:
                # Best-effort like the sections above, but no longer a bare
                # except that would also swallow KeyboardInterrupt/SystemExit.
                print(f"Erreur {cat}: {e}")
                data["predictions"][cat] = []

        # Today's results
        c = conn.execute(
            "SELECT horse_name, position, odds FROM results WHERE date = ? ORDER BY position LIMIT 5",
            (today,),
        )
        data["results"] = [dict(row) for row in c.fetchall()]

        # Weather
        c = conn.execute("SELECT * FROM weather ORDER BY id DESC LIMIT 1")
        row = c.fetchone()
        if row:
            data["weather"] = dict(row)

        # Risk label of the runners.
        # NOTE(review): these rows carry odds but no ML scores, so
        # calculate_risque() sees every ML score as 0 - confirm the resulting
        # label is meaningful for the dashboard.
        partants_list = data["predictions"].get("partants", [])
        if partants_list:
            risque_label, risque_course = calculate_risque(partants_list)
            data["risque_label"] = risque_label
            data["risque_course"] = risque_course

        # Yesterday's score: how many "bases" tips finished in the top 3
        yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        data["scores"]["date"] = yesterday
        c = conn.execute(
            "SELECT horse_name FROM results WHERE date = ? AND position <= 3", (yesterday,)
        )
        result_names = {r[0] for r in c.fetchall()}
        c = conn.execute(
            "SELECT DISTINCT horse_name FROM predictions WHERE date = ? AND source='canalturf_prono_bases'",
            (yesterday,),
        )
        our_preds = [r[0] for r in c.fetchall()]
        our_score = sum(1 for p in our_preds if p in result_names)
        data["scores"]["bases"] = f"{our_score}/{len(our_preds)}" if our_preds else "-"
    finally:
        conn.close()

    return jsonify(data)
|
|
|
|
|
|
@app.route("/api/odds_history")
@app.route("/turf/api/odds_history")
def api_odds_history():
    """Return today's odds snapshots per horse, with their evolution.

    For each horse the response lists every snapshot (odds and the HH:MM
    slice of ``scraped_at``), the first and latest odds, the percentage
    change between them and a trend: "baisse" below -5 %, "hausse" above
    +5 %, "stable" otherwise. Horses are sorted by their latest odds.

    NULL odds are treated as 0 for the computations and a NULL
    ``scraped_at`` yields an empty time, instead of raising.
    """
    today = datetime.now().strftime("%Y-%m-%d")

    conn = get_db()
    try:
        rows = conn.execute(
            """
            SELECT horse_name, horse_number, odds, scraped_at
            FROM odds_history
            WHERE date = ?
            ORDER BY horse_name, scraped_at ASC
            """,
            (today,),
        ).fetchall()
    finally:
        # Close even when the query fails (e.g. missing table).
        conn.close()

    # Group the snapshots by horse, keeping the chronological order.
    horses = {}
    for row in rows:
        name = row["horse_name"]
        entry = horses.setdefault(
            name,
            {
                "horse_name": name,
                "horse_number": row["horse_number"],
                "snapshots": [],
            },
        )
        entry["snapshots"].append(
            # Characters 11..15 of the timestamp string are its HH:MM part.
            {"odds": row["odds"], "time": (row["scraped_at"] or "")[11:16]}
        )

    result = []
    for name, entry in horses.items():
        snaps = entry["snapshots"]
        debut = snaps[0]["odds"] or 0
        actuel = snaps[-1]["odds"] or 0

        if debut > 0 and len(snaps) > 1:
            evol_pct = round(((actuel - debut) / debut) * 100, 1)
        else:
            evol_pct = 0

        if evol_pct < -5:
            tendance = "baisse"
        elif evol_pct > 5:
            tendance = "hausse"
        else:
            tendance = "stable"

        result.append(
            {
                "horse_name": name,
                "horse_number": entry["horse_number"],
                "odds_debut": debut,
                "odds_actuel": actuel,
                "evol_pct": evol_pct,
                "nb_snapshots": len(snaps),
                "snapshots": snaps,
                "tendance": tendance,
            }
        )

    result.sort(key=lambda item: item["odds_actuel"])
    return jsonify({"date": today, "horses": result})
|
|
|
|
|
|
@app.route("/api/weather")
def api_weather():
    """Return the four most recent weather rows (highest id first) as JSON."""
    conn = get_db()
    try:
        c = conn.execute("SELECT * FROM weather ORDER BY id DESC LIMIT 4")
        weather = [dict(row) for row in c.fetchall()]
    finally:
        # Release the connection even when the query raises.
        conn.close()
    return jsonify(weather)
|
|
|
|
|
|
# Feature columns, in the order passed to the models.
_ML_FEATURE_COLS = [
    "age",
    "sexe_enc",
    "nb_courses",
    "nb_victoires",
    "nb_places",
    "tx_victoire",
    "tx_place",
    "forme_recente",
    "reduction_km",
    "gains_annee",
    "cote_directe",
    "distance",
    "nb_partants",
    "discipline_enc",
    "avis_enc",
    "oeilleres_enc",
    "deferre_enc",
    "form_1",
    "form_2",
    "form_3",
    "form_4",
    "form_5",
    "form_avg",
    "win_rate_adj",
    "place_rate_adj",
    "implied_prob",
    "victories_per_race",
    "places_per_race",
    "earnings_per_race",
    "age_win_interact",
    "distance_cat",
    "is_favorite",
    "rang_cote",
    "ratio_cote_field",
]

# Horse fields copied into the features as floats (missing/None -> 0).
_ML_NUMERIC_COLS = (
    "age",
    "nb_courses",
    "nb_victoires",
    "nb_places",
    "tx_victoire",
    "tx_place",
    "forme_recente",
    "reduction_km",
    "gains_annee",
    "cote_directe",
    "distance",
    "nb_partants",
    "rang_cote",
    "ratio_cote_field",
)

# (feature name, horse field, value used when the field is missing or empty)
_ML_CATEGORICALS = (
    ("sexe_enc", "sexe", "U"),
    ("avis_enc", "avis_entraineur", "NEUTRE"),
    ("oeilleres_enc", "oeilleres", "SANS"),
    ("deferre_enc", "deferre", "NON"),
    ("discipline_enc", "discipline", "PLAT"),
)


def _fit_ml_encoders(horses):
    """Fit one LabelEncoder per categorical field on the values seen in ``horses``."""
    # NOTE(review): the encoders are refitted on each request's data, so the
    # integer codes depend on which values are present - confirm this matches
    # the encoding used when the models were trained.
    encoders = {}
    for feature, field, fallback in _ML_CATEGORICALS:
        values = {h.get(field, fallback) or fallback for h in horses}
        encoder = LabelEncoder()
        encoder.fit(list(values) + [fallback])
        encoders[feature] = encoder
    return encoders


def _build_ml_features(horse, encoders):
    """Build the feature dict of one horse (keys listed in _ML_FEATURE_COLS)."""
    import re

    features = {col: float(horse.get(col, 0) or 0) for col in _ML_NUMERIC_COLS}

    for feature, field, fallback in _ML_CATEGORICALS:
        features[feature] = encoders[feature].transform(
            [horse.get(field, fallback) or fallback]
        )[0]

    # Form features: the first five numbers found in "musique", 0.0-padded.
    form_nums = re.findall(r"\d+", str(horse.get("musique", "")))[:5]
    for i in range(1, 6):
        features[f"form_{i}"] = float(form_nums[i - 1]) if i <= len(form_nums) else 0.0
    features["form_avg"] = sum(features[f"form_{i}"] for i in range(1, 6)) / 5

    # Derived features; max(nb_courses, 1) avoids dividing by zero.
    races_run = max(features["nb_courses"], 1)
    features["implied_prob"] = (
        1 / features["cote_directe"] if features["cote_directe"] > 0 else 0
    )
    features["win_rate_adj"] = features["tx_victoire"] * np.log1p(features["nb_courses"])
    features["place_rate_adj"] = features["tx_place"] * np.log1p(features["nb_courses"])
    features["victories_per_race"] = features["nb_victoires"] / races_run
    features["places_per_race"] = features["nb_places"] / races_run
    features["earnings_per_race"] = features["gains_annee"] / races_run
    features["age_win_interact"] = features["age"] * features["tx_victoire"]
    if 1500 < features["distance"] <= 2000:
        features["distance_cat"] = 2.0
    elif 2000 < features["distance"] <= 2500:
        features["distance_cat"] = 3.0
    else:
        features["distance_cat"] = 1.0
    features["is_favorite"] = 1 if features["cote_directe"] < 5 else 0
    return features


def _predict_ml_horse(models, horse, encoders):
    """Score one horse with both models and return its prediction dict."""
    features = _build_ml_features(horse, encoders)
    X = pd.DataFrame([features])[_ML_FEATURE_COLS].fillna(0)

    # Column 1 of predict_proba is read as the probability of the outcome.
    prob_top1 = float(models["model_top1"].predict_proba(X)[0][1])
    prob_top3 = float(models["model_top3"].predict_proba(X)[0][1])
    odds = float(horse["odds"])

    if prob_top1 > 0.15:
        recommendation = "top1"
    elif prob_top3 > 0.35:
        recommendation = "top3"
    else:
        recommendation = "pass"

    return {
        "horse_name": horse["horse_name"],
        "horse_number": horse["horse_number"],
        "odds": odds,
        "prob_top1": round(prob_top1 * 100, 1),
        "prob_top3": round(prob_top3 * 100, 1),
        "ml_score": round((prob_top1 * 0.6 + prob_top3 * 0.4) * 100, 1),
        "recommendation": recommendation,
        # Long odds that the model still places in the top 3.
        "is_value_bet": 1 if (prob_top3 > 0.35 and odds > 10) else 0,
        # Short odds that the model does not back.
        "is_outlier": 1 if (odds <= 5 and prob_top1 < 0.1 and prob_top3 < 0.25) else 0,
        "num_reunion": horse.get("num_reunion"),
        "num_course": horse.get("num_course"),
    }


def _attach_ml_race_risk(predictions):
    """Compute the risk of each race and store it on every prediction of that race."""
    from collections import defaultdict

    runners_by_race = defaultdict(list)
    for p in predictions:
        runners_by_race[(p.get("num_reunion"), p.get("num_course"))].append({
            "odds": p.get("odds", 999),
            "ml_score": p.get("ml_score", 0),
            "prob_top1": p.get("prob_top1", 0),
            "prob_top3": p.get("prob_top3", 0),
        })

    risk_by_race = {}
    for key, partants in runners_by_race.items():
        label, score = calculate_risque(partants)
        risk_by_race[key] = (label or "neutral", score or 50)

    for p in predictions:
        rl, rs = risk_by_race.get(
            (p.get("num_reunion"), p.get("num_course")), ("neutral", 50)
        )
        p["risque_label"] = rl
        p["risque_score"] = rs


@app.route("/api/ml_predictions")
@app.route("/turf/api/ml_predictions")
def api_ml_predictions():
    """ML-powered predictions using XGBoost, backed by the database cache.

    Query parameters:
        refresh: "1" skips the cache and forces a recomputation.

    Today's cached predictions are returned when present. Otherwise the
    runners are loaded, enriched and scored by the "model_top1" and
    "model_top3" models, the race details and a per-race risk are attached,
    and the result is written to the cache (a cache failure is logged, not
    fatal). A horse whose features or prediction fail gets an entry with an
    "error" key instead of aborting the request.

    Every outcome, including the error payloads, is returned as JSON with
    the default status code.
    """
    if not ML_AVAILABLE:
        return jsonify({"error": "ML libraries not available"})

    today = datetime.now().strftime("%Y-%m-%d")
    force_refresh = request.args.get("refresh", "0") == "1"

    conn = get_db()
    try:
        # --- CACHE READ ---
        if not force_refresh:
            cached_preds, cached_courses = get_ml_from_cache(conn, today)
            if cached_preds:
                return jsonify({
                    "date": today,
                    "model_version": "xgboost_v1",
                    "predictions": cached_preds,
                    "courses": cached_courses,
                    "from_cache": True,
                })

        # --- ML COMPUTATION ---
        models = load_models()
        if not models or models is True:
            return jsonify(
                {
                    "error": "Models not loaded",
                    "message": "Run train_xgboost.py first to train the models",
                }
            )

        date_used, horses, course_info = load_ml_horses(conn, today)
        horses = enrich_ml_horses(horses)
        if not horses:
            return jsonify(
                {
                    "date": date_used,
                    "predictions": [],
                    "message": "No predictions available",
                }
            )

        encoders = _fit_ml_encoders(horses)

        predictions = []
        for horse in horses:
            try:
                predictions.append(_predict_ml_horse(models, horse, encoders))
            except Exception as e:
                predictions.append(
                    {
                        "horse_name": horse["horse_name"],
                        "horse_number": horse["horse_number"],
                        "odds": horse["odds"],
                        "error": str(e),
                    }
                )

        # Sort by ML score (entries without a score sort as 0)
        predictions.sort(key=lambda x: x.get("ml_score", 0), reverse=True)

        # Add course info to predictions
        for pred in predictions:
            course_key = f"{pred.get('num_reunion')}_{pred.get('num_course')}"
            if course_key in course_info:
                cinfo = course_info[course_key]
                pred["race_label"] = f"R{pred.get('num_reunion')}C{pred.get('num_course')}"
                pred["race_name"] = cinfo.get("libelle", "")
                pred["hippodrome"] = cinfo.get("libelle_court", "")
                pred["discipline"] = cinfo.get("discipline", "")
                pred["distance"] = cinfo.get("distance", 0)
                pred["heure"] = cinfo.get("heure_depart_str", "")

        # --- PER-RACE RISK ---
        _attach_ml_race_risk(predictions)

        # --- CACHE WRITE (non-blocking) ---
        # NOTE(review): the cache key is today's date even when date_used is
        # a fallback date - confirm this is intended.
        try:
            save_ml_to_cache(conn, today, predictions)
        except Exception as e_cache:
            print(f"⚠️ Failed to save ML cache: {e_cache}")

        return jsonify(
            {
                "date": date_used,
                "model_version": "xgboost_v1",
                "predictions": predictions,
                "courses": course_info,
                "from_cache": False,
            }
        )
    finally:
        # Single exit point for the connection, including error paths.
        conn.close()
|
|
|
|
|
|
@app.route("/api/ml_predictions/refresh")
@app.route("/turf/api/ml_predictions/refresh")
def api_ml_predictions_refresh():
    """Force a recomputation of today's ML predictions.

    Deletes today's rows from the cache, then redirects to the main
    predictions endpoint with ``refresh=1`` so it recomputes and re-caches.
    """
    from flask import redirect, url_for

    today = datetime.now().strftime("%Y-%m-%d")

    conn = get_db()
    try:
        ensure_ml_cache_table(conn)
        conn.execute("DELETE FROM ml_predictions_cache WHERE date = ?", (today,))
        conn.commit()
    finally:
        # Release the connection even when the delete fails.
        conn.close()

    # Delegate to the main endpoint; url_for appends unknown keyword
    # arguments as query parameters, giving "...?refresh=1".
    return redirect(url_for("api_ml_predictions", refresh=1))
|
|
|
|
|
|
@app.route("/api/scoring")
@app.route("/turf/api/scoring")
def api_scoring():
    """Get scoring data for dashboard - today only, filtered by race if provided.

    Query parameters:
        race: optional substring matched against ``race_name`` with LIKE.

    Returns JSON with the "scores" rows ordered by ``rang_scoring`` and an
    empty "recommendations" object.
    """
    race = request.args.get("race", "")
    today = datetime.now().strftime("%Y-%m-%d")

    query = """
        SELECT date, race_name, horse_name, horse_number, score,
            score_cote, score_forme, score_victoire, score_place,
            cote, forme_recente, tx_victoire, tx_place,
            rang_scoring, avis_entraineur, musique
        FROM scoring
        WHERE date = ?
    """
    params = [today]
    if race:
        # The user value is bound as a parameter, never interpolated.
        query += " AND race_name LIKE ?"
        params.append(f"%{race}%")
    query += " ORDER BY rang_scoring ASC"

    conn = get_db()
    try:
        scores = [dict(row) for row in conn.execute(query, params).fetchall()]
    finally:
        # Release the connection even when the query raises.
        conn.close()

    return jsonify({"scores": scores, "recommendations": {}})
|
|
|
|
|
|
# === RAPPORTS AUTOMATISÉS ===
|
|
try:
|
|
from analytics_reports import (
|
|
get_daily_report,
|
|
get_weekly_report,
|
|
get_monthly_report,
|
|
)
|
|
|
|
HAS_ANALYTICS = True
|
|
except ImportError:
|
|
HAS_ANALYTICS = False
|
|
|
|
|
|
@app.route("/turf/api/report/daily")
def api_report_daily():
    """Daily report for the optional ``date`` query parameter.

    Responds with HTTP 500 when the analytics module could not be imported.
    """
    if HAS_ANALYTICS:
        requested_date = request.args.get("date")
        return jsonify(get_daily_report(requested_date))
    return jsonify({"error": "analytics module not available"}), 500
|
|
|
|
|
|
@app.route("/turf/api/report/weekly")
def api_report_weekly():
    """Weekly report for the optional ``start`` / ``end`` query parameters.

    Responds with HTTP 500 when the analytics module could not be imported.
    """
    if HAS_ANALYTICS:
        period_start = request.args.get("start")
        period_end = request.args.get("end")
        return jsonify(get_weekly_report(period_start, period_end))
    return jsonify({"error": "analytics module not available"}), 500
|
|
|
|
|
|
@app.route("/turf/api/report/monthly")
def api_report_monthly():
    """Monthly report for the optional integer ``year`` / ``month`` parameters.

    Responds with HTTP 500 when the analytics module could not be imported.
    """
    if HAS_ANALYTICS:
        report_year = request.args.get("year", type=int)
        report_month = request.args.get("month", type=int)
        return jsonify(get_monthly_report(report_year, report_month))
    return jsonify({"error": "analytics module not available"}), 500
|
|
|
|
|
|
@app.route("/turf/api/suggestions")
def api_suggestions():
    """Suggested questions, chosen by whether recent data exists.

    Counts the pmu_partants rows of the last 7 days: one list is returned
    when there are some, another when there are none, and a generic list
    when the database query fails.
    """
    conn = get_db()
    try:
        recent = conn.execute(
            "SELECT COUNT(*) as cnt FROM pmu_partants WHERE date_programme >= date('now', '-7 days')"
        ).fetchone()[0]

        if recent > 0:
            suggestions = [
                "Quel est mon taux de réussite cette semaine?",
                "Liste les 5 meilleurs jockeys",
                "Quel est le ROI du mois?",
                "Résultats d'hier",
                "Programme du jour",
            ]
        else:
            suggestions = [
                "Derniers gagnants",
                "Meilleurs entraîneurs",
                "Performances à Vincennes",
                "Évolution des cotes",
            ]
    except sqlite3.Error as e:
        # Only database failures trigger the fallback; a bare except would
        # also have swallowed KeyboardInterrupt and programming errors.
        print(f"Erreur suggestions: {e}")
        suggestions = [
            "Quel est le taux de réussite des favoris?",
            "Liste les meilleurs jockeys",
            "Résultats d'hier",
        ]
    finally:
        conn.close()

    return jsonify({"suggestions": suggestions})
|
|
|
|
|
|
|
|
|
|
@app.route("/turf/api/metrics/summary")
@app.route("/turf/api/metrics/summary/")
def metrics_summary():
    """Aggregate prediction metrics per source over the last ``days`` days.

    Query parameters:
        days: look-back window, default 30, clamped to 1..365.

    Returns ``{"summary": [...]}`` ordered by average place rate, or
    ``{"error": True, "message": ...}`` (still with the default status code)
    when anything fails.
    """
    days = min(max(request.args.get("days", 30, type=int), 1), 365)
    conn = None
    try:
        conn = get_db()
        # SQLite date modifier, e.g. "-30 days"; passed as a bound parameter.
        date_filter = f"-{days} days"
        cur = conn.execute(
            "SELECT source, COUNT(*) as nb_courses, SUM(nb_predictions) as total_predictions, "
            "SUM(nb_gagnants) as total_gagnants, SUM(nb_places) as total_places, SUM(nb_top5) as total_top5, "
            "ROUND(AVG(taux_gagnant), 2) as moy_taux_gagnant, ROUND(AVG(taux_place), 2) as moy_taux_place, "
            "ROUND(AVG(taux_top5), 2) as moy_taux_top5, ROUND(SUM(roi_sg_net), 3) as roi_sg_cumul, "
            "ROUND(SUM(roi_sp_net), 3) as roi_sp_cumul, ROUND(AVG(ecart_rang_moyen), 2) as moy_ecart_rang, "
            "SUM(quinte_5sur5) as nb_5sur5, SUM(quinte_4sur5) as nb_4sur5, SUM(quinte_3sur5) as nb_3sur5 "
            "FROM prediction_metrics WHERE date >= date('now', ?) GROUP BY source ORDER BY moy_taux_place DESC",
            (date_filter,),
        )
        cols = [d[0] for d in cur.description]
        rows = [dict(zip(cols, row)) for row in cur.fetchall()]
        return jsonify({"summary": rows})
    except Exception as e:
        return jsonify({"error": True, "message": str(e)})
    finally:
        # Previously the connection stayed open whenever the query raised.
        if conn is not None:
            conn.close()
|
|
|
|
@app.route("/turf/api/metrics/daily")
@app.route("/turf/api/metrics/daily/")
def metrics_daily():
    """Prediction metrics per day and source over the last ``days`` days.

    Query parameters:
        days: look-back window, default 30, clamped to 1..365.

    Returns ``{"daily": [...]}`` ordered by date descending, or
    ``{"error": True, "message": ...}`` (still with the default status code)
    when anything fails.
    """
    days = min(max(request.args.get("days", 30, type=int), 1), 365)
    conn = None
    try:
        conn = get_db()
        # SQLite date modifier, e.g. "-30 days"; passed as a bound parameter.
        date_filter = f"-{days} days"
        cur = conn.execute(
            "SELECT date, source, SUM(nb_predictions) as predictions, SUM(nb_gagnants) as gagnants, "
            "SUM(nb_places) as places, SUM(nb_top5) as top5, ROUND(AVG(taux_gagnant), 2) as taux_gagnant, "
            "ROUND(AVG(taux_place), 2) as taux_place, ROUND(AVG(roi_sg_net), 3) as roi_sg, "
            "ROUND(AVG(roi_sp_net), 3) as roi_sp, SUM(quinte_5sur5) as quinte_5sur5, "
            "SUM(quinte_4sur5) as quinte_4sur5 "
            "FROM prediction_metrics WHERE date >= date('now', ?) GROUP BY date, source ORDER BY date DESC",
            (date_filter,),
        )
        cols = [d[0] for d in cur.description]
        rows = [dict(zip(cols, row)) for row in cur.fetchall()]
        return jsonify({"daily": rows})
    except Exception as e:
        return jsonify({"error": True, "message": str(e)})
    finally:
        # Previously the connection stayed open whenever the query raised.
        if conn is not None:
            conn.close()
|
|
|
|
if __name__ == "__main__":
    # Try to load the models once before the server starts accepting requests.
    load_models()
    # Listen on all interfaces, port 8791, with the Flask debugger disabled.
    app.run(debug=False, port=8791, host="0.0.0.0")
|