421 lines
14 KiB
Python
Executable File
421 lines
14 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
calculate_metrics.py - Compute performance metrics for predictions vs. results

Usage:
    python3 calculate_metrics.py                     # Today
    python3 calculate_metrics.py --date 2026-04-15   # Specific date
    python3 calculate_metrics.py --yesterday         # Yesterday
    python3 calculate_metrics.py --backfill 30       # Fill in the last 30 days

Computed after 21:00 (PMU results available)
|
|
"""
|
|
|
|
import sqlite3
|
|
import json
|
|
import argparse
|
|
from datetime import datetime, timedelta
|
|
from collections import defaultdict
|
|
|
|
# Absolute path of the SQLite database opened by get_db().
DB_PATH = "/home/h3r7/turf_scraper/turf.db"

# =============================================================================
# DATABASE SCHEMA
# =============================================================================

# DDL script executed with executescript() by init_db(). Every statement uses
# IF NOT EXISTS, so the script can be replayed on each run; the flip side is
# that a table or view that already exists keeps its old definition until it
# is dropped manually.
#
# The 30-day window of v_metrics_summary_30d is anchored on the local date
# ('localtime' modifier) because the script itself derives its dates from the
# local clock (datetime.now()); a bare date('now') is evaluated in UTC.
METRICS_SCHEMA = """
-- Table principale des métriques par course/source
CREATE TABLE IF NOT EXISTS prediction_metrics (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    date TEXT NOT NULL,
    race_time TEXT,
    race_name TEXT,
    race_hippodrome TEXT,
    source TEXT NOT NULL,
    discipline TEXT,

    -- Comptages
    nb_predictions INTEGER DEFAULT 0,
    nb_gagnants INTEGER DEFAULT 0,
    nb_places INTEGER DEFAULT 0,
    nb_top5 INTEGER DEFAULT 0,
    nb_hors_top5 INTEGER DEFAULT 0,

    -- Taux
    taux_gagnant REAL,
    taux_place REAL,
    taux_top5 REAL,

    -- Rangs
    rang_moyen REAL,
    ecart_rang_moyen REAL,

    -- ROI avec dividendes réels PMU
    roi_sg_brut REAL,
    roi_sg_net REAL,
    roi_sp_brut REAL,
    roi_sp_net REAL,

    -- Quinté
    quinte_5sur5 INTEGER DEFAULT 0,
    quinte_4sur5 INTEGER DEFAULT 0,
    quinte_3sur5 INTEGER DEFAULT 0,
    quinte_2sur5 INTEGER DEFAULT 0,

    -- Value
    value_bet_score REAL,
    top_cote_gagnante REAL,

    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(date, race_time, source)
);

-- Vue détaillée des performances
CREATE VIEW IF NOT EXISTS v_predictions_performance AS
SELECT
    pr.date,
    pr.race_time,
    pr.race_name,
    pr.race_hippodrome,
    pr.horse_number,
    pr.horse_name,
    pr.prediction_rank,
    pr.odds AS cote_prediction,
    pr.source,
    pa.ordre_arrivee,
    pa.cote_direct AS cote_finale,
    pa.driver,

    -- Indicateurs binaires
    CASE WHEN pa.ordre_arrivee = 1 THEN 1 ELSE 0 END AS is_gagnant,
    CASE WHEN pa.ordre_arrivee <= 3 THEN 1 ELSE 0 END AS is_place,
    CASE WHEN pa.ordre_arrivee <= 5 THEN 1 ELSE 0 END AS is_top5,

    -- Écarts
    ABS(COALESCE(pr.prediction_rank, 99) - COALESCE(pa.ordre_arrivee, 99)) AS ecart_rang,

    -- Value réalisée
    CASE
        WHEN pa.ordre_arrivee = 1 AND pa.cote_direct > pr.odds
        THEN ROUND((pa.cote_direct - pr.odds) / pr.odds * 100, 1)
        ELSE 0
    END AS value_realized

FROM predictions pr
LEFT JOIN pmu_partants pa
    ON pa.date_programme = pr.date
    AND pa.nom = pr.horse_name;

-- Vue résumé par source (30 jours glissants)
CREATE VIEW IF NOT EXISTS v_metrics_summary_30d AS
SELECT
    source,
    COUNT(*) as nb_courses,
    SUM(nb_predictions) as total_predictions,
    SUM(nb_gagnants) as total_gagnants,
    SUM(nb_places) as total_places,
    SUM(nb_top5) as total_top5,
    ROUND(AVG(taux_gagnant), 2) as moy_taux_gagnant,
    ROUND(AVG(taux_place), 2) as moy_taux_place,
    ROUND(AVG(taux_top5), 2) as moy_taux_top5,
    ROUND(AVG(roi_sg_net), 3) as moy_roi_sg,
    ROUND(AVG(roi_sp_net), 3) as moy_roi_sp,
    ROUND(AVG(ecart_rang_moyen), 2) as moy_ecart_rang,
    SUM(quinte_5sur5) as nb_5sur5,
    SUM(quinte_4sur5) as nb_4sur5,
    SUM(quinte_3sur5) as nb_3sur5,
    ROUND(SUM(quinte_5sur5) * 100.0 / NULLIF(COUNT(*), 0), 1) as pct_5sur5,
    ROUND(SUM(quinte_4sur5) * 100.0 / NULLIF(COUNT(*), 0), 1) as pct_4sur5
FROM prediction_metrics
WHERE date >= date('now', 'localtime', '-30 days')
GROUP BY source
ORDER BY moy_taux_place DESC;

-- Vue évolution quotidienne
CREATE VIEW IF NOT EXISTS v_metrics_daily AS
SELECT
    date,
    source,
    SUM(nb_predictions) as predictions,
    SUM(nb_gagnants) as gagnants,
    SUM(nb_places) as places,
    SUM(nb_top5) as top5,
    ROUND(AVG(taux_gagnant), 2) as taux_gagnant,
    ROUND(AVG(taux_place), 2) as taux_place,
    ROUND(AVG(roi_sg_net), 3) as roi_sg,
    ROUND(AVG(roi_sp_net), 3) as roi_sp,
    SUM(quinte_5sur5) as quinte_5sur5,
    SUM(quinte_4sur5) as quinte_4sur5
FROM prediction_metrics
GROUP BY date, source
ORDER BY date DESC;
"""
|
|
|
|
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
|
|
|
|
def get_db():
    """Open the SQLite database at DB_PATH and return the connection.

    The connection uses ``sqlite3.Row`` as its row factory, so result rows
    can be read by column name as well as by index.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
def init_db():
    """Create the metrics table and views when they do not exist yet.

    Runs the METRICS_SCHEMA script on a fresh connection obtained from
    get_db(). The connection is closed even when the script raises, so a
    failing statement does not leak it; the confirmation message is printed
    only on success.
    """
    conn = get_db()
    try:
        conn.executescript(METRICS_SCHEMA)
        conn.commit()
    finally:
        conn.close()
    print("✅ Tables et vues initialisées")
|
|
|
|
def get_dividende_sg(conn, date, num_reunion, num_course, horse_number):
    """Return the Simple Gagnant dividend recorded for one horse.

    Queries ``pmu_rapports`` for the given programme date, meeting number and
    race number with ``type_pari = 'SIMPLE_GAGNANT'``; the horse number is
    converted to text before being compared with ``combinaison``.

    Args:
        conn: open SQLite connection.
        date: programme date, matched against ``date_programme``.
        num_reunion: meeting number.
        num_course: race number.
        horse_number: number of the horse.

    Returns:
        The ``dividende_euro`` value of the first matching row, or None when
        no row matches or the query fails with a database error.
    """
    try:
        row = conn.execute("""
            SELECT dividende_euro
            FROM pmu_rapports
            WHERE date_programme = ?
              AND num_reunion = ?
              AND num_course = ?
              AND type_pari = 'SIMPLE_GAGNANT'
              AND combinaison = ?
        """, (date, num_reunion, num_course, str(horse_number))).fetchone()
    except sqlite3.Error:
        # Best effort: a missing table or a failing query means "no dividend
        # known". Only database errors are absorbed, so programming mistakes
        # still surface instead of being silently turned into None.
        return None
    # Positional access works whatever row factory the connection uses.
    return row[0] if row else None
|
|
|
|
def get_dividende_sp(conn, date, num_reunion, num_course, horse_number):
    """Return the Simple Placé dividend recorded for one horse.

    Queries ``pmu_rapports`` for the given programme date, meeting number and
    race number with ``type_pari = 'SIMPLE_PLACE'``; the horse number is
    converted to text before being compared with ``combinaison``.

    Args:
        conn: open SQLite connection.
        date: programme date, matched against ``date_programme``.
        num_reunion: meeting number.
        num_course: race number.
        horse_number: number of the horse.

    Returns:
        The ``dividende_euro`` value of the first matching row, or None when
        no row matches or the query fails with a database error.
    """
    try:
        row = conn.execute("""
            SELECT dividende_euro
            FROM pmu_rapports
            WHERE date_programme = ?
              AND num_reunion = ?
              AND num_course = ?
              AND type_pari = 'SIMPLE_PLACE'
              AND combinaison = ?
        """, (date, num_reunion, num_course, str(horse_number))).fetchone()
    except sqlite3.Error:
        # Best effort: a missing table or a failing query means "no dividend
        # known". Only database errors are absorbed, so programming mistakes
        # still surface instead of being silently turned into None.
        return None
    # Positional access works whatever row factory the connection uses.
    return row[0] if row else None
|
|
|
|
# =============================================================================
# METRICS COMPUTATION
# =============================================================================
|
|
|
|
def calculate_course_metrics(conn, date, race_time, race_name, source):
    """Compute and store the metrics of one race for one prediction source.

    Loads the rows of ``predictions`` matching (date, race_time, source),
    left-joined to ``pmu_partants`` on programme date and horse name, derives
    counts, rates, rank statistics, ROI series, quinté flags and value
    indicators, then writes one row to ``prediction_metrics`` with
    INSERT OR REPLACE. Nothing is written when no prediction matches. The
    function does not commit; that is left to the caller.

    Args:
        conn: open SQLite connection whose rows can be read by column name.
        date: race date as stored in ``predictions.date``.
        race_time: race time as stored in ``predictions.race_time``.
        race_name: race name, copied as-is into the metrics row.
        source: prediction source identifier.

    Returns:
        None.
    """
    # Predictions of this race/source with the matching runner, when any.
    preds = conn.execute("""
        SELECT
            pr.horse_number,
            pr.horse_name,
            pr.prediction_rank,
            pr.odds,
            pa.ordre_arrivee,
            pa.cote_direct,
            pa.num_reunion,
            pa.num_course
        FROM predictions pr
        LEFT JOIN pmu_partants pa
            ON pa.date_programme = pr.date
            AND pa.nom = pr.horse_name
        WHERE pr.date = ?
          AND pr.race_time = ?
          AND pr.source = ?
    """, (date, race_time, source)).fetchall()

    if not preds:
        return

    # Metadata
    hippodrome = conn.execute("""
        SELECT race_hippodrome FROM predictions
        WHERE date = ? AND race_time = ?
        LIMIT 1
    """, (date, race_time)).fetchone()
    race_hippodrome = hippodrome['race_hippodrome'] if hippodrome else None

    # Take the meeting/race numbers from the first prediction that matched a
    # runner. Reading them from preds[0] only would yield None whenever that
    # particular horse has no pmu_partants row (LEFT JOIN), which in turn
    # disables the discipline lookup and every dividend lookup below.
    located = next(
        (p for p in preds if p['num_reunion'] and p['num_course']), None
    )
    num_reunion = located['num_reunion'] if located else None
    num_course = located['num_course'] if located else None
    has_course_id = bool(num_reunion and num_course)

    # Discipline from pmu_courses
    discipline = None
    if has_course_id:
        disc_row = conn.execute("""
            SELECT discipline FROM pmu_courses
            WHERE date_programme = ? AND num_reunion = ? AND num_course = ?
        """, (date, num_reunion, num_course)).fetchone()
        discipline = disc_row['discipline'] if disc_row else None

    # Counts (a missing or zero finishing position never counts as a hit)
    nb_predictions = len(preds)
    nb_gagnants = sum(1 for p in preds if p['ordre_arrivee'] == 1)
    nb_places = sum(1 for p in preds if p['ordre_arrivee'] and p['ordre_arrivee'] <= 3)
    nb_top5 = sum(1 for p in preds if p['ordre_arrivee'] and p['ordre_arrivee'] <= 5)
    nb_hors_top5 = nb_predictions - nb_top5

    # Rates, as percentages of the number of predictions
    taux_gagnant = round(nb_gagnants / nb_predictions * 100, 2) if nb_predictions > 0 else 0
    taux_place = round(nb_places / nb_predictions * 100, 2) if nb_predictions > 0 else 0
    taux_top5 = round(nb_top5 / nb_predictions * 100, 2) if nb_predictions > 0 else 0

    # Mean finishing position over the horses that have one
    rangs = [p['ordre_arrivee'] for p in preds if p['ordre_arrivee']]
    rang_moyen = round(sum(rangs) / len(rangs), 2) if rangs else None

    # Mean gap between predicted rank and finishing position; a missing value
    # on either side is replaced by 99, as in the v_predictions_performance view
    ecarts = [abs((p['prediction_rank'] or 99) - (p['ordre_arrivee'] or 99)) for p in preds]
    ecart_rang_moyen = round(sum(ecarts) / len(ecarts), 2) if ecarts else None

    # ROI from the recorded dividends: each entry is (dividend - 1) for a
    # paid bet and -1 for a lost one.
    roi_sg_values = []
    roi_sp_values = []

    for p in preds:
        ordre = p['ordre_arrivee']
        if not ordre:
            # No finishing position: the horse enters neither ROI series.
            continue

        if ordre == 1 and has_course_id:
            div_sg = get_dividende_sg(conn, date, num_reunion, num_course, p['horse_number'])
            roi_sg_values.append(div_sg - 1 if div_sg and div_sg > 0 else -1)
        elif ordre > 1:
            roi_sg_values.append(-1)

        if ordre <= 3 and has_course_id:
            div_sp = get_dividende_sp(conn, date, num_reunion, num_course, p['horse_number'])
            roi_sp_values.append(div_sp - 1 if div_sp and div_sp > 0 else -1)
        elif ordre > 3:
            roi_sp_values.append(-1)

    # "brut" is the sum of the series, "net" its mean per counted bet
    roi_sg_brut = sum(roi_sg_values) if roi_sg_values else 0
    roi_sg_net = round(roi_sg_brut / len(roi_sg_values), 3) if roi_sg_values else 0
    roi_sp_brut = sum(roi_sp_values) if roi_sp_values else 0
    roi_sp_net = round(roi_sp_brut / len(roi_sp_values), 3) if roi_sp_values else 0

    # Quinté flags are cumulative thresholds on the number of top-5 horses
    quinte_5sur5 = 1 if nb_top5 >= 5 else 0
    quinte_4sur5 = 1 if nb_top5 >= 4 else 0
    quinte_3sur5 = 1 if nb_top5 >= 3 else 0
    quinte_2sur5 = 1 if nb_top5 >= 2 else 0

    # Value bet score: mean relative gap (in %) between final and predicted
    # odds, over the predicted horses that won
    value_scores = [
        (p['cote_direct'] - p['odds']) / p['odds'] * 100
        for p in preds
        if p['ordre_arrivee'] == 1 and p['cote_direct'] and p['odds']
    ]
    value_bet_score = round(sum(value_scores) / len(value_scores), 2) if value_scores else 0

    # Highest final odds among the predicted horses that won
    top_cotes = [p['cote_direct'] for p in preds if p['ordre_arrivee'] == 1 and p['cote_direct']]
    top_cote_gagnante = max(top_cotes) if top_cotes else None

    # Insert or replace; placeholders are grouped like the column list
    conn.execute("""
        INSERT OR REPLACE INTO prediction_metrics (
            date, race_time, race_name, race_hippodrome, source, discipline,
            nb_predictions, nb_gagnants, nb_places, nb_top5, nb_hors_top5,
            taux_gagnant, taux_place, taux_top5,
            rang_moyen, ecart_rang_moyen,
            roi_sg_brut, roi_sg_net, roi_sp_brut, roi_sp_net,
            quinte_5sur5, quinte_4sur5, quinte_3sur5, quinte_2sur5,
            value_bet_score, top_cote_gagnante
        ) VALUES (
            ?, ?, ?, ?, ?, ?,
            ?, ?, ?, ?, ?,
            ?, ?, ?,
            ?, ?,
            ?, ?, ?, ?,
            ?, ?, ?, ?,
            ?, ?
        )
    """, (
        date, race_time, race_name, race_hippodrome, source, discipline,
        nb_predictions, nb_gagnants, nb_places, nb_top5, nb_hors_top5,
        taux_gagnant, taux_place, taux_top5,
        rang_moyen, ecart_rang_moyen,
        roi_sg_brut, roi_sg_net, roi_sp_brut, roi_sp_net,
        quinte_5sur5, quinte_4sur5, quinte_3sur5, quinte_2sur5,
        value_bet_score, top_cote_gagnante,
    ))
|
|
|
|
def calculate_metrics(date_str):
    """Compute and store the metrics of every race of one day.

    Makes sure the schema exists, selects the distinct races of ``date_str``
    that have at least one predicted horse with a recorded finishing
    position, then runs calculate_course_metrics() for each race and each
    known source. A failure on one race/source pair is reported and does not
    stop the others. All writes are committed once at the end, and the
    connection is closed whatever happens.

    Args:
        date_str: day to process, in the format stored in ``predictions.date``.

    Returns:
        None.
    """
    init_db()
    conn = get_db()
    try:
        # Races that have both predictions AND results
        courses = conn.execute("""
            SELECT DISTINCT
                pr.date,
                pr.race_time,
                pr.race_name
            FROM predictions pr
            JOIN pmu_partants pa
                ON pa.date_programme = pr.date
                AND pa.nom = pr.horse_name
            WHERE pr.date = ?
              AND pa.ordre_arrivee IS NOT NULL
        """, (date_str,)).fetchall()

        if not courses:
            print(f"⚠️ Aucune course avec résultats pour {date_str}")
            return

        sources = (
            'canalturf_selections',
            'canalturf_prono_bases',
            'canalturf_prono_chances',
            'canalturf_prono_outsiders',
            'canalturf_partants',
        )

        total_calculated = 0
        for course in courses:
            for source in sources:
                # calculate_course_metrics() writes nothing when the source has
                # no prediction for this race, so count a pair only when the
                # connection's change counter moved.
                changes_before = conn.total_changes
                try:
                    calculate_course_metrics(
                        conn, course['date'], course['race_time'],
                        course['race_name'], source,
                    )
                except Exception as e:
                    # Per-pair boundary: report and carry on with the rest.
                    print(f"⚠️ Erreur {course['race_time']} {source}: {e}")
                else:
                    if conn.total_changes > changes_before:
                        total_calculated += 1

        conn.commit()
    finally:
        conn.close()

    print(f"✅ Métriques calculées pour {date_str}: {total_calculated} combinaisons course/source")
|
|
|
|
def backfill_metrics(days=30):
    """Compute the metrics of the last ``days`` days, today included.

    Days are processed from the most recent to the oldest. An error on one
    day is reported and does not stop the following ones.

    Args:
        days: number of days to process; zero or a negative value processes
            nothing.

    Returns:
        None.
    """
    print(f"📊 Backfill sur {days} jours...")
    # Read the clock once: re-reading it on every iteration would, on a run
    # crossing midnight, process one day twice and skip the oldest one.
    today = datetime.now()
    for i in range(days):
        date = (today - timedelta(days=i)).strftime('%Y-%m-%d')
        print(f" → {date}")
        try:
            calculate_metrics(date)
        except Exception as e:
            print(f" ⚠️ Erreur: {e}")
    print("✅ Backfill terminé")
|
|
|
|
# =============================================================================
# ENTRY POINT
# =============================================================================
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point. Precedence when several options are given:
    # --backfill, then --yesterday, then --date; with no option, today.
    parser = argparse.ArgumentParser(description="Calcul des métriques de performance")
    parser.add_argument("--date", "-d", help="Date YYYY-MM-DD")
    parser.add_argument("--yesterday", "-y", action="store_true", help="Calculer hier")
    parser.add_argument("--backfill", "-b", type=int, help="Remplir N derniers jours")

    args = parser.parse_args()

    # Compare with None: a truthiness test would treat "--backfill 0" as if
    # the option were absent and silently compute today's metrics instead.
    if args.backfill is not None:
        backfill_metrics(args.backfill)
    elif args.yesterday:
        date_str = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        calculate_metrics(date_str)
    elif args.date:
        # Validate the date and normalise it to the zero-padded form produced
        # by the other branches, which is the one used in the queries.
        try:
            date_str = datetime.strptime(args.date, '%Y-%m-%d').strftime('%Y-%m-%d')
        except ValueError:
            parser.error(f"Date invalide: {args.date!r} (format attendu: YYYY-MM-DD)")
        calculate_metrics(date_str)
    else:
        calculate_metrics(datetime.now().strftime('%Y-%m-%d'))
|