Files
turf_saas/calculate_metrics.py
2026-04-25 17:18:43 +02:00

421 lines
14 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
calculate_metrics.py - Calcul des métriques de performance prédictions vs résultats
Usage:
python3 calculate_metrics.py # Aujourd'hui
python3 calculate_metrics.py --date 2026-04-15 # Date spécifique
python3 calculate_metrics.py --yesterday # Hier
python3 calculate_metrics.py --backfill 30 # Remplir 30 derniers jours
Calculé après 21h (résultats PMU disponibles)
"""
import sqlite3
import json
import argparse
from datetime import datetime, timedelta
from collections import defaultdict
# Path of the SQLite database file opened by get_db().
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
# =============================================================================
# DATABASE SCHEMA
# =============================================================================
# DDL script executed by init_db() through Connection.executescript().
# Every statement uses IF NOT EXISTS, so re-running the script is harmless,
# but it also means an already-existing table or view is left untouched.
#
# Objects created:
#   - prediction_metrics: one row per (date, race_time, source); that triple
#     is declared UNIQUE, which is what calculate_course_metrics() relies on
#     when it writes with INSERT OR REPLACE.
#   - v_predictions_performance: each prediction LEFT JOINed to pmu_partants
#     on date and horse name, with win / top-3 / top-5 flags, rank gap and
#     realised value.
#   - v_metrics_summary_30d: per-source aggregate of prediction_metrics rows
#     whose date is within the last 30 days.
#   - v_metrics_daily: per-date, per-source aggregate of prediction_metrics.
#
# The `--` comments inside the string are part of the SQL text sent to SQLite.
METRICS_SCHEMA = """
-- Table principale des métriques par course/source
CREATE TABLE IF NOT EXISTS prediction_metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
date TEXT NOT NULL,
race_time TEXT,
race_name TEXT,
race_hippodrome TEXT,
source TEXT NOT NULL,
discipline TEXT,
-- Comptages
nb_predictions INTEGER DEFAULT 0,
nb_gagnants INTEGER DEFAULT 0,
nb_places INTEGER DEFAULT 0,
nb_top5 INTEGER DEFAULT 0,
nb_hors_top5 INTEGER DEFAULT 0,
-- Taux
taux_gagnant REAL,
taux_place REAL,
taux_top5 REAL,
-- Rangs
rang_moyen REAL,
ecart_rang_moyen REAL,
-- ROI avec dividendes réels PMU
roi_sg_brut REAL,
roi_sg_net REAL,
roi_sp_brut REAL,
roi_sp_net REAL,
-- Quinté
quinte_5sur5 INTEGER DEFAULT 0,
quinte_4sur5 INTEGER DEFAULT 0,
quinte_3sur5 INTEGER DEFAULT 0,
quinte_2sur5 INTEGER DEFAULT 0,
-- Value
value_bet_score REAL,
top_cote_gagnante REAL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(date, race_time, source)
);
-- Vue détaillée des performances
CREATE VIEW IF NOT EXISTS v_predictions_performance AS
SELECT
pr.date,
pr.race_time,
pr.race_name,
pr.race_hippodrome,
pr.horse_number,
pr.horse_name,
pr.prediction_rank,
pr.odds AS cote_prediction,
pr.source,
pa.ordre_arrivee,
pa.cote_direct AS cote_finale,
pa.driver,
-- Indicateurs binaires
CASE WHEN pa.ordre_arrivee = 1 THEN 1 ELSE 0 END AS is_gagnant,
CASE WHEN pa.ordre_arrivee <= 3 THEN 1 ELSE 0 END AS is_place,
CASE WHEN pa.ordre_arrivee <= 5 THEN 1 ELSE 0 END AS is_top5,
-- Écarts
ABS(COALESCE(pr.prediction_rank, 99) - COALESCE(pa.ordre_arrivee, 99)) AS ecart_rang,
-- Value réalisée
CASE
WHEN pa.ordre_arrivee = 1 AND pa.cote_direct > pr.odds
THEN ROUND((pa.cote_direct - pr.odds) / pr.odds * 100, 1)
ELSE 0
END AS value_realized
FROM predictions pr
LEFT JOIN pmu_partants pa
ON pa.date_programme = pr.date
AND pa.nom = pr.horse_name;
-- Vue résumé par source (30 jours glissants)
CREATE VIEW IF NOT EXISTS v_metrics_summary_30d AS
SELECT
source,
COUNT(*) as nb_courses,
SUM(nb_predictions) as total_predictions,
SUM(nb_gagnants) as total_gagnants,
SUM(nb_places) as total_places,
SUM(nb_top5) as total_top5,
ROUND(AVG(taux_gagnant), 2) as moy_taux_gagnant,
ROUND(AVG(taux_place), 2) as moy_taux_place,
ROUND(AVG(taux_top5), 2) as moy_taux_top5,
ROUND(AVG(roi_sg_net), 3) as moy_roi_sg,
ROUND(AVG(roi_sp_net), 3) as moy_roi_sp,
ROUND(AVG(ecart_rang_moyen), 2) as moy_ecart_rang,
SUM(quinte_5sur5) as nb_5sur5,
SUM(quinte_4sur5) as nb_4sur5,
SUM(quinte_3sur5) as nb_3sur5,
ROUND(SUM(quinte_5sur5) * 100.0 / NULLIF(COUNT(*), 0), 1) as pct_5sur5,
ROUND(SUM(quinte_4sur5) * 100.0 / NULLIF(COUNT(*), 0), 1) as pct_4sur5
FROM prediction_metrics
WHERE date >= date('now', '-30 days')
GROUP BY source
ORDER BY moy_taux_place DESC;
-- Vue évolution quotidienne
CREATE VIEW IF NOT EXISTS v_metrics_daily AS
SELECT
date,
source,
SUM(nb_predictions) as predictions,
SUM(nb_gagnants) as gagnants,
SUM(nb_places) as places,
SUM(nb_top5) as top5,
ROUND(AVG(taux_gagnant), 2) as taux_gagnant,
ROUND(AVG(taux_place), 2) as taux_place,
ROUND(AVG(roi_sg_net), 3) as roi_sg,
ROUND(AVG(roi_sp_net), 3) as roi_sp,
SUM(quinte_5sur5) as quinte_5sur5,
SUM(quinte_4sur5) as quinte_4sur5
FROM prediction_metrics
GROUP BY date, source
ORDER BY date DESC;
"""
# =============================================================================
# FONCTIONS UTILITAIRES
# =============================================================================
def get_db():
    """Open the SQLite database located at DB_PATH.

    Returns:
        A new sqlite3 connection whose rows are sqlite3.Row objects, so
        columns can be read by name (row['col']) as well as by index.
        The caller is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    # Row objects let the rest of the module address columns by name.
    setattr(connection, "row_factory", sqlite3.Row)
    return connection
def init_db():
    """Create the metrics table and the reporting views if they are missing.

    Runs the METRICS_SCHEMA script on a fresh connection. The connection is
    closed even when the script fails, so a schema error no longer leaks an
    open database handle. The confirmation message is printed only on
    success.

    Raises:
        sqlite3.Error: propagated unchanged if the schema script fails.
    """
    conn = get_db()
    try:
        conn.executescript(METRICS_SCHEMA)
        conn.commit()
    finally:
        conn.close()
    print("✅ Tables et vues initialisées")
def get_dividende_sg(conn, date, num_reunion, num_course, horse_number):
    """Return the Simple Gagnant (win) dividend for one horse, or None.

    Args:
        conn: open sqlite3 connection.
        date: programme date, compared to pmu_rapports.date_programme.
        num_reunion: meeting number of the race.
        num_course: race number inside the meeting.
        horse_number: horse number; compared as text to the `combinaison`
            column.

    Returns:
        The `dividende_euro` value of the first matching row, or None when
        no row matches or the lookup fails at the database level (for
        example when the pmu_rapports table does not exist).
    """
    try:
        row = conn.execute(
            """
            SELECT dividende_euro
            FROM pmu_rapports
            WHERE date_programme = ?
            AND num_reunion = ?
            AND num_course = ?
            AND type_pari = 'SIMPLE_GAGNANT'
            AND combinaison = ?
            """,
            (date, num_reunion, num_course, str(horse_number)),
        ).fetchone()
    except sqlite3.Error:
        # Best-effort lookup: a database problem means "no dividend known".
        # Only sqlite3 errors are absorbed, so KeyboardInterrupt, SystemExit
        # and genuine programming errors are no longer hidden.
        return None
    # Positional access works for both sqlite3.Row and plain tuple rows.
    return row[0] if row else None
def get_dividende_sp(conn, date, num_reunion, num_course, horse_number):
    """Return the Simple Placé (place) dividend for one horse, or None.

    Args:
        conn: open sqlite3 connection.
        date: programme date, compared to pmu_rapports.date_programme.
        num_reunion: meeting number of the race.
        num_course: race number inside the meeting.
        horse_number: horse number; compared as text to the `combinaison`
            column.

    Returns:
        The `dividende_euro` value of the first matching row, or None when
        no row matches or the lookup fails at the database level (for
        example when the pmu_rapports table does not exist).
    """
    try:
        row = conn.execute(
            """
            SELECT dividende_euro
            FROM pmu_rapports
            WHERE date_programme = ?
            AND num_reunion = ?
            AND num_course = ?
            AND type_pari = 'SIMPLE_PLACE'
            AND combinaison = ?
            """,
            (date, num_reunion, num_course, str(horse_number)),
        ).fetchone()
    except sqlite3.Error:
        # Best-effort lookup: a database problem means "no dividend known".
        # Only sqlite3 errors are absorbed, so KeyboardInterrupt, SystemExit
        # and genuine programming errors are no longer hidden.
        return None
    # Positional access works for both sqlite3.Row and plain tuple rows.
    return row[0] if row else None
# =============================================================================
# CALCUL DES MÉTRIQUES
# =============================================================================
def calculate_course_metrics(conn, date, race_time, race_name, source):
    """Compute and store the metrics of one race for one prediction source.

    The predictions of (date, race_time, source) are LEFT JOINed to
    pmu_partants on date and horse name; counts, rates, rank statistics,
    ROI figures, quinté flags and value scores are derived from the joined
    rows and written to prediction_metrics with INSERT OR REPLACE. The
    function does not commit; the caller owns the transaction.

    Args:
        conn: open sqlite3 connection whose rows can be read by column name
            (sqlite3.Row, as configured by get_db()).
        date: race date, compared to predictions.date.
        race_time: race time, compared to predictions.race_time.
        race_name: race name, stored as-is in the metrics row.
        source: prediction source identifier.

    Returns:
        None. Nothing is written when no prediction matches.
    """
    preds = conn.execute(
        """
        SELECT
        pr.horse_number,
        pr.horse_name,
        pr.prediction_rank,
        pr.odds,
        pa.ordre_arrivee,
        pa.cote_direct,
        pa.num_reunion,
        pa.num_course
        FROM predictions pr
        LEFT JOIN pmu_partants pa
        ON pa.date_programme = pr.date
        AND pa.nom = pr.horse_name
        WHERE pr.date = ?
        AND pr.race_time = ?
        AND pr.source = ?
        """,
        (date, race_time, source),
    ).fetchall()
    if not preds:
        return

    # Metadata
    hippodrome = conn.execute(
        """
        SELECT race_hippodrome FROM predictions
        WHERE date = ? AND race_time = ?
        LIMIT 1
        """,
        (date, race_time),
    ).fetchone()
    race_hippodrome = hippodrome['race_hippodrome'] if hippodrome else None

    # Race identifiers come from the first prediction that actually matched
    # a runner. Reading them from preds[0] only would lose them (and with
    # them the discipline and every dividend lookup) whenever that
    # particular horse has no pmu_partants row.
    matched = next(
        (p for p in preds if p['num_reunion'] and p['num_course']), None
    )
    num_reunion = matched['num_reunion'] if matched else None
    num_course = matched['num_course'] if matched else None
    has_race_ids = bool(num_reunion and num_course)

    # Discipline from pmu_courses
    discipline = None
    if has_race_ids:
        disc_row = conn.execute(
            """
            SELECT discipline FROM pmu_courses
            WHERE date_programme = ? AND num_reunion = ? AND num_course = ?
            """,
            (date, num_reunion, num_course),
        ).fetchone()
        discipline = disc_row['discipline'] if disc_row else None

    # Counts. A NULL (or 0) finishing position is treated as "did not place".
    nb_predictions = len(preds)
    nb_gagnants = sum(1 for p in preds if p['ordre_arrivee'] == 1)
    nb_places = sum(
        1 for p in preds if p['ordre_arrivee'] and p['ordre_arrivee'] <= 3
    )
    nb_top5 = sum(
        1 for p in preds if p['ordre_arrivee'] and p['ordre_arrivee'] <= 5
    )
    nb_hors_top5 = nb_predictions - nb_top5

    # Rates in percent (nb_predictions > 0 is guaranteed by the early return)
    taux_gagnant = round(nb_gagnants / nb_predictions * 100, 2)
    taux_place = round(nb_places / nb_predictions * 100, 2)
    taux_top5 = round(nb_top5 / nb_predictions * 100, 2)

    # Mean finishing position over the horses that have one
    rangs = [p['ordre_arrivee'] for p in preds if p['ordre_arrivee']]
    rang_moyen = round(sum(rangs) / len(rangs), 2) if rangs else None

    # Mean |predicted rank - finishing position|; a missing value counts as
    # 99, the same placeholder the v_predictions_performance view uses.
    ecarts = [
        abs((p['prediction_rank'] or 99) - (p['ordre_arrivee'] or 99))
        for p in preds
    ]
    ecart_rang_moyen = round(sum(ecarts) / len(ecarts), 2) if ecarts else None

    # ROI for a 1-unit stake per horse, using the dividends found in
    # pmu_rapports. A horse without a finishing position is left out.
    roi_sg_values = []
    roi_sp_values = []
    for p in preds:
        ordre = p['ordre_arrivee']

        if ordre == 1 and has_race_ids:
            div_sg = get_dividende_sg(
                conn, date, num_reunion, num_course, p['horse_number']
            )
            # A winner with no usable dividend is booked as a lost stake.
            roi_sg_values.append(div_sg - 1 if div_sg and div_sg > 0 else -1)
        elif ordre and ordre > 1:
            roi_sg_values.append(-1)

        if ordre and ordre <= 3 and has_race_ids:
            div_sp = get_dividende_sp(
                conn, date, num_reunion, num_course, p['horse_number']
            )
            roi_sp_values.append(div_sp - 1 if div_sp and div_sp > 0 else -1)
        elif ordre and ordre > 3:
            roi_sp_values.append(-1)

    # "brut" is the summed profit, "net" the profit per counted bet.
    roi_sg_brut = sum(roi_sg_values) if roi_sg_values else 0
    roi_sg_net = round(roi_sg_brut / len(roi_sg_values), 3) if roi_sg_values else 0
    roi_sp_brut = sum(roi_sp_values) if roi_sp_values else 0
    roi_sp_net = round(roi_sp_brut / len(roi_sp_values), 3) if roi_sp_values else 0

    # Quinté flags are cumulative thresholds: 5 hits also set 4/5, 3/5, 2/5.
    quinte_5sur5 = 1 if nb_top5 >= 5 else 0
    quinte_4sur5 = 1 if nb_top5 >= 4 else 0
    quinte_3sur5 = 1 if nb_top5 >= 3 else 0
    quinte_2sur5 = 1 if nb_top5 >= 2 else 0

    # Value bet score: mean relative drift (in %) between the final odds and
    # the predicted odds, over winners that have both values.
    value_scores = [
        (p['cote_direct'] - p['odds']) / p['odds'] * 100
        for p in preds
        if p['ordre_arrivee'] == 1 and p['cote_direct'] and p['odds']
    ]
    value_bet_score = (
        round(sum(value_scores) / len(value_scores), 2) if value_scores else 0
    )

    # Highest final odds among the predicted winners
    top_cotes = [
        p['cote_direct']
        for p in preds
        if p['ordre_arrivee'] == 1 and p['cote_direct']
    ]
    top_cote_gagnante = max(top_cotes) if top_cotes else None

    # Insert, or replace the row already stored for (date, race_time, source)
    conn.execute(
        """
        INSERT OR REPLACE INTO prediction_metrics (
        date, race_time, race_name, race_hippodrome, source, discipline,
        nb_predictions, nb_gagnants, nb_places, nb_top5, nb_hors_top5,
        taux_gagnant, taux_place, taux_top5,
        rang_moyen, ecart_rang_moyen,
        roi_sg_brut, roi_sg_net, roi_sp_brut, roi_sp_net,
        quinte_5sur5, quinte_4sur5, quinte_3sur5, quinte_2sur5,
        value_bet_score, top_cote_gagnante
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        (
            date, race_time, race_name, race_hippodrome, source, discipline,
            nb_predictions, nb_gagnants, nb_places, nb_top5, nb_hors_top5,
            taux_gagnant, taux_place, taux_top5,
            rang_moyen, ecart_rang_moyen,
            roi_sg_brut, roi_sg_net, roi_sp_brut, roi_sp_net,
            quinte_5sur5, quinte_4sur5, quinte_3sur5, quinte_2sur5,
            value_bet_score, top_cote_gagnante,
        ),
    )
def calculate_metrics(date_str):
    """Compute and store the metrics of every race of one day.

    Ensures the schema exists, lists the races of `date_str` that have at
    least one predicted horse with a recorded finishing position, then calls
    calculate_course_metrics() for each race and each known source. All
    writes are committed together at the end. A failure on one race/source
    pair is reported and does not stop the others.

    Args:
        date_str: date to process, compared to predictions.date
            (the CLI passes 'YYYY-MM-DD').
    """
    init_db()
    conn = get_db()
    # try/finally guarantees the connection is released even if the race
    # listing query or the commit raises.
    try:
        # Races that have both predictions AND results
        courses = conn.execute(
            """
            SELECT DISTINCT
            pr.date,
            pr.race_time,
            pr.race_name
            FROM predictions pr
            JOIN pmu_partants pa
            ON pa.date_programme = pr.date
            AND pa.nom = pr.horse_name
            WHERE pr.date = ?
            AND pa.ordre_arrivee IS NOT NULL
            """,
            (date_str,),
        ).fetchall()
        if not courses:
            print(f"⚠️ Aucune course avec résultats pour {date_str}")
            return

        sources = [
            'canalturf_selections',
            'canalturf_prono_bases',
            'canalturf_prono_chances',
            'canalturf_prono_outsiders',
            'canalturf_partants'
        ]
        total_calculated = 0
        for course in courses:
            for source in sources:
                try:
                    changes_before = conn.total_changes
                    calculate_course_metrics(
                        conn,
                        course['date'],
                        course['race_time'],
                        course['race_name'],
                        source,
                    )
                    # Count only the pairs that really produced a row: the
                    # callee returns without writing when a source has no
                    # prediction for the race.
                    if conn.total_changes > changes_before:
                        total_calculated += 1
                except Exception as e:
                    # Best-effort per race/source: report and keep going.
                    print(f"⚠️ Erreur {course['race_time']} {source}: {e}")
        conn.commit()
    finally:
        conn.close()
    print(f"✅ Métriques calculées pour {date_str}: {total_calculated} combinaisons course/source")
def backfill_metrics(days=30):
    """Recompute the metrics of the last `days` days, today included.

    Dates are processed from today backwards. An error on one date is
    reported and the loop moves on to the next date.

    Args:
        days: number of consecutive days to process; nothing is processed
            when it is zero or negative.
    """
    print(f"📊 Backfill sur {days} jours...")
    # Take the reference date once: re-reading the clock on every iteration
    # would repeat one date and drop the oldest if the run crosses midnight.
    today = datetime.now()
    for offset in range(days):
        date = (today - timedelta(days=offset)).strftime('%Y-%m-%d')
        print(f"{date}")
        try:
            calculate_metrics(date)
        except Exception as e:
            print(f" ⚠️ Erreur: {e}")
    print("✅ Backfill terminé")
# =============================================================================
# POINT D'ENTRÉE
# =============================================================================
if __name__ == "__main__":
    # Command-line entry point. Precedence when several options are given:
    # --backfill, then --yesterday, then --date; with no option, today.
    parser = argparse.ArgumentParser(description="Calcul des métriques de performance")
    parser.add_argument("--date", "-d", help="Date YYYY-MM-DD")
    parser.add_argument("--yesterday", "-y", action="store_true", help="Calculer hier")
    parser.add_argument("--backfill", "-b", type=int, help="Remplir N derniers jours")
    args = parser.parse_args()
    if args.backfill:
        backfill_metrics(args.backfill)
    elif args.yesterday:
        date_str = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        calculate_metrics(date_str)
    elif args.date:
        # Reject malformed dates up front instead of silently matching no
        # race, and normalise to zero-padded YYYY-MM-DD (strptime accepts
        # '2026-4-5', which would not match the strings compared in SQL).
        try:
            requested = datetime.strptime(args.date, '%Y-%m-%d')
        except ValueError:
            parser.error(f"--date invalide: {args.date!r} (format attendu: YYYY-MM-DD)")
        calculate_metrics(requested.strftime('%Y-%m-%d'))
    else:
        calculate_metrics(datetime.now().strftime('%Y-%m-%d'))