#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
calculate_metrics.py - Compute performance metrics for predictions vs. results.

Usage:
    python3 calculate_metrics.py                    # Today
    python3 calculate_metrics.py --date 2026-04-15  # Specific date
    python3 calculate_metrics.py --yesterday        # Yesterday
    python3 calculate_metrics.py --backfill 30      # Fill the last 30 days

Computed after 21:00 (PMU results available).
"""

import sqlite3
import json
import argparse
from contextlib import closing
from datetime import datetime, timedelta
from collections import defaultdict

DB_PATH = "/home/h3r7/turf_scraper/turf.db"

# Prediction sources for which metrics are computed on every race.
SOURCES = (
    'canalturf_selections',
    'canalturf_prono_bases',
    'canalturf_prono_chances',
    'canalturf_prono_outsiders',
    'canalturf_partants',
)

# =============================================================================
# DATABASE SCHEMA
# =============================================================================

METRICS_SCHEMA = """
-- Table principale des métriques par course/source
CREATE TABLE IF NOT EXISTS prediction_metrics (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    date TEXT NOT NULL,
    race_time TEXT,
    race_name TEXT,
    race_hippodrome TEXT,
    source TEXT NOT NULL,
    discipline TEXT,

    -- Comptages
    nb_predictions INTEGER DEFAULT 0,
    nb_gagnants INTEGER DEFAULT 0,
    nb_places INTEGER DEFAULT 0,
    nb_top5 INTEGER DEFAULT 0,
    nb_hors_top5 INTEGER DEFAULT 0,

    -- Taux
    taux_gagnant REAL,
    taux_place REAL,
    taux_top5 REAL,

    -- Rangs
    rang_moyen REAL,
    ecart_rang_moyen REAL,

    -- ROI avec dividendes réels PMU
    roi_sg_brut REAL,
    roi_sg_net REAL,
    roi_sp_brut REAL,
    roi_sp_net REAL,

    -- Quinté
    quinte_5sur5 INTEGER DEFAULT 0,
    quinte_4sur5 INTEGER DEFAULT 0,
    quinte_3sur5 INTEGER DEFAULT 0,
    quinte_2sur5 INTEGER DEFAULT 0,

    -- Value
    value_bet_score REAL,
    top_cote_gagnante REAL,

    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(date, race_time, source)
);

-- Vue détaillée des performances
CREATE VIEW IF NOT EXISTS v_predictions_performance AS
SELECT
    pr.date,
    pr.race_time,
    pr.race_name,
    pr.race_hippodrome,
    pr.horse_number,
    pr.horse_name,
    pr.prediction_rank,
    pr.odds AS cote_prediction,
    pr.source,
    pa.ordre_arrivee,
    pa.cote_direct AS cote_finale,
    pa.driver,

    -- Indicateurs binaires
    CASE WHEN pa.ordre_arrivee = 1 THEN 1 ELSE 0 END AS is_gagnant,
    CASE WHEN pa.ordre_arrivee <= 3 THEN 1 ELSE 0 END AS is_place,
    CASE WHEN pa.ordre_arrivee <= 5 THEN 1 ELSE 0 END AS is_top5,

    -- Écarts
    ABS(COALESCE(pr.prediction_rank, 99) - COALESCE(pa.ordre_arrivee, 99)) AS ecart_rang,

    -- Value réalisée
    CASE
        WHEN pa.ordre_arrivee = 1 AND pa.cote_direct > pr.odds
        THEN ROUND((pa.cote_direct - pr.odds) / pr.odds * 100, 1)
        ELSE 0
    END AS value_realized
FROM predictions pr
LEFT JOIN pmu_partants pa
    ON pa.date_programme = pr.date AND pa.nom = pr.horse_name;

-- Vue résumé par source (30 jours glissants)
CREATE VIEW IF NOT EXISTS v_metrics_summary_30d AS
SELECT
    source,
    COUNT(*) as nb_courses,
    SUM(nb_predictions) as total_predictions,
    SUM(nb_gagnants) as total_gagnants,
    SUM(nb_places) as total_places,
    SUM(nb_top5) as total_top5,
    ROUND(AVG(taux_gagnant), 2) as moy_taux_gagnant,
    ROUND(AVG(taux_place), 2) as moy_taux_place,
    ROUND(AVG(taux_top5), 2) as moy_taux_top5,
    ROUND(AVG(roi_sg_net), 3) as moy_roi_sg,
    ROUND(AVG(roi_sp_net), 3) as moy_roi_sp,
    ROUND(AVG(ecart_rang_moyen), 2) as moy_ecart_rang,
    SUM(quinte_5sur5) as nb_5sur5,
    SUM(quinte_4sur5) as nb_4sur5,
    SUM(quinte_3sur5) as nb_3sur5,
    ROUND(SUM(quinte_5sur5) * 100.0 / NULLIF(COUNT(*), 0), 1) as pct_5sur5,
    ROUND(SUM(quinte_4sur5) * 100.0 / NULLIF(COUNT(*), 0), 1) as pct_4sur5
FROM prediction_metrics
WHERE date >= date('now', '-30 days')
GROUP BY source
ORDER BY moy_taux_place DESC;

-- Vue évolution quotidienne
CREATE VIEW IF NOT EXISTS v_metrics_daily AS
SELECT
    date,
    source,
    SUM(nb_predictions) as predictions,
    SUM(nb_gagnants) as gagnants,
    SUM(nb_places) as places,
    SUM(nb_top5) as top5,
    ROUND(AVG(taux_gagnant), 2) as taux_gagnant,
    ROUND(AVG(taux_place), 2) as taux_place,
    ROUND(AVG(roi_sg_net), 3) as roi_sg,
    ROUND(AVG(roi_sp_net), 3) as roi_sp,
    SUM(quinte_5sur5) as quinte_5sur5,
    SUM(quinte_4sur5) as quinte_4sur5
FROM prediction_metrics
GROUP BY date, source
ORDER BY date DESC;
"""

# Lookup of one dividend in pmu_rapports; the bet type is bound as a parameter
# so the win and place lookups share a single statement.
_DIVIDENDE_SQL = """
    SELECT dividende_euro
    FROM pmu_rapports
    WHERE date_programme = ?
      AND num_reunion = ?
      AND num_course = ?
      AND type_pari = ?
      AND combinaison = ?
"""

# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================


def get_db(db_path=None):
    """Open a connection to the SQLite database.

    Args:
        db_path: Path of the database to open. Defaults to ``DB_PATH``.

    Returns:
        A ``sqlite3.Connection`` whose rows are ``sqlite3.Row`` objects, so
        columns can be read by name. The caller is responsible for closing it.
    """
    conn = sqlite3.connect(db_path or DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn


def init_db(db_path=None):
    """Create the metrics table and views if they do not exist yet.

    Args:
        db_path: Path of the database to initialise. Defaults to ``DB_PATH``.
    """
    # closing() guarantees the connection is released even if the script fails.
    with closing(get_db(db_path)) as conn:
        conn.executescript(METRICS_SCHEMA)
        conn.commit()
    print("✅ Tables et vues initialisées")


def _fetch_dividende(conn, date, num_reunion, num_course, horse_number, type_pari):
    """Return ``dividende_euro`` for one horse and bet type, or None.

    Best-effort lookup: a database error (for example a missing
    ``pmu_rapports`` table) yields None instead of propagating.
    """
    params = (date, num_reunion, num_course, type_pari, str(horse_number))
    try:
        row = conn.execute(_DIVIDENDE_SQL, params).fetchone()
    except sqlite3.Error:
        return None
    # Positional access works for both sqlite3.Row and plain tuple rows.
    return row[0] if row else None


def get_dividende_sg(conn, date, num_reunion, num_course, horse_number):
    """Return the Simple Gagnant (win) dividend for a horse, or None if absent."""
    return _fetch_dividende(
        conn, date, num_reunion, num_course, horse_number, 'SIMPLE_GAGNANT'
    )


def get_dividende_sp(conn, date, num_reunion, num_course, horse_number):
    """Return the Simple Placé (place) dividend for a horse, or None if absent."""
    return _fetch_dividende(
        conn, date, num_reunion, num_course, horse_number, 'SIMPLE_PLACE'
    )


# =============================================================================
# METRICS CALCULATION
# =============================================================================


def _race_ids(preds):
    """Return (num_reunion, num_course) from the first row that carries both.

    Rows come from a LEFT JOIN, so a predicted horse without a matching
    ``pmu_partants`` row has NULL ids. Looking only at the first row would lose
    the race identity whenever that particular horse is unmatched.
    """
    for p in preds:
        if p['num_reunion'] and p['num_course']:
            return p['num_reunion'], p['num_course']
    return None, None


def _bet_returns(conn, date, num_reunion, num_course, preds, max_rank, fetch_dividende):
    """Return the per-horse net return of a 1-unit bet on every prediction.

    A horse finishing within ``max_rank`` returns ``dividend - 1`` (or -1 when
    no positive dividend is found); a horse finishing beyond it returns -1.
    Horses without a finishing position are ignored, and so are horses within
    ``max_rank`` when the race ids are unknown (no dividend lookup possible).
    """
    have_ids = bool(num_reunion and num_course)
    returns = []
    for p in preds:
        ordre = p['ordre_arrivee']
        if not ordre or ordre < 1:
            continue
        if ordre > max_rank:
            returns.append(-1)
            continue
        if not have_ids:
            continue
        dividende = fetch_dividende(conn, date, num_reunion, num_course, p['horse_number'])
        returns.append(dividende - 1 if dividende and dividende > 0 else -1)
    return returns


def _roi_totals(returns):
    """Return (summed return, mean return rounded to 3 decimals); zeros if empty."""
    if not returns:
        return 0, 0
    brut = sum(returns)
    return brut, round(brut / len(returns), 3)


def calculate_course_metrics(conn, date, race_time, race_name, source):
    """Compute and store the metrics of one race for one prediction source.

    The row is written with ``INSERT OR REPLACE`` into ``prediction_metrics``
    (unique on date, race_time, source). The transaction is not committed
    here; the caller commits.

    Args:
        conn: Open connection returning ``sqlite3.Row`` rows.
        date: Race date, matched against ``predictions.date``.
        race_time: Race time, matched against ``predictions.race_time``.
        race_name: Race name stored alongside the metrics.
        source: Prediction source to evaluate.

    Returns:
        True if a metrics row was written, False if the source has no
        prediction for this race.
    """
    # Predictions of this race/source joined with the official results.
    preds = conn.execute("""
        SELECT pr.horse_number, pr.horse_name, pr.prediction_rank, pr.odds,
               pa.ordre_arrivee, pa.cote_direct, pa.num_reunion, pa.num_course
        FROM predictions pr
        LEFT JOIN pmu_partants pa
            ON pa.date_programme = pr.date AND pa.nom = pr.horse_name
        WHERE pr.date = ? AND pr.race_time = ? AND pr.source = ?
    """, (date, race_time, source)).fetchall()

    if not preds:
        return False

    # Metadata
    hippodrome = conn.execute("""
        SELECT race_hippodrome FROM predictions
        WHERE date = ? AND race_time = ?
        LIMIT 1
    """, (date, race_time)).fetchone()
    race_hippodrome = hippodrome['race_hippodrome'] if hippodrome else None

    num_reunion, num_course = _race_ids(preds)

    # Discipline comes from pmu_courses
    discipline = None
    if num_reunion and num_course:
        disc_row = conn.execute("""
            SELECT discipline FROM pmu_courses
            WHERE date_programme = ? AND num_reunion = ? AND num_course = ?
        """, (date, num_reunion, num_course)).fetchone()
        discipline = disc_row['discipline'] if disc_row else None

    # Counts
    nb_predictions = len(preds)
    nb_gagnants = sum(1 for p in preds if p['ordre_arrivee'] == 1)
    nb_places = sum(1 for p in preds if p['ordre_arrivee'] and p['ordre_arrivee'] <= 3)
    nb_top5 = sum(1 for p in preds if p['ordre_arrivee'] and p['ordre_arrivee'] <= 5)
    nb_hors_top5 = nb_predictions - nb_top5

    # Rates (percentages of the predictions); nb_predictions is > 0 here
    taux_gagnant = round(nb_gagnants / nb_predictions * 100, 2)
    taux_place = round(nb_places / nb_predictions * 100, 2)
    taux_top5 = round(nb_top5 / nb_predictions * 100, 2)

    # Mean finishing position of the horses that have one
    rangs = [p['ordre_arrivee'] for p in preds if p['ordre_arrivee']]
    rang_moyen = round(sum(rangs) / len(rangs), 2) if rangs else None

    # Mean |predicted rank - finishing position|; a missing value counts as 99,
    # the same convention as the v_predictions_performance view.
    ecarts = [
        abs((p['prediction_rank'] or 99) - (p['ordre_arrivee'] or 99))
        for p in preds
    ]
    ecart_rang_moyen = round(sum(ecarts) / len(ecarts), 2)

    # ROI from the dividends stored in pmu_rapports
    roi_sg_values = _bet_returns(
        conn, date, num_reunion, num_course, preds, 1, get_dividende_sg
    )
    roi_sp_values = _bet_returns(
        conn, date, num_reunion, num_course, preds, 3, get_dividende_sp
    )
    roi_sg_brut, roi_sg_net = _roi_totals(roi_sg_values)
    roi_sp_brut, roi_sp_net = _roi_totals(roi_sp_values)

    # Quinté flags are cumulative: N predictions in the top 5 sets every
    # "at least k out of 5" flag with k <= N.
    quinte_5sur5 = 1 if nb_top5 >= 5 else 0
    quinte_4sur5 = 1 if nb_top5 >= 4 else 0
    quinte_3sur5 = 1 if nb_top5 >= 3 else 0
    quinte_2sur5 = 1 if nb_top5 >= 2 else 0

    # Value bet score: relative gap (%) between final and predicted odds,
    # averaged over the predicted winners.
    value_scores = [
        (p['cote_direct'] - p['odds']) / p['odds'] * 100
        for p in preds
        if p['ordre_arrivee'] == 1 and p['cote_direct'] and p['odds']
    ]
    value_bet_score = round(sum(value_scores) / len(value_scores), 2) if value_scores else 0

    # Highest final odds among the predicted winners
    top_cotes = [
        p['cote_direct'] for p in preds
        if p['ordre_arrivee'] == 1 and p['cote_direct']
    ]
    top_cote_gagnante = max(top_cotes) if top_cotes else None

    # Insert or update
    conn.execute("""
        INSERT OR REPLACE INTO prediction_metrics (
            date, race_time, race_name, race_hippodrome, source, discipline,
            nb_predictions, nb_gagnants, nb_places, nb_top5, nb_hors_top5,
            taux_gagnant, taux_place, taux_top5,
            rang_moyen, ecart_rang_moyen,
            roi_sg_brut, roi_sg_net, roi_sp_brut, roi_sp_net,
            quinte_5sur5, quinte_4sur5, quinte_3sur5, quinte_2sur5,
            value_bet_score, top_cote_gagnante
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        date, race_time, race_name, race_hippodrome, source, discipline,
        nb_predictions, nb_gagnants, nb_places, nb_top5, nb_hors_top5,
        taux_gagnant, taux_place, taux_top5,
        rang_moyen, ecart_rang_moyen,
        roi_sg_brut, roi_sg_net, roi_sp_brut, roi_sp_net,
        quinte_5sur5, quinte_4sur5, quinte_3sur5, quinte_2sur5,
        value_bet_score, top_cote_gagnante
    ))
    return True


def calculate_metrics(date_str, db_path=None):
    """Compute the metrics of every race of a given date.

    Only races having at least one predicted horse with a recorded finishing
    position are processed. An error on one race/source is reported and does
    not stop the others.

    Args:
        date_str: Date to process, ``YYYY-MM-DD``.
        db_path: Path of the database. Defaults to ``DB_PATH``.
    """
    init_db(db_path)

    with closing(get_db(db_path)) as conn:
        # Races having both predictions AND results
        courses = conn.execute("""
            SELECT DISTINCT pr.date, pr.race_time, pr.race_name
            FROM predictions pr
            JOIN pmu_partants pa
                ON pa.date_programme = pr.date AND pa.nom = pr.horse_name
            WHERE pr.date = ? AND pa.ordre_arrivee IS NOT NULL
        """, (date_str,)).fetchall()

        if not courses:
            print(f"⚠️ Aucune course avec résultats pour {date_str}")
            return

        total_calculated = 0
        for course in courses:
            for source in SOURCES:
                try:
                    written = calculate_course_metrics(
                        conn, course['date'], course['race_time'],
                        course['race_name'], source
                    )
                except Exception as e:
                    print(f"⚠️ Erreur {course['race_time']} {source}: {e}")
                else:
                    # Count only the combinations that produced a row.
                    if written:
                        total_calculated += 1

        conn.commit()

    print(f"✅ Métriques calculées pour {date_str}: {total_calculated} combinaisons course/source")


def backfill_metrics(days=30, db_path=None):
    """Compute the metrics of the last ``days`` days, today included.

    Args:
        days: Number of days to process, walking backwards from today.
        db_path: Path of the database. Defaults to ``DB_PATH``.
    """
    print(f"📊 Backfill sur {days} jours...")
    # Read the clock once so a run crossing midnight does not shift the dates.
    today = datetime.now()
    for i in range(days):
        date = (today - timedelta(days=i)).strftime('%Y-%m-%d')
        print(f" → {date}")
        try:
            calculate_metrics(date, db_path)
        except Exception as e:
            print(f" ⚠️ Erreur: {e}")
    print("✅ Backfill terminé")


# =============================================================================
# ENTRY POINT
# =============================================================================


def main():
    """Parse the command line and run the requested computation."""
    parser = argparse.ArgumentParser(description="Calcul des métriques de performance")
    parser.add_argument("--date", "-d", help="Date YYYY-MM-DD")
    parser.add_argument("--yesterday", "-y", action="store_true", help="Calculer hier")
    parser.add_argument("--backfill", "-b", type=int, help="Remplir N derniers jours")
    args = parser.parse_args()

    if args.backfill:
        backfill_metrics(args.backfill)
    elif args.yesterday:
        date_str = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        calculate_metrics(date_str)
    elif args.date:
        calculate_metrics(args.date)
    else:
        calculate_metrics(datetime.now().strftime('%Y-%m-%d'))


if __name__ == "__main__":
    main()