#!/usr/bin/env python3
"""Compare scoring models' performance against actual race results."""

import sqlite3
import sys
from contextlib import closing

DB_PATH = "/home/h3r7/turf_scraper/turf.db"


def _fetch_by_race(query, params, fields):
    """Run *query* against the database at DB_PATH and group rows by race.

    Args:
        query: SQL text; it must select a ``race_name`` column plus every
            column named in *fields*.
        params: Sequence of values bound to the ``?`` placeholders.
        fields: Mapping of output dict key -> column name to copy per row.

    Returns:
        Dict mapping race name to a list of per-horse dicts, in the order
        the query returned the rows.
    """
    grouped = {}
    # closing() guarantees the connection is released even if the query
    # raises; sqlite3's own context manager only commits/rolls back.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.row_factory = sqlite3.Row
        for row in conn.execute(query, params):
            entry = {key: row[column] for key, column in fields.items()}
            grouped.setdefault(row['race_name'], []).append(entry)
    return grouped


def _get_scoring(date, version):
    """Return predictions from the ``scoring`` table for one scoring version."""
    return _fetch_by_race(
        """
        SELECT race_name, horse_name, score, rang_scoring
        FROM scoring
        WHERE date = ? AND scoring_version = ?
        ORDER BY race_name, rang_scoring
        """,
        (date, version),
        {'horse': 'horse_name', 'score': 'score', 'rank': 'rang_scoring'},
    )


def get_results(date):
    """Get actual race results for *date*.

    Returns:
        Dict mapping race name to a list of ``{'horse', 'position'}`` dicts
        ordered by position.
    """
    return _fetch_by_race(
        """
        SELECT race_name, horse_name, position
        FROM results
        WHERE date = ?
        ORDER BY race_name, position
        """,
        (date,),
        {'horse': 'horse_name', 'position': 'position'},
    )


def get_predictions_v1(date):
    """Get scoring v1 predictions for *date*.

    Returns:
        Dict mapping race name to a list of ``{'horse', 'score', 'rank'}``
        dicts ordered by rank.
    """
    return _get_scoring(date, 'v1')


def get_predictions_v2(date):
    """Get scoring v2 predictions for *date*.

    Returns:
        Dict mapping race name to a list of ``{'horse', 'score', 'rank'}``
        dicts ordered by rank.
    """
    return _get_scoring(date, 'v2')


def get_canalturf(date):
    """Get CanalTurf predictions for *date*.

    Returns:
        Dict mapping race name to a list of ``{'horse', 'rank'}`` dicts
        ordered by prediction rank.
    """
    return _fetch_by_race(
        """
        SELECT race_name, horse_name, prediction_rank
        FROM predictions
        WHERE date = ?
        AND source = 'canalturf_partants'
        ORDER BY race_name, prediction_rank
        """,
        (date,),
        {'horse': 'horse_name', 'rank': 'prediction_rank'},
    )


def calculate_hits(predictions, actual, top_n=3):
    """Count predicted horses that finished inside the actual top N.

    For every race present in both mappings, the first *top_n* predicted
    horses are compared with the first *top_n* actual finishers; each
    predicted horse found among those finishers counts as one hit.

    Args:
        predictions: Dict of race name -> list of dicts with a ``'horse'`` key.
        actual: Dict of race name -> list of dicts with a ``'horse'`` key.
        top_n: How many leading entries of each list to consider.

    Returns:
        Total number of hits summed over all races.
    """
    hits = 0
    for race, actual_horses in actual.items():
        race_predictions = predictions.get(race)
        if race_predictions is None:
            # No prediction for this race: it contributes no hits.
            continue
        actual_top = {h['horse'] for h in actual_horses[:top_n]}
        hits += sum(
            1 for h in race_predictions[:top_n] if h['horse'] in actual_top
        )
    return hits


def compare_models(date):
    """Print a hit-rate summary and a per-race breakdown for *date*.

    Loads the actual results and the three prediction sources, prints the
    top-1 / top-3 / top-5 hit counts for each model, then lists for every
    race the first five finishers next to each model's first three picks.
    Prints a short notice and returns early when no results exist.
    """
    print(f"\n{'='*70}")
    print(f"COMPARAISON DES MODÈLES - {date}")
    print(f"{'='*70}\n")

    actual = get_results(date)
    if not actual:
        print("Aucun résultat trouvé pour cette date")
        return

    v1 = get_predictions_v1(date)
    v2 = get_predictions_v2(date)
    canal = get_canalturf(date)

    print(f"Courses avec résultats: {len(actual)}")

    print(f"\n{'MODÈLE':<20} | {'TOP 1':<8} | {'TOP 3':<8} | {'TOP 5':<8}")
    print("-" * 50)

    total_races = len(actual)
    models = (("Scoring V1", v1), ("Scoring V2", v2), ("CanalTurf", canal))
    # One row layout per top-N: the hit cell sits in that N's column and
    # the other two columns are dashed out.
    layouts = (
        (1, "{label} | {cell} | - | - "),
        (3, "{label} | - | {cell} | - "),
        (5, "{label} | - | - | {cell}"),
    )
    for top_n, layout in layouts:
        # The denominator assumes top_n scoring slots for every race.
        slots = total_races * top_n
        for label, preds in models:
            hits = calculate_hits(preds, actual, top_n)
            cell = f"{hits}/{slots} ({hits*100/slots:.0f}%)"
            print(layout.format(label=label, cell=cell))

    print(f"\n{'='*70}")
    print("DÉTAIL PAR COURSE")
    print(f"{'='*70}")

    tagged = (("V1", v1), ("V2", v2), ("CT", canal))
    for race, actual_horses in actual.items():
        print(f"\n🏇 {race}")
        print(f" Résultat: {' / '.join([h['horse'] for h in actual_horses[:5]])}")
        for tag, preds in tagged:
            if race in preds:
                picks = ' / '.join([h['horse'] for h in preds[race][:3]])
                print(f" {tag}: {picks}")
            else:
                print(f" {tag}: Pas de prédictions")


if __name__ == "__main__":
    date = sys.argv[1] if len(sys.argv) > 1 else "2026-04-06"
    compare_models(date)