Files
turf_saas/compare_models.py
2026-04-25 17:18:43 +02:00

207 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""
Compare scoring models performance vs actual results
"""
import sqlite3
import sys
# Path handed to sqlite3.connect() by the query helpers in this module.
# NOTE(review): absolute, machine-specific location — confirm it matches the
# environment this script is deployed to.
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
def get_results(date, db_path=None):
    """Get actual race results for one date, grouped by race.

    Args:
        date: Value compared with the ``date`` column of the ``results``
            table (bound as a query parameter).
        db_path: Optional path of the SQLite database to read. When omitted,
            the module-level ``DB_PATH`` is used.

    Returns:
        A dict mapping each race name to a list of
        ``{'horse': ..., 'position': ...}`` dicts, in the order returned by
        ``ORDER BY race_name, position``. Empty when no row matches.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.row_factory = sqlite3.Row
        # NOTE(review): SQLite sorts NULL before other values in ascending
        # order, so rows with a NULL position (if the table allows them)
        # would come first in each race — confirm against the schema.
        rows = conn.execute("""
            SELECT race_name, horse_name, position
            FROM results
            WHERE date = ?
            ORDER BY race_name, position
        """, (date,)).fetchall()
    finally:
        # Close even when the query raises, so a failed lookup does not
        # leak the connection.
        conn.close()

    results = {}
    for row in rows:
        results.setdefault(row['race_name'], []).append({
            'horse': row['horse_name'],
            'position': row['position'],
        })
    return results
def _load_scoring_predictions(date, version, db_path=None):
    """Read the ``scoring`` rows of one scoring version, grouped by race.

    Args:
        date: Value compared with the ``date`` column (bound parameter).
        version: Value compared with the ``scoring_version`` column
            (bound parameter), e.g. ``'v1'`` or ``'v2'``.
        db_path: Optional path of the SQLite database to read. When omitted,
            the module-level ``DB_PATH`` is used.

    Returns:
        A dict mapping each race name to a list of
        ``{'horse': ..., 'score': ..., 'rank': ...}`` dicts, in the order
        returned by ``ORDER BY race_name, rang_scoring``.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("""
            SELECT race_name, horse_name, score, rang_scoring
            FROM scoring
            WHERE date = ? AND scoring_version = ?
            ORDER BY race_name, rang_scoring
        """, (date, version)).fetchall()
    finally:
        # Close even when the query raises, so a failed lookup does not
        # leak the connection.
        conn.close()

    results = {}
    for row in rows:
        results.setdefault(row['race_name'], []).append({
            'horse': row['horse_name'],
            'score': row['score'],
            'rank': row['rang_scoring'],
        })
    return results


def get_predictions_v1(date, db_path=None):
    """Get scoring v1 predictions for one date, grouped by race.

    Args:
        date: Date value to look up in the ``scoring`` table.
        db_path: Optional SQLite database path; defaults to ``DB_PATH``.

    Returns:
        Dict of race name -> list of ``{'horse', 'score', 'rank'}`` dicts
        for rows whose ``scoring_version`` is ``'v1'``.
    """
    return _load_scoring_predictions(date, 'v1', db_path)


def get_predictions_v2(date, db_path=None):
    """Get scoring v2 predictions for one date, grouped by race.

    Args:
        date: Date value to look up in the ``scoring`` table.
        db_path: Optional SQLite database path; defaults to ``DB_PATH``.

    Returns:
        Dict of race name -> list of ``{'horse', 'score', 'rank'}`` dicts
        for rows whose ``scoring_version`` is ``'v2'``.
    """
    return _load_scoring_predictions(date, 'v2', db_path)
def get_canalturf(date, db_path=None):
    """Get CanalTurf predictions for one date, grouped by race.

    Only rows of the ``predictions`` table whose ``source`` equals
    ``'canalturf_partants'`` are read.

    Args:
        date: Value compared with the ``date`` column (bound parameter).
        db_path: Optional path of the SQLite database to read. When omitted,
            the module-level ``DB_PATH`` is used.

    Returns:
        A dict mapping each race name to a list of
        ``{'horse': ..., 'rank': ...}`` dicts, in the order returned by
        ``ORDER BY race_name, prediction_rank``.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("""
            SELECT race_name, horse_name, prediction_rank
            FROM predictions
            WHERE date = ? AND source = 'canalturf_partants'
            ORDER BY race_name, prediction_rank
        """, (date,)).fetchall()
    finally:
        # Close even when the query raises, so a failed lookup does not
        # leak the connection.
        conn.close()

    results = {}
    for row in rows:
        results.setdefault(row['race_name'], []).append({
            'horse': row['horse_name'],
            'rank': row['prediction_rank'],
        })
    return results
def calculate_hits(predictions, actual, top_n=3):
    """Calculate hits for top N predictions.

    A hit is a horse that appears both among the first ``top_n`` predicted
    entries and among the first ``top_n`` actual entries of the same race.
    Races present in ``actual`` but absent from ``predictions`` contribute
    nothing.

    Args:
        predictions: Dict of race name -> list of dicts with a ``'horse'``
            key, already ordered best-first.
        actual: Dict of race name -> list of dicts with a ``'horse'`` key,
            already ordered by finishing position.
        top_n: How many leading entries of each list to compare.

    Returns:
        Total number of hits summed over all races.
    """
    hits = 0
    for race, actual_horses in actual.items():
        if race not in predictions:
            continue
        # Compare distinct names: a horse listed twice in the predictions
        # must count once, otherwise a race could score more than top_n.
        predicted = {h['horse'] for h in predictions[race][:top_n]}
        finished = {h['horse'] for h in actual_horses[:top_n]}
        hits += len(predicted & finished)
    return hits
def compare_models(date):
    """Print a hit-rate comparison of the prediction models for one date.

    Loads the actual results, the scoring v1/v2 predictions and the
    CanalTurf predictions, then prints to stdout:

    * a summary with one line per model for Top 1, Top 3 and Top 5, where
      the denominator is ``number of races with results * N``;
    * a per-race detail showing the first five actual horses and the first
      three horses of each model.

    Args:
        date: Date value forwarded to the query helpers.

    Returns:
        None. When no result exists for ``date``, a message is printed and
        the predictions are not queried.
    """
    print(f"\n{'='*70}")
    print(f"COMPARAISON DES MODÈLES - {date}")
    print(f"{'='*70}\n")

    actual = get_results(date)
    if not actual:
        # Nothing to compare against: skip the three prediction queries.
        print("Aucun résultat trouvé pour cette date")
        return

    # (summary label, detail tag, predictions grouped by race)
    models = (
        ("Scoring V1", "V1", get_predictions_v1(date)),
        ("Scoring V2", "V2", get_predictions_v2(date)),
        ("CanalTurf", "CT", get_canalturf(date)),
    )

    total_races = len(actual)
    print(f"Courses avec résultats: {total_races}")

    print(f"\n{'MODÈLE':<20} | {'TOP 1':<8} | {'TOP 3':<8} | {'TOP 5':<8}")
    print("-" * 50)

    # Each Top-N fills a different column of the row; the other two columns
    # show "-" placeholders.
    row_templates = (
        (1, "{label} | {cell} | - | - "),
        (3, "{label} | - | {cell} | - "),
        (5, "{label} | - | - | {cell}"),
    )
    for top_n, template in row_templates:
        # total_races is non-zero here because of the guard above.
        possible = total_races * top_n
        for label, _tag, model_predictions in models:
            hits = calculate_hits(model_predictions, actual, top_n)
            cell = f"{hits}/{possible} ({hits*100/possible:.0f}%)"
            print(template.format(label=label, cell=cell))

    print(f"\n{'='*70}")
    print("DÉTAIL PAR COURSE")
    print(f"{'='*70}")
    for race, actual_horses in actual.items():
        print(f"\n🏇 {race}")
        finishers = ' / '.join([h['horse'] for h in actual_horses[:5]])
        print(f" Résultat: {finishers}")
        for _label, tag, model_predictions in models:
            if race in model_predictions:
                picks = ' / '.join(
                    [h['horse'] for h in model_predictions[race][:3]]
                )
                print(f" {tag}: {picks}")
            else:
                print(f" {tag}: Pas de prédictions")
if __name__ == "__main__":
    # The first command-line argument selects the date; without one, the
    # built-in sample date is compared.
    cli_args = sys.argv[1:]
    date = cli_args[0] if cli_args else "2026-04-06"
    compare_models(date)