#!/usr/bin/env python3
"""
Compare scoring models performance vs actual results
"""
import sqlite3
import sys

# Path of the SQLite database opened by the query helpers in this script.
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
def get_results(date, db_path=None):
    """Return the recorded finishing order of every race on ``date``.

    Args:
        date: Value matched against the ``date`` column of the ``results``
            table.
        db_path: Optional path of the SQLite database to read. When omitted,
            the module-level ``DB_PATH`` is used.

    Returns:
        A dict mapping each race name to a list of
        ``{'horse': ..., 'position': ...}`` dicts, in the order produced by
        ``ORDER BY race_name, position``. Empty when no row matches.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.row_factory = sqlite3.Row
        # NOTE(review): SQLite sorts NULL before any other value in ascending
        # order, so a NULL ``position`` would come first - confirm the table
        # never stores NULL for non-finishers.
        rows = conn.execute(
            """
            SELECT race_name, horse_name, position
            FROM results
            WHERE date = ?
            ORDER BY race_name, position
            """,
            (date,),
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    results = {}
    for row in rows:
        results.setdefault(row['race_name'], []).append({
            'horse': row['horse_name'],
            'position': row['position'],
        })
    return results
def _get_scoring_predictions(date, version, db_path=None):
    """Return the ``scoring`` rows of one model version, grouped by race.

    Args:
        date: Value matched against the ``date`` column.
        version: Value matched against the ``scoring_version`` column.
        db_path: Optional path of the SQLite database to read. When omitted,
            the module-level ``DB_PATH`` is used.

    Returns:
        A dict mapping each race name to a list of
        ``{'horse': ..., 'score': ..., 'rank': ...}`` dicts, in the order
        produced by ``ORDER BY race_name, rang_scoring``.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """
            SELECT race_name, horse_name, score, rang_scoring
            FROM scoring
            WHERE date = ? AND scoring_version = ?
            ORDER BY race_name, rang_scoring
            """,
            (date, version),
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    results = {}
    for row in rows:
        results.setdefault(row['race_name'], []).append({
            'horse': row['horse_name'],
            'score': row['score'],
            'rank': row['rang_scoring'],
        })
    return results


def get_predictions_v1(date, db_path=None):
    """Get scoring v1 predictions for ``date``, grouped by race name.

    Returns a dict of race name -> list of
    ``{'horse', 'score', 'rank'}`` dicts ordered by ``rang_scoring``.
    ``db_path`` optionally overrides the module-level ``DB_PATH``.
    """
    return _get_scoring_predictions(date, 'v1', db_path)


def get_predictions_v2(date, db_path=None):
    """Get scoring v2 predictions for ``date``, grouped by race name.

    Returns a dict of race name -> list of
    ``{'horse', 'score', 'rank'}`` dicts ordered by ``rang_scoring``.
    ``db_path`` optionally overrides the module-level ``DB_PATH``.
    """
    return _get_scoring_predictions(date, 'v2', db_path)
def get_canalturf(date, db_path=None):
    """Get CanalTurf predictions for ``date``, grouped by race name.

    Only rows of the ``predictions`` table whose ``source`` is
    ``'canalturf_partants'`` are read.

    Args:
        date: Value matched against the ``date`` column.
        db_path: Optional path of the SQLite database to read. When omitted,
            the module-level ``DB_PATH`` is used.

    Returns:
        A dict mapping each race name to a list of
        ``{'horse': ..., 'rank': ...}`` dicts, in the order produced by
        ``ORDER BY race_name, prediction_rank``.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            """
            SELECT race_name, horse_name, prediction_rank
            FROM predictions
            WHERE date = ? AND source = 'canalturf_partants'
            ORDER BY race_name, prediction_rank
            """,
            (date,),
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    results = {}
    for row in rows:
        results.setdefault(row['race_name'], []).append({
            'horse': row['horse_name'],
            'rank': row['prediction_rank'],
        })
    return results
def calculate_hits(predictions, actual, top_n=3):
    """Count predicted top-N horses that appear in the actual top N.

    Args:
        predictions: Dict of race name -> list of dicts with a ``'horse'``
            key. Only the first ``top_n`` entries of each list are used.
        actual: Dict of race name -> list of dicts with a ``'horse'`` key.
            Only the first ``top_n`` entries of each list are used.
        top_n: How many leading entries of each list to compare.

    Returns:
        The total number of hits summed over every race of ``actual`` that
        also has an entry in ``predictions``; races without predictions
        contribute nothing.
    """
    hits = 0
    for race, actual_horses in actual.items():
        if race not in predictions:
            continue

        # A set gives O(1) membership tests for the predicted names.
        actual_top = {entry['horse'] for entry in actual_horses[:top_n]}
        hits += sum(
            1 for entry in predictions[race][:top_n]
            if entry['horse'] in actual_top
        )
    return hits
def _format_row(label, cells):
    """Join a row label and its cells into one fixed-width table line."""
    return " | ".join([f"{label:<20}"] + [f"{cell:<14}" for cell in cells])


def _horse_names(entries, limit):
    """Return the ``'horse'`` values of the first ``limit`` entries, joined by ' / '."""
    return ' / '.join(entry['horse'] for entry in entries[:limit])


def compare_models(date):
    """Print a hit-rate summary and a per-race breakdown for ``date``.

    The summary has one row per model (Scoring V1, Scoring V2, CanalTurf)
    with its top-1, top-3 and top-5 hits. Each cell shows
    ``hits/possible (percent)`` where ``possible`` is the number of races
    with results multiplied by N, so races a model has no prediction for
    still count in the denominator. The breakdown then lists, for every race
    with results, the first five actual horses and each model's first three
    predicted horses.

    Args:
        date: Date value forwarded unchanged to the query helpers.

    Returns:
        None. Output goes to stdout. When no results exist for ``date`` a
        message is printed and the function returns early.
    """
    banner = '=' * 70
    print(f"\n{banner}")
    print(f"COMPARAISON DES MODÈLES - {date}")
    print(f"{banner}\n")

    actual = get_results(date)
    if not actual:
        print("Aucun résultat trouvé pour cette date")
        return

    # Predictions are only queried once results are known to exist.
    # Each entry: (summary label, per-race tag, predictions by race).
    models = [
        ("Scoring V1", "V1", get_predictions_v1(date)),
        ("Scoring V2", "V2", get_predictions_v2(date)),
        ("CanalTurf", "CT", get_canalturf(date)),
    ]

    total_races = len(actual)
    print(f"Courses avec résultats: {total_races}")

    # Summary table: one row per model, one column per top-N cut-off.
    top_ns = (1, 3, 5)
    header_line = _format_row("MODÈLE", [f"TOP {n}" for n in top_ns])
    print(f"\n{header_line}")
    print("-" * len(header_line))

    for label, _tag, preds in models:
        cells = []
        for n in top_ns:
            hits = calculate_hits(preds, actual, n)
            possible = total_races * n
            cells.append(f"{hits}/{possible} ({hits * 100 / possible:.0f}%)")
        print(_format_row(label, cells))

    # Detailed per race
    print(f"\n{banner}")
    print("DÉTAIL PAR COURSE")
    print(banner)

    for race, actual_horses in actual.items():
        print(f"\n🏇 {race}")
        print(f" Résultat: {_horse_names(actual_horses, 5)}")

        for _label, tag, preds in models:
            if race in preds:
                print(f" {tag}: {_horse_names(preds[race], 3)}")
            else:
                print(f" {tag}: Pas de prédictions")
if __name__ == "__main__":
    # The date is taken from the first command-line argument; without one
    # the script falls back to a fixed default date.
    date = sys.argv[1] if len(sys.argv) > 1 else "2026-04-06"
    compare_models(date)