Files
turf_saas/backtest_analyzer.py
2026-04-25 17:18:43 +02:00

281 lines
9.5 KiB
Python

#!/usr/bin/env python3
"""
Backtest Analyzer - Analyse des prédictions vs résultats
"""
import sqlite3
import json
from datetime import datetime
# Absolute path of the SQLite database file opened by get_connection().
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
def get_connection():
    """Open and return a new SQLite connection to the file at ``DB_PATH``.

    The caller owns the returned connection and is responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH)
    return connection
def get_results_for_date(date):
    """Return the finishing order of every race recorded for ``date``.

    Reads ``v_resultats_complets`` for rows whose ``date_programme`` equals
    ``date`` and whose ``position_finale`` is greater than 0, ordered by race
    and then by finishing position.

    Args:
        date: Value compared against the ``date_programme`` column.

    Returns:
        A dict mapping each race name to a list of dicts with the keys
        ``'horse'``, ``'position'`` and ``'numero'``, in ascending finishing
        position. The dict is empty when no row matches.
    """
    conn = get_connection()
    try:
        rows = conn.execute(
            """
            SELECT course, cheval, position_finale, num_pmu
            FROM v_resultats_complets
            WHERE date_programme = ? AND position_finale > 0
            ORDER BY course, position_finale
            """,
            (date,),
        ).fetchall()
    finally:
        # Release the connection even when the query raises; otherwise every
        # failed call would leave an open handle on the database file.
        conn.close()

    # Group the rows by race, preserving the SQL ordering inside each race.
    courses = {}
    for course_name, horse, position, numero in rows:
        courses.setdefault(course_name, []).append({
            'horse': horse,
            'position': position,
            'numero': numero,
        })
    return courses
def get_canalturf_predictions(date):
    """Return the rows of the ``predictions`` table for ``date``, grouped by race.

    The query does not filter on ``source``: every prediction row for the date
    is placed in the race's ``'all'`` list, and rows whose source is one of the
    three ``canalturf_prono_*`` values are additionally placed in the matching
    ``'bases'``, ``'chances'`` or ``'outsiders'`` list.

    Args:
        date: Value compared against the ``date`` column.

    Returns:
        A dict mapping each race name (``'UNKNOWN'`` when the stored name is
        empty or NULL) to a dict with the keys ``'bases'``, ``'chances'``,
        ``'outsiders'`` and ``'all'``; each value is a list of
        ``{'horse', 'numero'}`` dicts.
    """
    conn = get_connection()
    try:
        rows = conn.execute(
            """
            SELECT race_name, horse_name, horse_number, source
            FROM predictions
            WHERE date = ?
            """,
            (date,),
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # NOTE(review): the query has no ORDER BY, so the order of each list is
    # whatever SQLite yields. run_backtest() feeds the 'all' list to
    # calculate_metrics(), which treats the first entries as the top picks;
    # confirm that this ordering is intended.
    bucket_by_source = {
        'canalturf_prono_bases': 'bases',
        'canalturf_prono_chances': 'chances',
        'canalturf_prono_outsiders': 'outsiders',
    }

    races = {}
    for race_name, horse_name, horse_number, source in rows:
        key = race_name or 'UNKNOWN'
        if key not in races:
            races[key] = {'bases': [], 'chances': [], 'outsiders': [], 'all': []}
        entry = {'horse': horse_name, 'numero': horse_number}
        races[key]['all'].append(entry)
        bucket = bucket_by_source.get(source)
        if bucket is not None:
            # The same entry object is shared between 'all' and its bucket.
            races[key][bucket].append(entry)
    return races
def get_scoring_predictions(date):
    """Return the rows of the ``scoring`` table for ``date``, grouped by race.

    Args:
        date: Value compared against the ``date`` column.

    Returns:
        A dict mapping each race name (``'UNKNOWN'`` when the stored name is
        empty or NULL) to a list of dicts with the keys ``'horse'``,
        ``'numero'``, ``'score'`` and ``'rang'``.
    """
    conn = get_connection()
    try:
        rows = conn.execute(
            """
            SELECT race_name, horse_name, horse_number, score, rang_scoring
            FROM scoring
            WHERE date = ?
            """,
            (date,),
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # NOTE(review): the query has no ORDER BY, so each list is in whatever
    # order SQLite yields. run_backtest() passes these lists straight to
    # calculate_metrics(), which treats the first entries as the top picks;
    # confirm whether rows should be sorted by rang_scoring.
    races = {}
    for race_name, horse_name, horse_number, score, rang in rows:
        races.setdefault(race_name or 'UNKNOWN', []).append({
            'horse': horse_name,
            'numero': horse_number,
            'score': score,
            'rang': rang,
        })
    return races
def _horse_key(entry):
    """Return the upper-cased horse name of ``entry``, or None when it is missing."""
    name = entry['horse']
    return name.upper() if name is not None else None


def _top_names(entries, limit):
    """Return the distinct normalized names among the first ``limit`` entries."""
    keys = (_horse_key(entry) for entry in entries[:limit])
    # Entries without a name cannot match anything, so they are left out.
    return {key for key in keys if key is not None}


def calculate_metrics(predicted, actual):
    """Compare an ordered prediction list with an ordered result list for one race.

    Horse names are compared case-insensitively (both sides are upper-cased).
    Entries whose ``'horse'`` value is None are ignored instead of raising.

    Args:
        predicted: List of dicts with a ``'horse'`` key, best pick first.
        actual: List of dicts with a ``'horse'`` key, winner first.

    Returns:
        None when either list is empty, otherwise a dict with:

        * ``'top1_hit'``: True when the first predicted horse is the winner.
        * ``'top1_predicted'``: upper-cased name of the first predicted horse
          (None when that entry has no name).
        * ``'top3_precision'``: number of horses common to both top 3,
          divided by 3.
        * ``'top5_precision'``: number of horses common to both top 5,
          divided by 5.
        * ``'ze2_hit'``: True when at least 2 horses are common to both top 4.
    """
    if not predicted or not actual:
        return None

    pred_top1 = _horse_key(predicted[0])
    actual_top1 = _horse_key(actual[0])

    metrics = {}
    # Two missing names must not count as a correct winner prediction.
    metrics['top1_hit'] = pred_top1 is not None and pred_top1 == actual_top1
    metrics['top1_predicted'] = pred_top1

    # The denominators are fixed at 3 and 5, even when a list is shorter.
    metrics['top3_precision'] = len(_top_names(predicted, 3) & _top_names(actual, 3)) / 3
    metrics['top5_precision'] = len(_top_names(predicted, 5) & _top_names(actual, 5)) / 5

    # ZE2: at least 2 of the 4 predicted horses finish in the actual top 4.
    metrics['ze2_hit'] = len(_top_names(predicted, 4) & _top_names(actual, 4)) >= 2
    return metrics
def _record_metrics(source, date, race_name, predicted, actual, stats, all_results):
    """Score one race for ``source`` and fold the outcome into the running totals."""
    m = calculate_metrics(predicted, actual)
    if not m:
        # Nothing to compare (empty prediction or result list).
        return
    totals = stats[source]
    totals['total'] += 1
    totals['top1'] += 1 if m['top1_hit'] else 0
    totals['top3'] += m['top3_precision']
    totals['top5'] += m['top5_precision']
    totals['ze2'] += 1 if m['ze2_hit'] else 0
    all_results.append({
        'date': date,
        'race': race_name,
        'source': source,
        'top1_pred': m['top1_predicted'],
        'top1_hit': m['top1_hit'],
        'ze2_hit': m['ze2_hit'],
    })


def run_backtest():
    """Compare stored predictions with stored results for every available date.

    For each date that has results, every race is scored once per source
    ('canalturf' and 'scoring') when that source has predictions under the
    same race name. Prints a short header to stdout.

    Returns:
        None when no result date exists, otherwise a dict with:

        * ``'dates'``: the result dates, most recent first.
        * ``'results'``: one dict per (race, source) scored, with the keys
          ``'date'``, ``'race'``, ``'source'``, ``'top1_pred'``,
          ``'top1_hit'`` and ``'ze2_hit'``.
        * ``'stats'``: per-source totals (``'top1'``, ``'top3'``, ``'top5'``,
          ``'ze2'``, ``'total'``) and their ``'*_pct'`` percentages.
        * ``'generated_at'``: ISO-formatted local timestamp.
    """
    conn = get_connection()
    try:
        cursor = conn.execute("""
            SELECT DISTINCT date_programme
            FROM v_resultats_complets
            WHERE position_finale > 0
            ORDER BY date_programme DESC
        """)
        dates = [row[0] for row in cursor.fetchall()]
    finally:
        # Release the connection even when the query raises.
        conn.close()

    if not dates:
        print("Aucune donnée trouvée")
        return None

    print(f"\n{'='*60}")
    print("📊 BACKTEST ANALYZER")
    print(f"{'='*60}")
    # dates is sorted newest first, so the period runs from dates[-1] to dates[0].
    print(f"Période: {dates[-1]} au {dates[0]} ({len(dates)} jours)")

    all_results = []
    stats = {
        'canalturf': {'top1': 0, 'top3': 0, 'top5': 0, 'ze2': 0, 'total': 0},
        'scoring': {'top1': 0, 'top3': 0, 'top5': 0, 'ze2': 0, 'total': 0},
    }

    for date in dates:
        results = get_results_for_date(date)
        if not results:
            continue
        canalturf_preds = get_canalturf_predictions(date)
        scoring_preds = get_scoring_predictions(date)
        for race_name, race_results in results.items():
            # Predictions are matched to results purely by race name.
            if race_name in canalturf_preds:
                _record_metrics(
                    'canalturf', date, race_name,
                    canalturf_preds[race_name]['all'], race_results,
                    stats, all_results,
                )
            if race_name in scoring_preds:
                _record_metrics(
                    'scoring', date, race_name,
                    scoring_preds[race_name], race_results,
                    stats, all_results,
                )

    # Always define the percentage keys, using 0.0 when a source scored no
    # race, so that consumers reading '*_pct' never hit a KeyError.
    for totals in stats.values():
        race_count = totals['total']
        for key in ('top1', 'top3', 'top5', 'ze2'):
            if race_count > 0:
                totals[f'{key}_pct'] = round(totals[key] / race_count * 100, 1)
            else:
                totals[f'{key}_pct'] = 0.0

    return {
        'dates': dates,
        'results': all_results,
        'stats': stats,
        'generated_at': datetime.now().isoformat(),
    }
def print_report(data):
    """Print the global summary of a backtest to stdout.

    Args:
        data: Dict holding a ``'stats'`` entry with one totals dict per source
            (``'canalturf'`` and ``'scoring'``). A source whose ``'total'`` is
            not greater than 0 is skipped.
    """
    per_source = data['stats']
    rule = '=' * 60
    print("\n📈 RÉSUMÉ GLOBAL")
    print(rule)

    titles = {'canalturf': 'CANALTURF', 'scoring': 'SCORING'}
    for key, title in titles.items():
        totals = per_source[key]
        if not (totals['total'] > 0):
            continue
        races = totals['total']
        print(f"\n{title} ({races} courses analysées):")
        print(f" Top1: {totals['top1']}/{races} = {totals['top1_pct']}%")
        print(f" Top3: {totals['top3_pct']}%")
        print(f" Top5: {totals['top5_pct']}%")
        print(f" ZE2: {totals['ze2']}/{races} = {totals['ze2_pct']}%")
def _truncate(text, limit):
    """Shorten ``text`` to ``limit`` characters, adding '...' only when it was cut."""
    if text is None:
        return 'N/A'
    text = str(text)
    return text if len(text) <= limit else f"{text[:limit]}..."


def _pct_cell(source_stats, key):
    """Format a percentage cell, or 'N/A' when the statistic was not computed."""
    return f"{source_stats[key]}%" if key in source_stats else "N/A"


def _render_markdown(data, generated):
    """Build the markdown report (front matter, summary table, detail table)."""
    dates = data['dates']
    lines = [
        "---",
        f"date: {generated.strftime('%Y-%m-%d')}",
        "tags: [turf, backtest, analyse]",
        "type: recherche",
        "status: active",
        "---",
        # dates is newest first, so the period runs from dates[-1] to dates[0].
        f"# Backtest - {dates[-1]} au {dates[0]}",
        # Report the real number of days instead of a hard-coded figure.
        f"> Analyse des prédictions vs résultats officiels PMU ({len(dates)} jours)",
        "## Résumé Global",
        "| Source | Courses | Top1 | Top3 | Top5 | ZE2 Hit |",
        "|--------|---------|------|------|------|---------|",
    ]
    for key, label in (('canalturf', 'Canalturf'), ('scoring', 'Scoring')):
        source_stats = data['stats'][key]
        cells = " | ".join(
            _pct_cell(source_stats, f"{metric}_pct")
            for metric in ('top1', 'top3', 'top5', 'ze2')
        )
        lines.append(f"| {label} | {source_stats['total']} | {cells} |")

    lines += [
        "## Détail",
        "| Date | Course | Source | Top1 Prédit | Hit | ZE2 |",
        "|------|--------|--------|-------------|-----|-----|",
    ]
    for r in data['results']:
        # Distinct markers so that hits and misses can be told apart.
        hit = '✅' if r['top1_hit'] else '❌'
        ze2 = '✅' if r['ze2_hit'] else '❌'
        lines.append(
            f"| {r['date']} | {_truncate(r['race'], 30)} | {r['source']} | "
            f"{_truncate(r['top1_pred'], 20)} | {hit} | {ze2} |"
        )

    lines += ["", "---", f"*Généré le {generated.strftime('%Y-%m-%d %H:%M')}*", ""]
    return "\n".join(lines)


def main():
    """Run the backtest, print the summary and save the JSON and markdown reports.

    Does nothing further when run_backtest() returns no data. Both report
    files are written under ``/home/h3r7/turf_scraper/`` and overwritten on
    every run.
    """
    data = run_backtest()
    if not data:
        return

    print_report(data)

    json_path = '/home/h3r7/turf_scraper/backtest_result.json'
    md_path = '/home/h3r7/turf_scraper/backtest_result.md'

    # Explicit UTF-8: the reports contain accented characters and emoji, which
    # a non-UTF-8 default locale encoding could fail to write.
    with open(json_path, 'w', encoding='utf-8') as f:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(data, f, indent=2, default=str)

    with open(md_path, 'w', encoding='utf-8') as f:
        f.write(_render_markdown(data, datetime.now()))

    print("\n💾 Rapports sauvegardés:")
    print(f" - {json_path}")
    print(f" - {md_path}")


# Run the backtest only when the file is executed as a script.
if __name__ == "__main__":
    main()