281 lines
9.5 KiB
Python
281 lines
9.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Backtest Analyzer - Analyse des prédictions vs résultats
|
|
"""
|
|
|
|
import sqlite3
|
|
import json
|
|
from datetime import datetime
|
|
|
|
# Absolute path of the SQLite database file opened by get_connection().
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
|
|
|
|
def get_connection():
    """Open a new SQLite connection to the database file at DB_PATH.

    Returns:
        The sqlite3 connection, still open; nothing in this function
        closes it.
    """
    connection = sqlite3.connect(DB_PATH)
    return connection
|
|
|
|
def get_results_for_date(date):
    """Return the results of one programme date, grouped by race.

    Args:
        date: Value matched against the ``date_programme`` column of the
            ``v_resultats_complets`` view.

    Returns:
        dict mapping each race name (``course`` column) to a list of
        ``{'horse', 'position', 'numero'}`` dicts sorted by finishing
        position. Only rows with ``position_finale > 0`` are included.
    """
    conn = get_connection()
    try:
        rows = conn.execute("""
            SELECT course, cheval, position_finale, num_pmu
            FROM v_resultats_complets
            WHERE date_programme = ? AND position_finale > 0
            ORDER BY course, position_finale
        """, (date,)).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # Group by race; rows already arrive sorted by race, then position.
    courses = {}
    for course_name, horse, position, numero in rows:
        courses.setdefault(course_name, []).append({
            'horse': horse,
            'position': position,
            'numero': numero,
        })
    return courses
|
|
|
|
def get_canalturf_predictions(date):
    """Return the rows of ``predictions`` for one date, grouped by race.

    Args:
        date: Value matched against the ``date`` column of ``predictions``.

    Returns:
        dict mapping each race name ('UNKNOWN' when the stored name is
        empty or NULL) to a dict with four lists of
        ``{'horse', 'numero'}`` entries:

        * ``'all'``: every row of the race, whatever its source;
        * ``'bases'``, ``'chances'``, ``'outsiders'``: the rows whose
          source is the matching ``canalturf_prono_*`` value.
    """
    conn = get_connection()
    try:
        # NOTE(review): the query neither filters on source nor orders the
        # rows, so 'all' holds every source in whatever order the database
        # returns them -- confirm this is what consumers of 'all' expect.
        rows = conn.execute("""
            SELECT race_name, horse_name, horse_number, source
            FROM predictions
            WHERE date = ?
        """, (date,)).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    bucket_by_source = {
        'canalturf_prono_bases': 'bases',
        'canalturf_prono_chances': 'chances',
        'canalturf_prono_outsiders': 'outsiders',
    }

    races = {}
    for race_name, horse_name, horse_number, source in rows:
        race = races.setdefault(
            race_name or 'UNKNOWN',
            {'bases': [], 'chances': [], 'outsiders': [], 'all': []},
        )
        entry = {'horse': horse_name, 'numero': horse_number}
        race['all'].append(entry)

        # Rows from any other source only appear in 'all'.
        bucket = bucket_by_source.get(source)
        if bucket is not None:
            race[bucket].append(entry)

    return races
|
|
|
|
def get_scoring_predictions(date):
    """Return the rows of ``scoring`` for one date, grouped by race.

    Args:
        date: Value matched against the ``date`` column of ``scoring``.

    Returns:
        dict mapping each race name ('UNKNOWN' when the stored name is
        empty or NULL) to a list of ``{'horse', 'numero', 'score', 'rang'}``
        dicts, ordered by ``rang_scoring`` ascending with NULL ranks last.
    """
    conn = get_connection()
    try:
        # Without ORDER BY the row order is unspecified, yet
        # calculate_metrics() reads the first entries of each list as the
        # top picks. Ordering by rank assumes rang_scoring == 1 is the
        # best-rated horse -- TODO confirm against the scoring job.
        rows = conn.execute("""
            SELECT race_name, horse_name, horse_number, score, rang_scoring
            FROM scoring
            WHERE date = ?
            ORDER BY race_name, rang_scoring IS NULL, rang_scoring
        """, (date,)).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    races = {}
    for race_name, horse_name, horse_number, score, rang in rows:
        races.setdefault(race_name or 'UNKNOWN', []).append({
            'horse': horse_name,
            'numero': horse_number,
            'score': score,
            'rang': rang,
        })

    return races
|
|
|
|
def calculate_metrics(predicted, actual):
    """Compare an ordered prediction list with an ordered result list.

    Horse names are compared case-insensitively (upper-cased).

    Args:
        predicted: Sequence of dicts with a 'horse' key, best pick first.
        actual: Sequence of dicts with a 'horse' key, winner first.

    Returns:
        None when either sequence is empty; otherwise a dict with
        'top1_hit', 'top1_predicted', 'top3_precision', 'top5_precision'
        and 'ze2_hit' (at least 2 horses shared by both top fours).
    """
    if not (predicted and actual):
        return None

    def leading_names(entries, limit):
        # Upper-cased names of the first `limit` entries, as a set.
        return {entry['horse'].upper() for entry in entries[:limit]}

    def shared(limit):
        # Number of horses present in both top-`limit` groups.
        return len(leading_names(predicted, limit) & leading_names(actual, limit))

    best_guess = predicted[0]['horse'].upper()
    winner = actual[0]['horse'].upper()

    return {
        'top1_hit': best_guess == winner,
        'top1_predicted': best_guess,
        # Precision is always relative to the nominal group size.
        'top3_precision': shared(3) / 3,
        'top5_precision': shared(5) / 5,
        'ze2_hit': shared(4) >= 2,
    }
|
|
|
|
def _accumulate_race(source, date, race_name, predicted, actual, stats,
                     all_results):
    """Score one race for one source and fold the outcome into the totals."""
    m = calculate_metrics(predicted, actual)
    if not m:
        # No predictions or no results for this race: nothing to count.
        return

    s = stats[source]
    s['total'] += 1
    s['top1'] += 1 if m['top1_hit'] else 0
    s['top3'] += m['top3_precision']
    s['top5'] += m['top5_precision']
    s['ze2'] += 1 if m['ze2_hit'] else 0

    all_results.append({
        'date': date,
        'race': race_name,
        'source': source,
        'top1_pred': m['top1_predicted'],
        'top1_hit': m['top1_hit'],
        'ze2_hit': m['ze2_hit'],
    })


def run_backtest():
    """Score every stored prediction against the recorded results.

    Every date that has at least one row with ``position_finale > 0`` in
    ``v_resultats_complets`` is processed, most recent first. For each race
    of each date, the 'canalturf' and 'scoring' predictions (when present
    under the same race name) are scored with calculate_metrics().

    Returns:
        None when no date has results; otherwise a dict with:

        * ``'dates'``: the processed dates, most recent first;
        * ``'results'``: one entry per scored (race, source) pair;
        * ``'stats'``: per-source counters plus ``*_pct`` percentages
          (0.0 when the source has no scored race);
        * ``'generated_at'``: ISO timestamp of the run.
    """
    conn = get_connection()
    try:
        rows = conn.execute("""
            SELECT DISTINCT date_programme
            FROM v_resultats_complets
            WHERE position_finale > 0
            ORDER BY date_programme DESC
        """).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()
    dates = [row[0] for row in rows]

    if not dates:
        print("Aucune donnée trouvée")
        return None

    print(f"\n{'='*60}")
    print(f"📊 BACKTEST ANALYZER")
    print(f"{'='*60}")
    # dates is sorted descending, so the last element is the oldest day.
    print(f"Période: {dates[-1]} au {dates[0]} ({len(dates)} jours)")

    all_results = []
    stats = {
        'canalturf': {'top1': 0, 'top3': 0, 'top5': 0, 'ze2': 0, 'total': 0},
        'scoring': {'top1': 0, 'top3': 0, 'top5': 0, 'ze2': 0, 'total': 0},
    }

    for date in dates:
        results = get_results_for_date(date)
        if not results:
            continue

        canalturf_preds = get_canalturf_predictions(date)
        scoring_preds = get_scoring_predictions(date)

        for race_name, race_results in results.items():
            # Predictions are matched to results on the exact race name.
            if race_name in canalturf_preds:
                _accumulate_race('canalturf', date, race_name,
                                 canalturf_preds[race_name]['all'],
                                 race_results, stats, all_results)
            if race_name in scoring_preds:
                _accumulate_race('scoring', date, race_name,
                                 scoring_preds[race_name],
                                 race_results, stats, all_results)

    # Always publish the percentage keys so that consumers can read them
    # even for a source that scored no race at all.
    for s in stats.values():
        total = s['total']
        for key in ('top1', 'top3', 'top5', 'ze2'):
            s[f'{key}_pct'] = round(s[key] / total * 100, 1) if total else 0.0

    return {
        'dates': dates,
        'results': all_results,
        'stats': stats,
        'generated_at': datetime.now().isoformat(),
    }
|
|
|
|
def print_report(data):
    """Print the global summary of each prediction source to stdout.

    Args:
        data: Mapping whose 'stats' entry holds, for the 'canalturf' and
            'scoring' sources, the counters and percentages printed below.

    A source whose 'total' is not positive is skipped.
    """
    per_source = data['stats']
    print("\n📈 RÉSUMÉ GLOBAL")
    print('=' * 60)

    sections = (('canalturf', 'CANALTURF'), ('scoring', 'SCORING'))
    for key, title in sections:
        figures = per_source[key]
        races = figures['total']
        if not races > 0:
            continue

        print(f"\n{title} ({races} courses analysées):")
        print(f" Top1: {figures['top1']}/{races} = {figures['top1_pct']}%")
        print(f" Top3: {figures['top3_pct']}%")
        print(f" Top5: {figures['top5_pct']}%")
        print(f" ZE2: {figures['ze2']}/{races} = {figures['ze2_pct']}%")
|
|
|
|
def _build_markdown(data):
    """Render the backtest summary and per-race detail as a Markdown note."""
    now = datetime.now()
    dates = data['dates']

    lines = [
        "---",
        f"date: {now.strftime('%Y-%m-%d')}",
        "tags: [turf, backtest, analyse]",
        "type: recherche",
        "status: active",
        "---",
        "",
        # dates is sorted most recent first, hence [-1] .. [0].
        f"# Backtest - {dates[-1]} au {dates[0]}",
        "",
        # Report the real number of days instead of a fixed figure.
        f"> Analyse des prédictions vs résultats officiels PMU ({len(dates)} jours)",
        "",
        "## Résumé Global",
        "",
        "| Source | Courses | Top1 | Top3 | Top5 | ZE2 Hit |",
        "|--------|---------|------|------|------|---------|",
    ]

    for key, label in (('canalturf', 'Canalturf'), ('scoring', 'Scoring')):
        s = data['stats'][key]
        # The *_pct keys may be absent when the source scored no race.
        lines.append(
            f"| {label} | {s['total']} | {s.get('top1_pct', 0.0)}% "
            f"| {s.get('top3_pct', 0.0)}% | {s.get('top5_pct', 0.0)}% "
            f"| {s.get('ze2_pct', 0.0)}% |"
        )

    lines += [
        "",
        "## Détail",
        "",
        "| Date | Course | Source | Top1 Prédit | Hit | ZE2 |",
        "|------|--------|--------|-------------|-----|-----|",
    ]

    for r in data['results']:
        top1 = r['top1_pred'][:20] if r['top1_pred'] else 'N/A'
        hit = '✅' if r['top1_hit'] else '❌'
        ze2 = '✅' if r['ze2_hit'] else '❌'
        lines.append(
            f"| {r['date']} | {r['race'][:30]}... | {r['source']} "
            f"| {top1}... | {hit} | {ze2} |"
        )

    lines += [
        "",
        "---",
        f"*Généré le {now.strftime('%Y-%m-%d %H:%M')}*",
        "",
    ]
    return "\n".join(lines)


def main():
    """Run the backtest, print the summary and save JSON/Markdown reports.

    Nothing is printed or written when run_backtest() returns no data.
    """
    data = run_backtest()
    if not data:
        return

    print_report(data)

    json_path = '/home/h3r7/turf_scraper/backtest_result.json'
    md_path = '/home/h3r7/turf_scraper/backtest_result.md'

    # Explicit UTF-8: the reports contain accents and emoji, and the default
    # encoding of open() depends on the locale.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, default=str)

    with open(md_path, 'w', encoding='utf-8') as f:
        f.write(_build_markdown(data))

    print(f"\n💾 Rapports sauvegardés:")
    print(f" - {json_path}")
    print(f" - {md_path}")
|
|
|
|
# Run the backtest only when this file is executed as a script.
if __name__ == "__main__":
    main()
|