#!/usr/bin/env python3
"""
Backtest Analyzer - compare stored predictions against official race results.

For every date that has results in ``v_resultats_complets``, the script loads
the predictions stored in the ``predictions`` and ``scoring`` tables, computes
per-race hit metrics (top1 / top3 / top5 / ZE2) and writes a JSON and a
Markdown report next to the database.
"""
import json
import sqlite3
from collections import defaultdict
from contextlib import closing
from datetime import datetime

DB_PATH = "/home/h3r7/turf_scraper/turf.db"
JSON_REPORT_PATH = '/home/h3r7/turf_scraper/backtest_result.json'
MD_REPORT_PATH = '/home/h3r7/turf_scraper/backtest_result.md'

# Maps the `source` column of the predictions table to the bucket it feeds.
_CANALTURF_BUCKETS = {
    'canalturf_prono_bases': 'bases',
    'canalturf_prono_chances': 'chances',
    'canalturf_prono_outsiders': 'outsiders',
}

# Sources evaluated by the backtest, in reporting order.
_SOURCES = ('canalturf', 'scoring')


def get_connection():
    """Open a new SQLite connection to the database at ``DB_PATH``."""
    return sqlite3.connect(DB_PATH)


def _fetch_all(query, params=()):
    """Run a read query and return every row; the connection is always closed."""
    # sqlite3's own context manager only commits/rolls back, it does not
    # close the connection, hence contextlib.closing.
    with closing(get_connection()) as conn:
        return conn.execute(query, params).fetchall()


def get_results_for_date(date):
    """Return the finishers of every race run on ``date``.

    Args:
        date: value matched against ``date_programme``.

    Returns:
        dict mapping race name to a list of
        ``{'horse', 'position', 'numero'}`` dicts ordered by finishing
        position (only rows with ``position_finale > 0`` are included).
    """
    rows = _fetch_all("""
        SELECT course, cheval, position_finale, num_pmu
        FROM v_resultats_complets
        WHERE date_programme = ? AND position_finale > 0
        ORDER BY course, position_finale
    """, (date,))

    courses = defaultdict(list)
    for course_name, horse, position, numero in rows:
        courses[course_name].append({
            'horse': horse,
            'position': position,
            'numero': numero,
        })
    return dict(courses)


def get_canalturf_predictions(date):
    """Return the rows of the ``predictions`` table for ``date``, per race.

    Args:
        date: value matched against the ``date`` column.

    Returns:
        dict mapping race name (``'UNKNOWN'`` when empty) to
        ``{'bases': [...], 'chances': [...], 'outsiders': [...], 'all': [...]}``
        where every item is a ``{'horse', 'numero'}`` dict.
    """
    # NOTE(review): the query is not filtered on `source` and has no
    # ORDER BY, so 'all' holds every row of the date in database order.
    rows = _fetch_all("""
        SELECT race_name, horse_name, horse_number, source
        FROM predictions
        WHERE date = ?
    """, (date,))

    races = {}
    for race_name, horse, numero, source in rows:
        race = races.setdefault(
            race_name or 'UNKNOWN',
            {'bases': [], 'chances': [], 'outsiders': [], 'all': []},
        )
        entry = {'horse': horse, 'numero': numero}
        race['all'].append(entry)
        bucket = _CANALTURF_BUCKETS.get(source)
        if bucket is not None:
            race[bucket].append(entry)
    return races


def get_scoring_predictions(date):
    """Return the scoring-model predictions for ``date``, per race.

    Args:
        date: value matched against the ``date`` column.

    Returns:
        dict mapping race name (``'UNKNOWN'`` when empty) to a list of
        ``{'horse', 'numero', 'score', 'rang'}`` dicts sorted by ascending
        ``rang_scoring`` (rows without a rank come last).
    """
    # calculate_metrics() treats the first items as the top picks, so the
    # rows must come back ranked. Assumes rang_scoring = 1 is the best
    # pick - TODO confirm against the code that fills the table.
    rows = _fetch_all("""
        SELECT race_name, horse_name, horse_number, score, rang_scoring
        FROM scoring
        WHERE date = ?
        ORDER BY race_name, rang_scoring IS NULL, rang_scoring
    """, (date,))

    races = defaultdict(list)
    for race_name, horse, numero, score, rang in rows:
        races[race_name or 'UNKNOWN'].append({
            'horse': horse,
            'numero': numero,
            'score': score,
            'rang': rang,
        })
    return dict(races)


def _horse_key(entry):
    """Return the upper-cased horse name of ``entry``, or None if it has none."""
    name = entry['horse']
    return name.upper() if name else None


def _top_keys(entries, n):
    """Return the set of named horses among the first ``n`` entries."""
    return {key for key in map(_horse_key, entries[:n]) if key is not None}


def calculate_metrics(predicted, actual):
    """Compute the hit metrics of one race.

    Args:
        predicted: list of dicts with a ``'horse'`` key, best pick first.
        actual: list of dicts with a ``'horse'`` key, winner first.

    Returns:
        None when either list is empty, otherwise a dict with
        ``top1_hit`` (bool), ``top1_predicted`` (upper-cased name or None),
        ``top3_precision`` and ``top5_precision`` (overlap / 3 and / 5) and
        ``ze2_hit`` (at least 2 horses shared by both top 4).
    """
    if not predicted or not actual:
        return None

    pred_top1 = _horse_key(predicted[0])
    # A missing name can never count as a hit, even against a missing name.
    top1_hit = pred_top1 is not None and pred_top1 == _horse_key(actual[0])

    def overlap(n):
        return len(_top_keys(predicted, n) & _top_keys(actual, n))

    return {
        'top1_hit': top1_hit,
        'top1_predicted': pred_top1,
        'top3_precision': overlap(3) / 3,
        'top5_precision': overlap(5) / 5,
        'ze2_hit': overlap(4) >= 2,
    }


def _new_stats():
    """Return an empty per-source accumulator."""
    return {'top1': 0, 'top3': 0, 'top5': 0, 'ze2': 0, 'total': 0}


def _add_percentages(s):
    """Add the ``*_pct`` keys to ``s``; they are 0.0 when nothing was evaluated."""
    total = s['total']
    for key in ('top1', 'top3', 'top5', 'ze2'):
        s[f'{key}_pct'] = round(s[key] / total * 100, 1) if total else 0.0


def run_backtest():
    """Evaluate every stored prediction against the recorded results.

    Returns:
        None when the results view holds no finished race, otherwise a dict
        with ``dates`` (most recent first), ``results`` (one item per race
        and source), ``stats`` (per-source counters and percentages) and
        ``generated_at`` (ISO timestamp).
    """
    dates = [row[0] for row in _fetch_all("""
        SELECT DISTINCT date_programme
        FROM v_resultats_complets
        WHERE position_finale > 0
        ORDER BY date_programme DESC
    """)]

    if not dates:
        print("Aucune donnée trouvée")
        return None

    print(f"\n{'='*60}")
    print("📊 BACKTEST ANALYZER")
    print(f"{'='*60}")
    print(f"Période: {dates[-1]} au {dates[0]} ({len(dates)} jours)")

    all_results = []
    stats = {source: _new_stats() for source in _SOURCES}

    for date in dates:
        results = get_results_for_date(date)
        if not results:
            continue

        canalturf_preds = get_canalturf_predictions(date)
        scoring_preds = get_scoring_predictions(date)

        for race_name, race_results in results.items():
            candidates = (
                ('canalturf', canalturf_preds.get(race_name, {}).get('all')),
                ('scoring', scoring_preds.get(race_name)),
            )
            for source, predicted in candidates:
                # Returns None when the source has no prediction for the race.
                m = calculate_metrics(predicted, race_results)
                if m is None:
                    continue
                s = stats[source]
                s['total'] += 1
                s['top1'] += int(m['top1_hit'])
                s['top3'] += m['top3_precision']
                s['top5'] += m['top5_precision']
                s['ze2'] += int(m['ze2_hit'])
                all_results.append({
                    'date': date,
                    'race': race_name,
                    'source': source,
                    'top1_pred': m['top1_predicted'],
                    'top1_hit': m['top1_hit'],
                    'ze2_hit': m['ze2_hit'],
                })

    # Always computed so report writers can read the *_pct keys even for a
    # source without any evaluated race.
    for s in stats.values():
        _add_percentages(s)

    return {
        'dates': dates,
        'results': all_results,
        'stats': stats,
        'generated_at': datetime.now().isoformat(),
    }


def print_report(data):
    """Print the per-source summary of ``data`` (as returned by run_backtest)."""
    stats = data['stats']
    print("\n📈 RÉSUMÉ GLOBAL")
    print(f"{'='*60}")

    for source, label in [('canalturf', 'CANALTURF'), ('scoring', 'SCORING')]:
        s = stats[source]
        if s['total'] <= 0:
            continue
        print(f"\n{label} ({s['total']} courses analysées):")
        print(f" Top1: {s['top1']}/{s['total']} = {s['top1_pct']}%")
        print(f" Top3: {s['top3_pct']}%")
        print(f" Top5: {s['top5_pct']}%")
        print(f" ZE2: {s['ze2']}/{s['total']} = {s['ze2_pct']}%")


def _truncate(value, limit):
    """Return ``value`` as text cut to ``limit`` chars ('N/A' when empty)."""
    if not value:
        return 'N/A'
    text = str(value)
    # Only mark the cell with an ellipsis when something was really cut.
    return text[:limit] + '...' if len(text) > limit else text


def _summary_row(label, s):
    """Return the markdown summary-table row of one source."""
    return (
        f"| {label} | {s['total']} | {s.get('top1_pct', 0.0)}% "
        f"| {s.get('top3_pct', 0.0)}% | {s.get('top5_pct', 0.0)}% "
        f"| {s.get('ze2_pct', 0.0)}% |"
    )


def _build_markdown(data):
    """Render ``data`` (as returned by run_backtest) as a markdown report."""
    now = datetime.now()
    dates = data['dates']
    day_count = len(dates)
    stats = data['stats']

    parts = [f"""---
date: {now.strftime('%Y-%m-%d')}
tags: [turf, backtest, analyse]
type: recherche
status: active
---

# Backtest - {dates[-1]} au {dates[0]}

> Analyse des prédictions vs résultats officiels PMU ({day_count} jour{'s' if day_count > 1 else ''})

## Résumé Global

| Source | Courses | Top1 | Top3 | Top5 | ZE2 Hit |
|--------|---------|------|------|------|---------|
{_summary_row('Canalturf', stats['canalturf'])}
{_summary_row('Scoring', stats['scoring'])}

## Détail

| Date | Course | Source | Top1 Prédit | Hit | ZE2 |
|------|--------|--------|-------------|-----|-----|
"""]

    for r in data['results']:
        top1_mark = '✅' if r['top1_hit'] else '❌'
        ze2_mark = '✅' if r['ze2_hit'] else '❌'
        parts.append(
            f"| {r['date']} | {_truncate(r['race'], 30)} | {r['source']} "
            f"| {_truncate(r['top1_pred'], 20)} | {top1_mark} | {ze2_mark} |\n"
        )

    parts.append(f"\n---\n*Généré le {now.strftime('%Y-%m-%d %H:%M')}*\n")
    return "".join(parts)


def main():
    """Run the backtest, print the summary and write the JSON/Markdown reports."""
    data = run_backtest()
    if not data:
        return

    print_report(data)

    # Explicit UTF-8: the reports contain accented text and emoji, which the
    # locale's default encoding may not be able to represent.
    with open(JSON_REPORT_PATH, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, default=str)

    with open(MD_REPORT_PATH, 'w', encoding='utf-8') as f:
        f.write(_build_markdown(data))

    print("\n💾 Rapports sauvegardés:")
    print(f" - {JSON_REPORT_PATH}")
    print(f" - {MD_REPORT_PATH}")


if __name__ == "__main__":
    main()