Initial commit: existing turf_saas codebase

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-25 17:18:43 +02:00
commit ed07c8a3d1
137 changed files with 36398 additions and 0 deletions
--- a/results_scraper.py
+++ b/results_scraper.py
@@ -0,0 +1,383 @@
+#!/usr/bin/env python3
+"""
+Results Scraper - API PMU officielle
+Scrape les résultats réels du Quinté+, les sauvegarde en BDD
+et calcule le taux de réussite des prédictions.
+À lancer à 21h via cron ou OpenClaw.
+"""
+
+import requests
+import sqlite3
+import json
+from datetime import datetime
+
+import os
+
+# SQLite database file; the DB_PATH environment variable overrides the default.
+DB_PATH = os.environ.get("DB_PATH", "/home/h3r7/turf_scraper/turf.db")
+# HTTP headers sent with the PMU API requests made in this module.
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0',
+    'Accept': 'application/json',
+}
+
+# ============================================================
+# API PMU
+# ============================================================
+
+def get_programme(date_str):
+    """
+    Fetch the full race programme for one day from the PMU API.
+
+    date_str: day to query, formatted DDMMYYYY.
+    Returns the list stored under "programme" -> "reunions" in the JSON
+    response (each meeting carries its races), or an empty list when
+    either key is absent.
+    An HTTP error status is propagated as an exception by
+    raise_for_status().
+    """
+    endpoint = (
+        "https://turfinfo.api.pmu.fr/rest/client/1/programme/"
+        f"{date_str}/reunions"
+    )
+    response = requests.get(endpoint, headers=HEADERS, timeout=15)
+    response.raise_for_status()
+    payload = response.json()
+    programme = payload.get("programme", {})
+    return programme.get("reunions", [])
+
+
+def get_participants(date_str, num_reunion, num_course):
+    """
+    Fetch the participants of one race, including each horse's ordreArrivee.
+
+    ordreArrivee is the official finishing position (0 = unplaced or
+    disqualified).
+
+    date_str: day to query, formatted DDMMYYYY.
+    num_reunion, num_course: meeting and race numbers, inserted into the
+    R<n>/C<n> segments of the URL.
+    Returns the list stored under "participants" in the JSON response, or
+    an empty list when that key is absent.
+    An HTTP error status is propagated as an exception by
+    raise_for_status().
+    """
+    endpoint = (
+        "https://turfinfo.api.pmu.fr/rest/client/1/programme/"
+        f"{date_str}/R{num_reunion}/C{num_course}/participants"
+    )
+    response = requests.get(endpoint, headers=HEADERS, timeout=15)
+    response.raise_for_status()
+    payload = response.json()
+    return payload.get("participants", [])
+
+
+def find_quinte(reunions):
+    """
+    Locate the day's Quinté+ race in the programme.
+
+    A race matches when one of its bet types ("paris" -> "typePari")
+    contains "QUINTE", or when its label contains "PARIS-TURF". The first
+    match in programme order is returned.
+
+    reunions: list of meeting dicts, each with an optional "courses" list.
+    Returns a 5-tuple (num_reunion, num_course, libelle, hippodrome,
+    arrivee_definitive), or None when no race matches.
+    Raises KeyError if the matching meeting or race lacks "numOfficiel",
+    "numOrdre" or "hippodrome" -> "libelleCourt".
+    """
+    for reunion in reunions:
+        # The "or" fallbacks also cover keys that are present but null,
+        # so a partially filled programme entry cannot abort the search.
+        for course in (reunion.get("courses") or []):
+            libelle = course.get("libelle") or ""
+            paris_types = [
+                pari.get("typePari") or ""
+                for pari in (course.get("paris") or [])
+            ]
+            has_quinte_bet = any("QUINTE" in t for t in paris_types)
+            if has_quinte_bet or "PARIS-TURF" in libelle:
+                return (
+                    reunion["numOfficiel"],
+                    course["numOrdre"],
+                    libelle,
+                    reunion["hippodrome"]["libelleCourt"],
+                    course.get("arriveeDefinitive", False),
+                )
+    return None
+
+
+# ============================================================
+# BASE DE DONNÉES
+# ============================================================
+
+def init_db_results():
+    """
+    Create the results and performance tables if they do not exist yet.
+
+    Opens the SQLite database at DB_PATH, runs both CREATE TABLE IF NOT
+    EXISTS statements and commits. The connection is closed on every
+    path, including when a statement raises.
+    """
+    # results: official finishing order
+    results_ddl = '''
+        CREATE TABLE IF NOT EXISTS results (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            date TEXT NOT NULL,
+            race_name TEXT,
+            race_hippodrome TEXT,
+            position INTEGER,
+            horse_name TEXT,
+            odds REAL,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    '''
+
+    # performance: predictions compared with results
+    # NOTE(review): compare_predictions() queries and inserts the columns
+    # prediction_date, race_date and hit, which this definition does not
+    # declare. Confirm which schema is authoritative (a pre-existing table
+    # makes this statement a no-op).
+    performance_ddl = '''
+        CREATE TABLE IF NOT EXISTS performance (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            date TEXT NOT NULL,
+            race_name TEXT,
+            horse_name TEXT,
+            predicted_rank INTEGER,
+            actual_position INTEGER,
+            hit_top5 BOOLEAN,
+            hit_winner BOOLEAN,
+            source TEXT,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    '''
+
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        c = conn.cursor()
+        c.execute(results_ddl)
+        c.execute(performance_ddl)
+        conn.commit()
+    finally:
+        # Release the database handle even if a statement failed.
+        conn.close()
+
+
+def save_results(date, race_name, hippodrome, participants):
+    """
+    Store the official finishing positions in the results table.
+
+    Participants without a finishing position (ordreArrivee missing, null
+    or 0) are skipped. A row already present for the same (date,
+    race_name, horse_name, position) is not inserted again, so the
+    function can be re-run for the same race.
+
+    date: race date string, stored in results.date.
+    race_name, hippodrome: stored as race_name and race_hippodrome.
+    participants: list of participant dicts (keys read: "ordreArrivee",
+        "nom", "dernierRapportDirect" -> "rapport").
+    Returns the number of rows inserted.
+    The connection is closed on every path, including errors.
+    """
+    exists_sql = "SELECT id FROM results WHERE date=? AND race_name=? AND horse_name=? AND position=?"
+    insert_sql = '''
+            INSERT INTO results (date, race_name, race_hippodrome, position, horse_name, odds)
+            VALUES (?, ?, ?, ?, ?, ?)
+        '''
+
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        c = conn.cursor()
+        saved = 0
+
+        for p in participants:
+            # A null position is treated like 0: unplaced / disqualified.
+            position = p.get("ordreArrivee") or 0
+            if position == 0:
+                continue
+            horse = p.get("nom", "")
+            # Final odds (direct win payout); 0.0 when the API gives none.
+            rapport = p.get("dernierRapportDirect") or {}
+            odds = rapport.get("rapport", 0.0)
+
+            # Skip rows that were already stored by a previous run.
+            c.execute(exists_sql, (date, race_name, horse, position))
+            if c.fetchone():
+                continue
+
+            c.execute(insert_sql, (date, race_name, hippodrome, position, horse, odds))
+            saved += c.rowcount
+
+        conn.commit()
+        return saved
+    finally:
+        conn.close()
+
+
+def _dedupe_predictions(rows):
+    """
+    Keep a single (horse, rank, source) row per horse.
+
+    When a horse appears under several sources, the row whose source has
+    the lowest priority number wins (bases < chances < outsiders <
+    partants < selections; unknown sources rank last with 9). On a tie
+    the first row seen is kept.
+    """
+    source_priority = {
+        'canalturf_prono_bases': 1,
+        'canalturf_prono_chances': 2,
+        'canalturf_prono_outsiders': 3,
+        'canalturf_partants': 4,
+        'canalturf_selections': 5,
+    }
+    best = {}
+    for horse, rank, source in rows:
+        prio = source_priority.get(source, 9)
+        if horse not in best or prio < source_priority.get(best[horse][2], 9):
+            best[horse] = (horse, rank, source)
+    return list(best.values())
+
+
+def compare_predictions(date, race_name):
+    """
+    Compare the day's predictions with the official results.
+
+    Reads the canalturf predictions for `date` from the predictions
+    table, keeps one row per horse, and matches them against the results
+    table (race_name is matched with LIKE on its first 15 characters).
+    Each comparison is recorded in the performance table unless a row
+    already exists for that date and horse.
+
+    Returns a dict with the keys date, race_name, total_predictions,
+    hits_top5, hit_rate_top5, winner, winner_predicted, bases_hit,
+    bases_miss, top5_real and details, or None when no results are stored
+    for that date and race.
+    The connection is closed on every path, including errors.
+    """
+    predictions_sql = '''
+        SELECT horse_name, prediction_rank, source
+        FROM predictions
+        WHERE date=? AND source LIKE 'canalturf%'
+        ORDER BY prediction_rank ASC, odds ASC
+    '''
+    results_sql = '''
+        SELECT horse_name, position
+        FROM results
+        WHERE date=? AND race_name LIKE ?
+        ORDER BY position ASC
+    '''
+    # NOTE(review): these performance columns (prediction_date, race_date,
+    # hit) differ from the ones declared by init_db_results(); this relies
+    # on a performance table created elsewhere. Confirm the schema.
+    performance_insert_sql = '''
+                INSERT INTO performance
+                (prediction_date, race_date, horse_name, predicted_rank, actual_position, hit)
+                VALUES (?, ?, ?, ?, ?, ?)
+            '''
+
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        c = conn.cursor()
+
+        # All predictions of the day, reduced to one row per horse.
+        c.execute(predictions_sql, (date,))
+        predictions = _dedupe_predictions(c.fetchall())
+
+        # Official results: horse name -> finishing position.
+        c.execute(results_sql, (date, f"%{race_name[:15]}%"))
+        results = {row[0]: row[1] for row in c.fetchall()}
+        if not results:
+            return None
+
+        top5_real = {h for h, pos in results.items() if pos <= 5}
+        winner_real = next((h for h, pos in results.items() if pos == 1), None)
+
+        performance_rows = []
+        for horse, pred_rank, source in predictions:
+            # 99 marks a predicted horse that has no stored result.
+            actual_pos = results.get(horse, 99)
+            hit_top5 = horse in top5_real
+            hit_winner = horse == winner_real
+
+            # Record the comparison once per (date, horse).
+            c.execute("SELECT id FROM performance WHERE prediction_date=? AND horse_name=?",
+                      (date, horse))
+            if not c.fetchone():
+                c.execute(performance_insert_sql,
+                          (date, date, horse, pred_rank, actual_pos, hit_top5))
+
+            performance_rows.append({
+                "cheval": horse,
+                "pred_rank": pred_rank,
+                "actual_pos": actual_pos,
+                "hit_top5": hit_top5,
+                "hit_winner": hit_winner,
+                "source": source
+            })
+
+        conn.commit()
+    finally:
+        conn.close()
+
+    # Overall statistics
+    bases = [p for p in performance_rows if p["source"] == "canalturf_prono_bases"]
+    nb_pred = len(performance_rows)
+    nb_top5 = sum(1 for p in performance_rows if p["hit_top5"])
+
+    return {
+        "date": date,
+        "race_name": race_name,
+        "total_predictions": nb_pred,
+        "hits_top5": nb_top5,
+        "hit_rate_top5": round(nb_top5 / nb_pred * 100, 1) if nb_pred else 0,
+        "winner": winner_real,
+        "winner_predicted": winner_real in [p["cheval"] for p in performance_rows],
+        "bases_hit": [p["cheval"] for p in bases if p["hit_top5"]],
+        "bases_miss": [p["cheval"] for p in bases if not p["hit_top5"]],
+        "top5_real": sorted([(h, pos) for h, pos in results.items() if pos <= 5], key=lambda x: x[1]),
+        "details": performance_rows
+    }
+
+
+# ============================================================
+# RAPPORT
+# ============================================================
+
+def print_report(stats):
+    """
+    Print a detailed console report for one stats dict.
+
+    stats: dict with the keys date, race_name, top5_real, winner,
+    winner_predicted, bases_hit, bases_miss, hit_rate_top5, hits_top5,
+    total_predictions and details. A falsy value prints a "nothing to
+    compare" message instead.
+    """
+    if not stats:
+        print("❌ Aucune donnée à comparer.")
+        return
+
+    rule = '=' * 60
+
+    # Header
+    print(f"\n{rule}")
+    print(f"📊 BILAN QUINTÉ+ — {stats['date']}")
+    print(f"🏇 {stats['race_name']}")
+    print(rule)
+
+    # Official finishing order
+    print("\n🏆 ARRIVÉE OFFICIELLE (Top 5) :")
+    for horse, pos in stats["top5_real"]:
+        print(f"   {pos}. {horse}")
+
+    # Was the winner among the predictions?
+    winner = stats["winner"]
+    verdict = "✅ GAGNANT PRÉDIT" if stats["winner_predicted"] else "❌ Gagnant non prédit"
+    print(f"\n{verdict} : {winner}")
+
+    # Bases: hits first, then misses
+    print("\n⭐ BASES :")
+    base_sections = (
+        ("✅", "bases_hit", "dans le top 5"),
+        ("❌", "bases_miss", "hors top 5"),
+    )
+    for marker, key, note in base_sections:
+        for h in stats[key]:
+            print(f"   {marker} {h} ({note})")
+
+    # Overall hit rate
+    rate = stats['hit_rate_top5']
+    hits = stats['hits_top5']
+    total = stats['total_predictions']
+    print(f"\n📈 TAUX DE RÉUSSITE : {rate}% ({hits}/{total} chevaux dans le top 5)")
+
+    # Horses from the "canalturf_partants" source that finished in the top 5
+    placed = [
+        d['cheval']
+        for d in stats["details"]
+        if d["source"] == "canalturf_partants" and d["hit_top5"]
+    ]
+    print(f"\n💰 FAVORIS PLACÉS : {', '.join(placed) or 'aucun'}")
+
+    print(f"{rule}\n")
+
+
+def save_report_json(stats, date):
+    """
+    Archive the stats dict as a JSON file and return the file path.
+
+    The file is named perf_<date without dashes>.json and is written to
+    the directory given by the TURF_DIR environment variable (default
+    /home/h3r7/turf_scraper), UTF-8 encoded, indented by 2 spaces, with
+    non-ASCII characters kept as-is.
+    """
+    base_dir = os.environ.get('TURF_DIR', '/home/h3r7/turf_scraper')
+    compact_date = date.replace('-', '')
+    path = f"{base_dir}/perf_{compact_date}.json"
+    with open(path, 'w', encoding='utf-8') as handle:
+        json.dump(stats, handle, indent=2, ensure_ascii=False)
+    return path
+
+
+# ============================================================
+# MAIN
+# ============================================================
+
+def main():
+    """
+    Run the end-to-end results job for today's Quinté+.
+
+    Steps: create the tables, fetch the PMU programme, find the Quinté+,
+    stop if its arrival is not yet final, fetch the participants, print
+    the official top 5, save the results, compare them with the
+    predictions, then print and archive the report.
+    Errors raised while calling the PMU API are printed and end the run
+    instead of propagating.
+    """
+    # Read the clock once so both date formats name the same day even if
+    # the run straddles midnight.
+    now = datetime.now()
+    today = now.strftime('%Y-%m-%d')
+    date_pmu = now.strftime('%d%m%Y')
+
+    print(f"\n{'='*60}")
+    print(f"🏇 RESULTS SCRAPER — {now.strftime('%d/%m/%Y %H:%M')}")
+    print(f"{'='*60}\n")
+
+    # Make sure the tables exist
+    init_db_results()
+
+    # Fetch the programme
+    print("📡 Récupération du programme PMU...")
+    try:
+        reunions = get_programme(date_pmu)
+        print(f"   ✅ {len(reunions)} réunion(s) trouvée(s)")
+    except Exception as e:
+        print(f"   ❌ Erreur API PMU : {e}")
+        return
+
+    # Find the Quinté+
+    quinte = find_quinte(reunions)
+    if not quinte:
+        print("   ❌ Quinté+ non trouvé dans le programme")
+        return
+
+    num_r, num_c, libelle, hippodrome, arrivee_def = quinte
+    print(f"   🏇 Quinté+ : R{num_r}C{num_c} — {libelle} ({hippodrome})")
+    print(f"   Arrivée définitive : {'✅ OUI' if arrivee_def else '⏳ PAS ENCORE'}")
+
+    if not arrivee_def:
+        print("\n⚠️  La course n'est pas encore terminée. Relancez après la course.")
+        return
+
+    # Fetch the participants together with their finishing positions
+    print(f"\n📡 Récupération des résultats R{num_r}C{num_c}...")
+    try:
+        participants = get_participants(date_pmu, num_r, num_c)
+        print(f"   ✅ {len(participants)} participants récupérés")
+    except Exception as e:
+        print(f"   ❌ Erreur : {e}")
+        return
+
+    # Keep classified horses only (a null position counts as unclassified)
+    # and sort them by finishing position.
+    classes = sorted(
+        [p for p in participants if (p.get("ordreArrivee") or 0) > 0],
+        key=lambda x: x["ordreArrivee"]
+    )
+
+    print(f"\n🏆 TOP 5 OFFICIEL :")
+    for p in classes[:5]:
+        cote = (p.get("dernierRapportDirect") or {}).get("rapport", "?")
+        print(f"   {p['ordreArrivee']}. {p.get('nom', ''):<25} cote={cote}")
+
+    # Save the results
+    saved = save_results(today, libelle, hippodrome, participants)
+    print(f"\n💾 {saved} résultats sauvegardés en BDD")
+
+    # Compare with the predictions
+    print(f"\n🔍 Comparaison avec les prédictions...")
+    stats = compare_predictions(today, libelle)
+
+    if stats:
+        print_report(stats)
+        path = save_report_json(stats, today)
+        print(f"📁 Rapport sauvegardé : {path}")
+    else:
+        # NOTE(review): compare_predictions() returns None when no stored
+        # results match the race, not when predictions are missing, so the
+        # message below may point at the wrong cause. Confirm the wording.
+        print("⚠️  Aucune prédiction trouvée pour aujourd'hui en BDD.")
+        print("   Vérifiez que multi_scraper_v5.py a bien tourné ce matin.")
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()