#!/usr/bin/env python3
|
|
"""
|
|
Results Scraper - API PMU officielle
|
|
Scrape les résultats réels du Quinté+, les sauvegarde en BDD
|
|
et calcule le taux de réussite des prédictions.
|
|
À lancer à 21h via cron ou OpenClaw.
|
|
"""
|
|
|
|
import requests
|
|
import sqlite3
|
|
import json
|
|
from datetime import datetime
|
|
|
|
import os

# SQLite database file; can be overridden with the DB_PATH environment variable.
DB_PATH = os.environ.get("DB_PATH", "/home/h3r7/turf_scraper/turf.db")

# HTTP headers sent with every PMU API request.
HEADERS = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
|
|
|
# ============================================================
# PMU API
# ============================================================
|
|
|
|
def get_programme(date_str):
    """
    Fetch the full race programme of one day from the PMU API.

    date_str: date formatted as DDMMYYYY.
    Returns the list stored under "programme" -> "reunions" in the JSON
    response (meetings with their races); an empty list when either key
    is absent.
    Propagates the exception raised by requests on a transport failure
    or by raise_for_status() on an HTTP error status.
    """
    endpoint = (
        "https://turfinfo.api.pmu.fr/rest/client/1/programme/"
        f"{date_str}/reunions"
    )
    response = requests.get(endpoint, headers=HEADERS, timeout=15)
    response.raise_for_status()
    programme = response.json().get("programme", {})
    return programme.get("reunions", [])
|
|
|
|
|
|
def get_participants(date_str, num_reunion, num_course):
    """
    Fetch the runners of one race, including their "ordreArrivee" field.

    date_str: date formatted as DDMMYYYY (same format as the programme call).
    num_reunion / num_course: meeting and race numbers, inserted in the
    URL as R<num_reunion>/C<num_course>.
    ordreArrivee is the official finishing position (0 = unplaced or
    disqualified).
    Returns the "participants" list of the JSON response, or an empty
    list when that key is absent.
    """
    endpoint = (
        "https://turfinfo.api.pmu.fr/rest/client/1/programme/"
        f"{date_str}/R{num_reunion}/C{num_course}/participants"
    )
    response = requests.get(endpoint, headers=HEADERS, timeout=15)
    response.raise_for_status()
    payload = response.json()
    return payload.get("participants", [])
|
|
|
|
|
|
def find_quinte(reunions):
    """
    Locate the day's Quinté+ race in the programme.

    A race matches when one of its bet types ("typePari") contains
    "QUINTE" or when its label ("libelle") contains "PARIS-TURF". The
    first match in programme order is returned.

    reunions: list of meeting dicts, each with an optional "courses" list.
    Returns a 5-tuple
        (num_reunion, num_course, libelle, hippodrome, arrivee_definitive)
    or None when no race matches. hippodrome is "" when the meeting has
    no "hippodrome" -> "libelleCourt" entry; arrivee_definitive defaults
    to False.
    Raises KeyError when the matching meeting lacks "numOfficiel" or the
    matching race lacks "numOrdre" (both are required to query results).
    """
    for reunion in reunions or []:
        # `or []` also covers keys that are present but set to None.
        for course in reunion.get("courses") or []:
            libelle = course.get("libelle") or ""
            paris_types = [
                p.get("typePari") or "" for p in course.get("paris") or []
            ]
            is_quinte = any("QUINTE" in t for t in paris_types)
            if not (is_quinte or "PARIS-TURF" in libelle):
                continue

            hippodrome = (reunion.get("hippodrome") or {}).get("libelleCourt", "")
            return (
                reunion["numOfficiel"],
                course["numOrdre"],
                libelle,
                hippodrome,
                course.get("arriveeDefinitive", False),
            )
    return None
|
|
|
|
|
|
# ============================================================
# DATABASE
# ============================================================
|
|
|
|
def init_db_results():
    """
    Create the ``results`` and ``performance`` tables if they are missing.

    ``results`` holds the official finishing order. ``performance`` holds
    one row per predicted horse compared with the result; its columns
    are the ones compare_predictions() reads and writes
    (prediction_date, race_date, horse_name, predicted_rank,
    actual_position, hit), so a freshly created database works with it.
    Tables that already exist are left untouched (CREATE TABLE IF NOT
    EXISTS).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()

        # results: official finishing order
        c.execute('''
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                race_name TEXT,
                race_hippodrome TEXT,
                position INTEGER,
                horse_name TEXT,
                odds REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # performance: predictions compared with results
        c.execute('''
            CREATE TABLE IF NOT EXISTS performance (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                prediction_date TEXT NOT NULL,
                race_date TEXT,
                race_name TEXT,
                horse_name TEXT,
                predicted_rank INTEGER,
                actual_position INTEGER,
                hit BOOLEAN,
                hit_winner BOOLEAN,
                source TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        conn.commit()
    finally:
        # Release the database handle even when a statement fails.
        conn.close()
|
|
|
|
|
|
def save_results(date, race_name, hippodrome, participants):
    """
    Store the official finishing positions in the ``results`` table.

    date: date string stored in the ``date`` column.
    race_name, hippodrome: stored with every inserted row.
    participants: list of runner dicts; each is read for "ordreArrivee",
        "nom" and "dernierRapportDirect" -> "rapport" (final odds,
        0.0 when absent).

    Runners without a finishing position (0, missing or None) are
    skipped, and so are rows already present for the same
    (date, race_name, horse_name, position).
    Returns the number of rows actually inserted.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()
        saved = 0

        for p in participants:
            # `or 0` treats an explicit null like a missing position.
            position = p.get("ordreArrivee") or 0
            if position == 0:
                continue  # unplaced / disqualified
            horse = p.get("nom", "")
            # Final odds (simple win direct report)
            rapport = p.get("dernierRapportDirect") or {}
            odds = rapport.get("rapport", 0.0)

            # Skip rows that were already stored by a previous run
            c.execute(
                "SELECT id FROM results WHERE date=? AND race_name=? AND horse_name=? AND position=?",
                (date, race_name, horse, position)
            )
            if c.fetchone():
                continue

            c.execute('''
                INSERT INTO results (date, race_name, race_hippodrome, position, horse_name, odds)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (date, race_name, hippodrome, position, horse, odds))
            saved += c.rowcount

        conn.commit()
        return saved
    finally:
        # Close the connection even if a query raised.
        conn.close()
|
|
|
|
|
|
def compare_predictions(date, race_name):
    """
    Compare the stored predictions of a day with the official results.

    date: date string matched against ``predictions.date`` and
        ``results.date``.
    race_name: race label; its first 15 characters are used in a LIKE
        match on ``results.race_name``.

    Only predictions whose source starts with "canalturf" are read. When
    a horse appears under several sources, the most specific one is kept
    (bases > chances > outsiders > partants > selections).

    Side effect: inserts one row per predicted horse into
    ``performance`` unless a row with the same prediction_date and
    horse_name already exists.

    Returns None when no result row matches, otherwise a dict with the
    keys date, race_name, total_predictions, hits_top5, hit_rate_top5,
    winner, winner_predicted, bases_hit, bases_miss, top5_real and
    details.
    """
    # Lower value = more specific source; unknown sources rank last (9).
    source_priority = {
        'canalturf_prono_bases': 1,
        'canalturf_prono_chances': 2,
        'canalturf_prono_outsiders': 3,
        'canalturf_partants': 4,
        'canalturf_selections': 5,
    }

    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()

        # All predictions of the day; de-duplicated in Python below
        c.execute('''
            SELECT horse_name, prediction_rank, source
            FROM predictions
            WHERE date=? AND source LIKE 'canalturf%'
            ORDER BY prediction_rank ASC, odds ASC
        ''', (date,))

        # One entry per horse: keep the row from the most specific source
        seen = {}
        for horse, rank, source in c.fetchall():
            prio = source_priority.get(source, 9)
            if horse not in seen or prio < source_priority.get(seen[horse][2], 9):
                seen[horse] = (horse, rank, source)
        predictions = list(seen.values())

        # Official results
        c.execute('''
            SELECT horse_name, position
            FROM results
            WHERE date=? AND race_name LIKE ?
            ORDER BY position ASC
        ''', (date, f"%{race_name[:15]}%"))
        results = {row[0]: row[1] for row in c.fetchall()}

        if not results:
            return None

        top5_real = {h for h, pos in results.items() if pos <= 5}
        winner_real = next((h for h, pos in results.items() if pos == 1), None)

        performance_rows = []
        for horse, pred_rank, source in predictions:
            # 99 marks a predicted horse that is absent from the results
            actual_pos = results.get(horse, 99)
            hit_top5 = horse in top5_real
            hit_winner = horse == winner_real

            # Persist into the performance table (existing structure)
            c.execute("SELECT id FROM performance WHERE prediction_date=? AND horse_name=?",
                      (date, horse))
            if not c.fetchone():
                c.execute('''
                    INSERT INTO performance
                    (prediction_date, race_date, horse_name, predicted_rank, actual_position, hit)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (date, date, horse, pred_rank, actual_pos, hit_top5))

            performance_rows.append({
                "cheval": horse,
                "pred_rank": pred_rank,
                "actual_pos": actual_pos,
                "hit_top5": hit_top5,
                "hit_winner": hit_winner,
                "source": source
            })

        conn.commit()

        # Aggregate statistics
        bases = [p for p in performance_rows if p["source"] == "canalturf_prono_bases"]
        nb_pred = len(performance_rows)
        nb_top5 = sum(1 for p in performance_rows if p["hit_top5"])

        return {
            "date": date,
            "race_name": race_name,
            "total_predictions": nb_pred,
            "hits_top5": nb_top5,
            "hit_rate_top5": round(nb_top5 / nb_pred * 100, 1) if nb_pred else 0,
            "winner": winner_real,
            "winner_predicted": winner_real in [p["cheval"] for p in performance_rows],
            "bases_hit": [p["cheval"] for p in bases if p["hit_top5"]],
            "bases_miss": [p["cheval"] for p in bases if not p["hit_top5"]],
            "top5_real": sorted([(h, pos) for h, pos in results.items() if pos <= 5], key=lambda x: x[1]),
            "details": performance_rows
        }
    finally:
        # Runs on every exit path, including the early `return None`.
        conn.close()
|
|
|
|
|
|
# ============================================================
# REPORT
# ============================================================
|
|
|
|
def print_report(stats):
    """
    Print a detailed report of the comparison to the console.

    stats: dict produced by compare_predictions(), or a falsy value, in
    which case a single "nothing to compare" line is printed.
    The keys read are date, race_name, top5_real, winner,
    winner_predicted, bases_hit, bases_miss, hit_rate_top5, hits_top5,
    total_predictions and details.
    """
    if not stats:
        print("❌ Aucune donnée à comparer.")
        return

    separator = '=' * 60

    # Header
    print(f"\n{separator}")
    print(f"📊 BILAN QUINTÉ+ — {stats['date']}")
    print(f"🏇 {stats['race_name']}")
    print(separator)

    # Official finishing order
    print("\n🏆 ARRIVÉE OFFICIELLE (Top 5) :")
    for name, rank in stats["top5_real"]:
        print(f" {rank}. {name}")

    # Was the winner among the predictions?
    winner = stats["winner"]
    if not stats["winner_predicted"]:
        print(f"\n❌ Gagnant non prédit : {winner}")
    else:
        print(f"\n✅ GAGNANT PRÉDIT : {winner}")

    # "Bases" picks, hits first, then misses
    print("\n⭐ BASES :")
    for name in stats["bases_hit"]:
        print(f" ✅ {name} (dans le top 5)")
    for name in stats["bases_miss"]:
        print(f" ❌ {name} (hors top 5)")

    # Overall success rate
    rate = stats['hit_rate_top5']
    hits = stats['hits_top5']
    total = stats['total_predictions']
    print(f"\n📈 TAUX DE RÉUSSITE : {rate}% ({hits}/{total} chevaux dans le top 5)")

    # Horses from the "partants" source that finished in the top 5
    placed = [
        row['cheval']
        for row in stats["details"]
        if row["source"] == "canalturf_partants" and row["hit_top5"]
    ]
    placed_text = ', '.join(placed) or 'aucun'
    print(f"\n💰 FAVORIS PLACÉS : {placed_text}")

    print(f"{separator}\n")
|
|
|
|
|
|
def save_report_json(stats, date):
    """
    Archive the report as a JSON file and return the file path.

    stats: JSON-serialisable report dict.
    date: date string; its dashes are removed to build the file name
        perf_<date>.json.
    The target directory is the TURF_DIR environment variable, or
    /home/h3r7/turf_scraper when it is not set.
    """
    base_dir = os.environ.get('TURF_DIR', '/home/h3r7/turf_scraper')
    compact_date = date.replace('-', '')
    path = f"{base_dir}/perf_{compact_date}.json"

    with open(path, 'w', encoding='utf-8') as out_file:
        # ensure_ascii=False keeps accented characters readable in the file
        json.dump(stats, out_file, ensure_ascii=False, indent=2)

    return path
|
|
|
|
|
|
# ============================================================
# MAIN
# ============================================================
|
|
|
|
def main():
    """
    Run the end-of-day workflow for the Quinté+ race.

    Steps: create the tables if needed, fetch the day's programme, find
    the Quinté+ race, stop early when its finishing order is not final,
    fetch the runners, print the top 5, store the results, then compare
    them with the stored predictions and archive the report as JSON.

    Progress and errors are printed to the console; API failures end the
    run without raising.
    """
    # Take the clock once so the DB date and the API date always agree,
    # even when the script runs across midnight.
    now = datetime.now()
    today = now.strftime('%Y-%m-%d')
    date_pmu = now.strftime('%d%m%Y')

    print(f"\n{'='*60}")
    print(f"🏇 RESULTS SCRAPER — {now.strftime('%d/%m/%Y %H:%M')}")
    print(f"{'='*60}\n")

    # Database setup
    init_db_results()

    # Fetch the programme
    print("📡 Récupération du programme PMU...")
    try:
        reunions = get_programme(date_pmu)
        print(f" ✅ {len(reunions)} réunion(s) trouvée(s)")
    except Exception as e:
        print(f" ❌ Erreur API PMU : {e}")
        return

    # Locate the Quinté+ race
    quinte = find_quinte(reunions)
    if not quinte:
        print(" ❌ Quinté+ non trouvé dans le programme")
        return

    num_r, num_c, libelle, hippodrome, arrivee_def = quinte
    print(f" 🏇 Quinté+ : R{num_r}C{num_c} — {libelle} ({hippodrome})")
    print(f" Arrivée définitive : {'✅ OUI' if arrivee_def else '⏳ PAS ENCORE'}")

    if not arrivee_def:
        print("\n⚠️ La course n'est pas encore terminée. Relancez après la course.")
        return

    # Fetch the runners together with their finishing positions
    print(f"\n📡 Récupération des résultats R{num_r}C{num_c}...")
    try:
        participants = get_participants(date_pmu, num_r, num_c)
        print(f" ✅ {len(participants)} participants récupérés")
    except Exception as e:
        print(f" ❌ Erreur : {e}")
        return

    # Keep classified runners only, ordered by position.
    # `or 0` makes a null ordreArrivee behave like a missing one.
    classes = sorted(
        [p for p in participants if (p.get("ordreArrivee") or 0) > 0],
        key=lambda x: x["ordreArrivee"]
    )

    print("\n🏆 TOP 5 OFFICIEL :")
    for p in classes[:5]:
        cote = (p.get("dernierRapportDirect") or {}).get("rapport", "?")
        nom = p.get("nom") or ""
        print(f" {p['ordreArrivee']}. {nom:<25} cote={cote}")

    # Store the results
    saved = save_results(today, libelle, hippodrome, participants)
    print(f"\n💾 {saved} résultats sauvegardés en BDD")

    # Compare with the predictions
    print("\n🔍 Comparaison avec les prédictions...")
    stats = compare_predictions(today, libelle)

    # A stats dict with zero predictions means nothing was predicted for
    # today: show the warning instead of an empty 0/0 report.
    if stats and stats.get("total_predictions"):
        print_report(stats)
        path = save_report_json(stats, today)
        print(f"📁 Rapport sauvegardé : {path}")
    else:
        print("⚠️ Aucune prédiction trouvée pour aujourd'hui en BDD.")
        print(" Vérifiez que multi_scraper_v5.py a bien tourné ce matin.")
|
|
|
|
|
|
# Run the workflow only when the file is executed as a script.
if __name__ == "__main__":
    main()
|