Initial commit: existing turf_saas codebase
Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
383
results_scraper.py
Executable file
383
results_scraper.py
Executable file
@@ -0,0 +1,383 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Results Scraper - API PMU officielle
|
||||
Scrape les résultats réels du Quinté+, les sauvegarde en BDD
|
||||
et calcule le taux de réussite des prédictions.
|
||||
À lancer à 21h via cron ou OpenClaw.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import sqlite3
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
import os

# Location of the SQLite database; the DB_PATH environment variable
# overrides the built-in default.
DB_PATH = os.environ.get("DB_PATH", "/home/h3r7/turf_scraper/turf.db")

# HTTP headers sent with every request to the PMU API.
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json",
}
|
||||
|
||||
# ============================================================
|
||||
# API PMU
|
||||
# ============================================================
|
||||
|
||||
def get_programme(date_str):
    """
    Fetch the full programme of one day from the PMU API.

    date_str: day to query, formatted DDMMYYYY.

    Returns the list stored under "programme" -> "reunions" in the JSON
    response, or an empty list when either key is absent.
    Raises whatever requests raises on a network failure, and an HTTP error
    (via raise_for_status) when the server answers with an error status.
    """
    endpoint = (
        "https://turfinfo.api.pmu.fr/rest/client/1/programme/"
        f"{date_str}/reunions"
    )
    response = requests.get(endpoint, headers=HEADERS, timeout=15)
    response.raise_for_status()
    programme = response.json().get("programme", {})
    return programme.get("reunions", [])
|
||||
|
||||
|
||||
def get_participants(date_str, num_reunion, num_course):
    """
    Fetch the runners of one race from the PMU API.

    date_str: day to query, formatted DDMMYYYY.
    num_reunion / num_course: meeting and race numbers, inserted in the URL
        as R<num_reunion>/C<num_course>.

    Returns the list stored under "participants" in the JSON response (empty
    list when the key is absent). The rest of this file reads each entry's
    "ordreArrivee" as the official finishing position and treats 0 as
    "not classified".
    Raises an HTTP error (via raise_for_status) on an error status.
    """
    race_path = f"{date_str}/R{num_reunion}/C{num_course}/participants"
    endpoint = f"https://turfinfo.api.pmu.fr/rest/client/1/programme/{race_path}"
    response = requests.get(endpoint, headers=HEADERS, timeout=15)
    response.raise_for_status()
    payload = response.json()
    return payload.get("participants", [])
|
||||
|
||||
|
||||
def find_quinte(reunions):
    """
    Locate the Quinté+ race in a day's programme.

    A race qualifies when one of its bet types ("paris" -> "typePari")
    contains "QUINTE", or when its label ("libelle") contains "PARIS-TURF".
    Meetings and races are scanned in order and the first match wins.

    reunions: list of meeting dicts, each with a "courses" list.

    Returns a 5-tuple
        (num_reunion, num_course, libelle, hippodrome, arrivee_definitive)
    or None when no race qualifies.
    Raises KeyError if the matching race or meeting lacks "numOrdre",
    "numOfficiel" or "hippodrome" -> "libelleCourt".
    """
    for reunion in reunions or []:
        # `or []` / `or ""` also cover keys that are present but null.
        for course in reunion.get("courses") or []:
            libelle = course.get("libelle") or ""
            bet_types = [
                pari.get("typePari") or ""
                for pari in course.get("paris") or []
            ]
            has_quinte_bet = any("QUINTE" in bet for bet in bet_types)
            if not (has_quinte_bet or "PARIS-TURF" in libelle):
                continue
            return (
                reunion["numOfficiel"],
                course["numOrdre"],
                libelle,
                reunion["hippodrome"]["libelleCourt"],
                course.get("arriveeDefinitive", False),
            )
    return None
|
||||
|
||||
|
||||
# ============================================================
|
||||
# BASE DE DONNÉES
|
||||
# ============================================================
|
||||
|
||||
def init_db_results():
    """
    Create the `results` and `performance` tables if they do not exist yet.

    `results` holds the official finishing order (one row per classified
    horse). `performance` holds one row per predicted horse; its columns are
    the ones compare_predictions() reads and writes (prediction_date,
    race_date, horse_name, predicted_rank, actual_position, hit).

    NOTE(review): CREATE TABLE IF NOT EXISTS never alters an existing table,
    so a database whose `performance` table was created with different
    columns keeps them — confirm the deployed schema matches.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        # Official finishing order.
        cur.execute('''
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                race_name TEXT,
                race_hippodrome TEXT,
                position INTEGER,
                horse_name TEXT,
                odds REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Prediction-versus-result comparison.
        cur.execute('''
            CREATE TABLE IF NOT EXISTS performance (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                prediction_date TEXT NOT NULL,
                race_date TEXT,
                horse_name TEXT,
                predicted_rank INTEGER,
                actual_position INTEGER,
                hit BOOLEAN,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        conn.commit()
    finally:
        # Release the connection even when a statement fails.
        conn.close()
|
||||
|
||||
|
||||
def save_results(date, race_name, hippodrome, participants):
    """
    Store the official finishing positions in the `results` table.

    date: value written to results.date.
    race_name / hippodrome: race label and racecourse name stored on each row.
    participants: list of runner dicts; "ordreArrivee", "nom" and
        "dernierRapportDirect" -> "rapport" are read from each one.

    Runners whose finishing position is 0, missing or null are skipped, as
    are rows already present for the same (date, race_name, horse, position).
    Returns the number of rows actually inserted.
    """
    conn = sqlite3.connect(DB_PATH)
    saved = 0
    try:
        cur = conn.cursor()
        for runner in participants:
            # `or 0` also covers an explicit null position, which would
            # otherwise be stored as a NULL rank.
            position = runner.get("ordreArrivee") or 0
            if position == 0:
                continue  # not classified / disqualified

            horse = runner.get("nom", "")
            # Final odds; 0.0 when the payload carries no direct odds.
            rapport = runner.get("dernierRapportDirect") or {}
            odds = rapport.get("rapport", 0.0)

            # Skip rows that were already stored by an earlier run.
            cur.execute(
                "SELECT id FROM results WHERE date=? AND race_name=? AND horse_name=? AND position=?",
                (date, race_name, horse, position),
            )
            if cur.fetchone():
                continue

            cur.execute('''
                INSERT INTO results (date, race_name, race_hippodrome, position, horse_name, odds)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (date, race_name, hippodrome, position, horse, odds))
            saved += cur.rowcount

        conn.commit()
    finally:
        # Release the connection even when a statement fails.
        conn.close()
    return saved
|
||||
|
||||
|
||||
def compare_predictions(date, race_name):
    """
    Compare the stored predictions of a day with the official results.

    Reads every `predictions` row of `date` whose source starts with
    "canalturf", keeps one entry per horse (the one from the highest-priority
    source), looks each horse up in `results`, and inserts one row per horse
    into `performance` unless that horse already has a row for this date.

    date: day to evaluate, as stored in the `date` columns.
    race_name: race label; only its first 15 characters are used, as a
        substring (LIKE) match against results.race_name.

    Returns None when no result row matches. Otherwise returns a dict with
    the keys date, race_name, total_predictions, hits_top5, hit_rate_top5,
    winner, winner_predicted, bases_hit, bases_miss, top5_real (list of
    (horse, position) sorted by position) and details (one dict per horse).

    NOTE(review): the `predictions` table is not created in this file;
    presumably another script populates it — confirm.
    """
    # Lower number = more specific source; unknown sources rank last.
    source_priority = {
        'canalturf_prono_bases': 1,
        'canalturf_prono_chances': 2,
        'canalturf_prono_outsiders': 3,
        'canalturf_partants': 4,
        'canalturf_selections': 5,
    }
    unknown_priority = 9

    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        # All predictions of the day; de-duplication is done in Python.
        cur.execute('''
            SELECT horse_name, prediction_rank, source
            FROM predictions
            WHERE date=? AND source LIKE 'canalturf%'
            ORDER BY prediction_rank ASC, odds ASC
        ''', (date,))
        best_by_horse = {}
        for horse, rank, source in cur.fetchall():
            kept = best_by_horse.get(horse)
            prio = source_priority.get(source, unknown_priority)
            # Strict "<" keeps the first row seen among equal priorities.
            if kept is None or prio < source_priority.get(kept[2], unknown_priority):
                best_by_horse[horse] = (horse, rank, source)

        # Official results of the race.
        cur.execute('''
            SELECT horse_name, position
            FROM results
            WHERE date=? AND race_name LIKE ?
            ORDER BY position ASC
        ''', (date, f"%{race_name[:15]}%"))
        results = {name: pos for name, pos in cur.fetchall()}
        if not results:
            return None

        top5_real = sorted(
            ((name, pos) for name, pos in results.items() if pos <= 5),
            key=lambda item: item[1],
        )
        top5_names = {name for name, _ in top5_real}
        winner_real = next(
            (name for name, pos in results.items() if pos == 1), None
        )

        details = []
        for horse, pred_rank, source in best_by_horse.values():
            # 99 marks a predicted horse that has no stored result.
            actual_pos = results.get(horse, 99)
            hit_top5 = horse in top5_names

            # One performance row per (date, horse); existing rows are kept.
            cur.execute(
                "SELECT id FROM performance WHERE prediction_date=? AND horse_name=?",
                (date, horse),
            )
            if not cur.fetchone():
                cur.execute('''
                    INSERT INTO performance
                    (prediction_date, race_date, horse_name, predicted_rank, actual_position, hit)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (date, date, horse, pred_rank, actual_pos, hit_top5))

            details.append({
                "cheval": horse,
                "pred_rank": pred_rank,
                "actual_pos": actual_pos,
                "hit_top5": hit_top5,
                "hit_winner": horse == winner_real,
                "source": source,
            })

        conn.commit()
    finally:
        # Release the connection on every path, including errors and the
        # early "no results" return.
        conn.close()

    bases = [d for d in details if d["source"] == "canalturf_prono_bases"]
    nb_pred = len(details)
    nb_top5 = sum(1 for d in details if d["hit_top5"])

    return {
        "date": date,
        "race_name": race_name,
        "total_predictions": nb_pred,
        "hits_top5": nb_top5,
        "hit_rate_top5": round(nb_top5 / nb_pred * 100, 1) if nb_pred else 0,
        "winner": winner_real,
        "winner_predicted": winner_real in best_by_horse,
        "bases_hit": [d["cheval"] for d in bases if d["hit_top5"]],
        "bases_miss": [d["cheval"] for d in bases if not d["hit_top5"]],
        "top5_real": top5_real,
        "details": details,
    }
|
||||
|
||||
|
||||
# ============================================================
|
||||
# RAPPORT
|
||||
# ============================================================
|
||||
|
||||
def print_report(stats):
    """
    Print a human-readable summary of a comparison to the console.

    stats: dict as built by compare_predictions(), or a falsy value, in
        which case a single "nothing to compare" line is printed.
    """
    if not stats:
        print("❌ Aucune donnée à comparer.")
        return

    # Header
    print(f"\n{'='*60}")
    print(f"📊 BILAN QUINTÉ+ — {stats['date']}")
    print(f"🏇 {stats['race_name']}")
    print(f"{'='*60}")

    # Official finishing order
    print(f"\n🏆 ARRIVÉE OFFICIELLE (Top 5) :")
    for name, rank in stats["top5_real"]:
        print(f" {rank}. {name}")

    # Was the winner among the predicted horses?
    first = stats["winner"]
    if not stats["winner_predicted"]:
        print(f"\n❌ Gagnant non prédit : {first}")
    else:
        print(f"\n✅ GAGNANT PRÉDIT : {first}")

    # "Bases" picks: hits first, then misses
    print(f"\n⭐ BASES :")
    for name in stats["bases_hit"]:
        print(f" ✅ {name} (dans le top 5)")
    for name in stats["bases_miss"]:
        print(f" ❌ {name} (hors top 5)")

    # Overall hit rate
    print(f"\n📈 TAUX DE RÉUSSITE : {stats['hit_rate_top5']}% ({stats['hits_top5']}/{stats['total_predictions']} chevaux dans le top 5)")

    # Horses from the "canalturf_partants" source that finished in the top 5
    placed = [
        row['cheval']
        for row in stats["details"]
        if row["source"] == "canalturf_partants" and row["hit_top5"]
    ]
    print(f"\n💰 FAVORIS PLACÉS : {', '.join(placed) or 'aucun'}")

    print(f"{'='*60}\n")
|
||||
|
||||
|
||||
def save_report_json(stats, date):
    """
    Archive a report as an indented UTF-8 JSON file.

    stats: JSON-serialisable report.
    date: day of the report; dashes are removed to build the file name.

    The file is written as perf_<date without dashes>.json inside the
    directory named by the TURF_DIR environment variable (default
    /home/h3r7/turf_scraper). Returns the path of the written file.
    """
    base_dir = os.environ.get('TURF_DIR', '/home/h3r7/turf_scraper')
    compact_date = date.replace('-', '')
    path = f"{base_dir}/perf_{compact_date}.json"
    with open(path, 'w', encoding='utf-8') as out:
        json.dump(stats, out, indent=2, ensure_ascii=False)
    return path
|
||||
|
||||
|
||||
# ============================================================
|
||||
# MAIN
|
||||
# ============================================================
|
||||
|
||||
def main():
    """
    Run the full results workflow for the current day.

    Steps: create the tables, fetch the PMU programme, locate the Quinté+
    race, stop if its finishing order is not final yet, fetch the runners,
    store the official positions, compare them with the stored predictions,
    then print and archive the report. Progress and errors are printed to
    the console; the function returns None in every case.
    """
    # Capture the clock once so the DB date and the API date cannot
    # disagree if the run straddles midnight.
    now = datetime.now()
    today = now.strftime('%Y-%m-%d')
    date_pmu = now.strftime('%d%m%Y')

    print(f"\n{'='*60}")
    print(f"🏇 RESULTS SCRAPER — {now.strftime('%d/%m/%Y %H:%M')}")
    print(f"{'='*60}\n")

    # Database setup
    init_db_results()

    # Fetch the programme
    print("📡 Récupération du programme PMU...")
    try:
        reunions = get_programme(date_pmu)
        print(f" ✅ {len(reunions)} réunion(s) trouvée(s)")
    except Exception as e:  # top-level boundary: report and stop
        print(f" ❌ Erreur API PMU : {e}")
        return

    # Locate the Quinté+
    quinte = find_quinte(reunions)
    if not quinte:
        print(" ❌ Quinté+ non trouvé dans le programme")
        return

    num_r, num_c, libelle, hippodrome, arrivee_def = quinte
    print(f" 🏇 Quinté+ : R{num_r}C{num_c} — {libelle} ({hippodrome})")
    print(f" Arrivée définitive : {'✅ OUI' if arrivee_def else '⏳ PAS ENCORE'}")

    if not arrivee_def:
        print("\n⚠️ La course n'est pas encore terminée. Relancez après la course.")
        return

    # Fetch the runners together with their finishing positions
    print(f"\n📡 Récupération des résultats R{num_r}C{num_c}...")
    try:
        participants = get_participants(date_pmu, num_r, num_c)
        print(f" ✅ {len(participants)} participants récupérés")
    except Exception as e:  # top-level boundary: report and stop
        print(f" ❌ Erreur : {e}")
        return

    # Classified runners only, ordered by position. `or 0` keeps a null
    # ordreArrivee from raising TypeError in the comparison.
    classes = sorted(
        (p for p in participants if (p.get("ordreArrivee") or 0) > 0),
        key=lambda runner: runner["ordreArrivee"],
    )

    print(f"\n🏆 TOP 5 OFFICIEL :")
    for p in classes[:5]:
        cote = (p.get("dernierRapportDirect") or {}).get("rapport", "?")
        print(f" {p['ordreArrivee']}. {p['nom']:<25} cote={cote}")

    # Store the results
    saved = save_results(today, libelle, hippodrome, participants)
    print(f"\n💾 {saved} résultats sauvegardés en BDD")

    # Compare with the predictions
    print(f"\n🔍 Comparaison avec les prédictions...")
    stats = compare_predictions(today, libelle)

    if not stats:
        # compare_predictions returns None only when no result row matches
        # the race, so this is about results, not predictions.
        print("⚠️ Aucun résultat trouvé en BDD pour cette course.")
        return

    print_report(stats)
    path = save_report_json(stats, today)
    print(f"📁 Rapport sauvegardé : {path}")

    if not stats["total_predictions"]:
        # The real "no predictions" case: the report exists but is empty.
        print("⚠️ Aucune prédiction trouvée pour aujourd'hui en BDD.")
        print(" Vérifiez que multi_scraper_v5.py a bien tourné ce matin.")
|
||||
|
||||
|
||||
# Script entry point: run the workflow only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user