#!/usr/bin/env python3
"""
Results Scraper - API PMU officielle
Scrape les résultats réels du Quinté+, les sauvegarde en BDD
et calcule le taux de réussite des prédictions.
À lancer à 21h via cron ou OpenClaw.
"""

import requests
import sqlite3
import json
from datetime import datetime

import os

# SQLite database location; the DB_PATH environment variable overrides the default.
DB_PATH = os.environ.get("DB_PATH", "/home/h3r7/turf_scraper/turf.db")

# HTTP headers sent with every PMU API request.
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json",
}

# ============================================================
# API PMU
# ============================================================

def get_programme(date_str):
    """
    Fetch the full programme of one day from the PMU API.

    date_str: the day to fetch, formatted DDMMYYYY.
    Returns the list of meetings ("reunions") with their races, or an empty
    list when the payload carries no "programme" / "reunions" entry.
    A non-success HTTP status raises through raise_for_status().
    """
    endpoint = (
        "https://turfinfo.api.pmu.fr/rest/client/1/programme/{}/reunions"
    ).format(date_str)

    response = requests.get(endpoint, headers=HEADERS, timeout=15)
    response.raise_for_status()

    programme = response.json().get("programme", {})
    return programme.get("reunions", [])


def get_participants(date_str, num_reunion, num_course):
    """
    Fetch the runners of one race, including their "ordreArrivee" field.

    ordreArrivee is the official finishing position
    (0 = unplaced / disqualified).

    date_str: the day of the race, formatted DDMMYYYY.
    num_reunion / num_course: meeting and race numbers, inserted into the URL
    as R<num_reunion>/C<num_course>.
    Returns the "participants" list of the payload, or an empty list when the
    key is absent. A non-success HTTP status raises through raise_for_status().
    """
    endpoint = (
        "https://turfinfo.api.pmu.fr/rest/client/1/programme/"
        "{}/R{}/C{}/participants"
    ).format(date_str, num_reunion, num_course)

    response = requests.get(endpoint, headers=HEADERS, timeout=15)
    response.raise_for_status()

    payload = response.json()
    return payload.get("participants", [])


def find_quinte(reunions):
    """
    Locate the day's Quinté+ race in a PMU programme.

    A race matches when one of its bet types ("typePari") contains "QUINTE"
    or when its label ("libelle") contains "PARIS-TURF". Meetings and races
    are scanned in programme order and the first match is returned.

    Args:
        reunions: list of meeting dicts, each with a "courses" list. None or
            an empty list is accepted and yields None.

    Returns:
        A 5-tuple (num_reunion, num_course, libelle, hippodrome,
        arrivee_definitive), or None when no race matches.
        arrivee_definitive is the race's "arriveeDefinitive" value and
        defaults to False when the key is absent.

    Raises:
        KeyError: if the matching race or its meeting lacks "numOrdre",
            "numOfficiel" or "hippodrome"/"libelleCourt".
    """
    for reunion in reunions or []:
        # "or []" / "or ''" also cover keys that are present but null.
        for course in reunion.get("courses") or []:
            libelle = course.get("libelle") or ""
            paris_types = [
                pari.get("typePari") or ""
                for pari in course.get("paris") or []
            ]
            is_quinte = any("QUINTE" in type_pari for type_pari in paris_types)
            if is_quinte or "PARIS-TURF" in libelle:
                return (
                    reunion["numOfficiel"],
                    course["numOrdre"],
                    libelle,
                    reunion["hippodrome"]["libelleCourt"],
                    course.get("arriveeDefinitive", False),
                )
    return None


# ============================================================
# BASE DE DONNÉES
# ============================================================

def init_db_results():
    """
    Create the "results" and "performance" tables when they do not exist.

    Existing tables are left untouched (CREATE TABLE IF NOT EXISTS), so this
    is safe to call on every run. The connection is closed even when a
    statement fails.

    The "performance" columns mirror what compare_predictions() reads and
    writes (prediction_date, race_date, horse_name, predicted_rank,
    actual_position, hit), so a table created here works with that function.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()

        # Official finishing order, one row per placed horse.
        c.execute('''
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT NOT NULL,
                race_name TEXT,
                race_hippodrome TEXT,
                position INTEGER,
                horse_name TEXT,
                odds REAL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Prediction-versus-result comparison, one row per predicted horse.
        c.execute('''
            CREATE TABLE IF NOT EXISTS performance (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                prediction_date TEXT NOT NULL,
                race_date TEXT,
                horse_name TEXT,
                predicted_rank INTEGER,
                actual_position INTEGER,
                hit BOOLEAN,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        conn.commit()
    finally:
        conn.close()


def save_results(date, race_name, hippodrome, participants):
    """
    Store the official finishing positions in the "results" table.

    Runners whose "ordreArrivee" is 0, null or missing (unplaced or
    disqualified) are skipped. A row is inserted only when no row with the
    same date, race name, horse name and position already exists, so running
    the script again does not create duplicates.

    Args:
        date: race date, stored as-is in the "date" column.
        race_name: race label, stored in "race_name".
        hippodrome: racecourse name, stored in "race_hippodrome".
        participants: list of runner dicts from the PMU API.

    Returns:
        The number of rows inserted by this call.
    """
    conn = sqlite3.connect(DB_PATH)
    saved = 0
    try:
        c = conn.cursor()

        for p in participants:
            # "or 0" also treats an explicit null as "not placed".
            position = p.get("ordreArrivee") or 0
            if position == 0:
                continue

            horse = p.get("nom", "")
            # Final odds: the "rapport" of the last direct report, 0.0 if absent.
            rapport = p.get("dernierRapportDirect") or {}
            odds = rapport.get("rapport", 0.0)

            c.execute(
                "SELECT id FROM results WHERE date=? AND race_name=? AND horse_name=? AND position=?",
                (date, race_name, horse, position)
            )
            if c.fetchone():
                continue  # already stored by a previous run

            c.execute('''
                INSERT INTO results (date, race_name, race_hippodrome, position, horse_name, odds)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (date, race_name, hippodrome, position, horse, odds))
            saved += c.rowcount

        conn.commit()
    finally:
        # Closing without a commit discards pending inserts, so a failure
        # part-way through leaves the table unchanged.
        conn.close()
    return saved


def compare_predictions(date, race_name):
    """
    Compare the day's predictions with the stored official results.

    Predictions are read from the "predictions" table for `date`, restricted
    to sources starting with "canalturf". When a horse appears under several
    sources, only the row from the most specific source is kept
    (bases > chances > outsiders > partants > selections > anything else).
    Results are read from the "results" table for `date`, matching the race
    name on its first 15 characters with LIKE.

    For every kept prediction that has no "performance" row yet for
    (prediction_date, horse_name), one row is inserted.

    NOTE(review): predictions are filtered by date only, not by race —
    confirm the "predictions" table holds a single race per day.

    Args:
        date: day to compare, in the format stored in both tables.
        race_name: race label as stored by save_results().

    Returns:
        None when no result row matches. Otherwise a dict with the keys
        "date", "race_name", "total_predictions", "hits_top5",
        "hit_rate_top5" (percentage rounded to one decimal, 0 when there are
        no predictions), "winner", "winner_predicted", "bases_hit",
        "bases_miss", "top5_real" (list of (horse, position) sorted by
        position) and "details" (one dict per prediction).
    """
    # Lower value = more specific source; unknown sources rank last.
    source_priority = {
        'canalturf_prono_bases': 1,
        'canalturf_prono_chances': 2,
        'canalturf_prono_outsiders': 3,
        'canalturf_partants': 4,
        'canalturf_selections': 5,
    }
    unknown_priority = 9

    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()

        c.execute('''
            SELECT horse_name, prediction_rank, source
            FROM predictions
            WHERE date=? AND source LIKE 'canalturf%'
            ORDER BY prediction_rank ASC, odds ASC
        ''', (date,))

        # Deduplicate per horse, keeping the row from the best-ranked source.
        # On equal priority the first row (lowest rank, then odds) wins.
        best_by_horse = {}
        for horse, rank, source in c.fetchall():
            prio = source_priority.get(source, unknown_priority)
            kept = best_by_horse.get(horse)
            if kept is None or prio < kept[0]:
                best_by_horse[horse] = (prio, rank, source)

        c.execute('''
            SELECT horse_name, position
            FROM results
            WHERE date=? AND race_name LIKE ?
            ORDER BY position ASC
        ''', (date, f"%{race_name[:15]}%"))
        results = {row[0]: row[1] for row in c.fetchall()}

        if not results:
            return None

        top5_real = {h for h, pos in results.items() if pos <= 5}
        winner_real = next((h for h, pos in results.items() if pos == 1), None)

        performance_rows = []
        for horse, (_prio, pred_rank, source) in best_by_horse.items():
            # 99 marks a predicted horse that is absent from the results.
            actual_pos = results.get(horse, 99)
            hit_top5 = horse in top5_real
            hit_winner = horse == winner_real

            # Persist into the existing "performance" structure, once per horse/day.
            c.execute("SELECT id FROM performance WHERE prediction_date=? AND horse_name=?",
                      (date, horse))
            if not c.fetchone():
                c.execute('''
                    INSERT INTO performance
                    (prediction_date, race_date, horse_name, predicted_rank, actual_position, hit)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (date, date, horse, pred_rank, actual_pos, hit_top5))

            performance_rows.append({
                "cheval": horse,
                "pred_rank": pred_rank,
                "actual_pos": actual_pos,
                "hit_top5": hit_top5,
                "hit_winner": hit_winner,
                "source": source
            })

        conn.commit()
    finally:
        # Always release the connection, including on the early return above
        # and when a query fails (e.g. a missing table).
        conn.close()

    bases = [p for p in performance_rows if p["source"] == "canalturf_prono_bases"]
    nb_pred = len(performance_rows)
    nb_top5 = sum(1 for p in performance_rows if p["hit_top5"])

    return {
        "date": date,
        "race_name": race_name,
        "total_predictions": nb_pred,
        "hits_top5": nb_top5,
        "hit_rate_top5": round(nb_top5 / nb_pred * 100, 1) if nb_pred else 0,
        "winner": winner_real,
        "winner_predicted": any(p["hit_winner"] for p in performance_rows),
        "bases_hit": [p["cheval"] for p in bases if p["hit_top5"]],
        "bases_miss": [p["cheval"] for p in bases if not p["hit_top5"]],
        "top5_real": sorted(
            [(h, pos) for h, pos in results.items() if pos <= 5],
            key=lambda x: x[1],
        ),
        "details": performance_rows
    }


# ============================================================
# RAPPORT
# ============================================================

def print_report(stats):
    """
    Print a detailed console report for one comparison.

    stats: the dict produced by compare_predictions(); a falsy value prints
    a single "nothing to compare" line and returns.
    """
    if not stats:
        print("❌ Aucune donnée à comparer.")
        return

    rule = "=" * 60

    # Header
    print("\n" + rule)
    print("📊 BILAN QUINTÉ+ — {}".format(stats["date"]))
    print("🏇 {}".format(stats["race_name"]))
    print(rule)

    # Official finishing order
    print("\n🏆 ARRIVÉE OFFICIELLE (Top 5) :")
    for name, rank in stats["top5_real"]:
        print("   {}. {}".format(rank, name))

    # Was the winner among the predictions?
    winner_name = stats["winner"]
    if stats["winner_predicted"]:
        verdict = "\n✅ GAGNANT PRÉDIT : {}"
    else:
        verdict = "\n❌ Gagnant non prédit : {}"
    print(verdict.format(winner_name))

    # Base picks, hits first and misses second
    print("\n⭐ BASES :")
    base_sections = (
        ("bases_hit", "   ✅ {} (dans le top 5)"),
        ("bases_miss", "   ❌ {} (hors top 5)"),
    )
    for key, template in base_sections:
        for name in stats[key]:
            print(template.format(name))

    # Overall hit rate
    print(
        "\n📈 TAUX DE RÉUSSITE : {}% ({}/{} chevaux dans le top 5)".format(
            stats["hit_rate_top5"],
            stats["hits_top5"],
            stats["total_predictions"],
        )
    )

    # Horses from the "canalturf_partants" source that finished in the top 5
    placed = [
        row for row in stats["details"]
        if row["source"] == "canalturf_partants" and row["hit_top5"]
    ]
    placed_names = ", ".join(row["cheval"] for row in placed)
    print("\n💰 FAVORIS PLACÉS : {}".format(placed_names or "aucun"))

    print(rule + "\n")


def save_report_json(stats, date):
    """
    Archive the report as a UTF-8 JSON file and return the file path.

    The file is written to <TURF_DIR>/perf_<date without dashes>.json, where
    TURF_DIR comes from the environment and defaults to
    /home/h3r7/turf_scraper. An existing file is overwritten.
    """
    base_dir = os.environ.get('TURF_DIR', '/home/h3r7/turf_scraper')
    compact_date = date.replace('-', '')
    path = "{}/perf_{}.json".format(base_dir, compact_date)

    with open(path, 'w', encoding='utf-8') as handle:
        json.dump(stats, handle, indent=2, ensure_ascii=False)

    return path


# ============================================================
# MAIN
# ============================================================

def main():
    """
    Run the full evening workflow for today's Quinté+.

    Steps: create the tables if needed, fetch the day's programme, locate the
    Quinté+ race, stop if its finishing order is not final yet, fetch the
    runners, print and store the official order, compare it with the stored
    predictions, then print the report and archive it as JSON.

    Every failure path prints a message and returns; nothing is raised to the
    caller for API errors.
    """
    # Read the clock once so both date formats and the banner always agree.
    now = datetime.now()
    today = now.strftime('%Y-%m-%d')
    date_pmu = now.strftime('%d%m%Y')

    print(f"\n{'='*60}")
    print(f"🏇 RESULTS SCRAPER — {now.strftime('%d/%m/%Y %H:%M')}")
    print(f"{'='*60}\n")

    init_db_results()

    # Fetch the programme. The broad except is deliberate: this is the
    # script's top-level boundary and any API failure should end the run
    # with a message.
    print("📡 Récupération du programme PMU...")
    try:
        reunions = get_programme(date_pmu)
        print(f"   ✅ {len(reunions)} réunion(s) trouvée(s)")
    except Exception as e:
        print(f"   ❌ Erreur API PMU : {e}")
        return

    # Locate the Quinté+ race.
    quinte = find_quinte(reunions)
    if not quinte:
        print("   ❌ Quinté+ non trouvé dans le programme")
        return

    num_r, num_c, libelle, hippodrome, arrivee_def = quinte
    print(f"   🏇 Quinté+ : R{num_r}C{num_c} — {libelle} ({hippodrome})")
    print(f"   Arrivée définitive : {'✅ OUI' if arrivee_def else '⏳ PAS ENCORE'}")

    if not arrivee_def:
        print("\n⚠️  La course n'est pas encore terminée. Relancez après la course.")
        return

    # Fetch the runners together with their finishing positions.
    print(f"\n📡 Récupération des résultats R{num_r}C{num_c}...")
    try:
        participants = get_participants(date_pmu, num_r, num_c)
        print(f"   ✅ {len(participants)} participants récupérés")
    except Exception as e:
        print(f"   ❌ Erreur : {e}")
        return

    # Keep placed runners only; "or 0" treats a null position as unplaced
    # instead of failing on the comparison.
    classes = sorted(
        [p for p in participants if (p.get("ordreArrivee") or 0) > 0],
        key=lambda x: x["ordreArrivee"]
    )

    print(f"\n🏆 TOP 5 OFFICIEL :")
    for p in classes[:5]:
        cote = (p.get("dernierRapportDirect") or {}).get("rapport", "?")
        nom = p.get("nom") or ""
        print(f"   {p['ordreArrivee']}. {nom:<25} cote={cote}")

    # Store the results.
    saved = save_results(today, libelle, hippodrome, participants)
    print(f"\n💾 {saved} résultats sauvegardés en BDD")

    # Compare with the predictions.
    print(f"\n🔍 Comparaison avec les prédictions...")
    stats = compare_predictions(today, libelle)

    # compare_predictions() returns None only when no stored result matches
    # the race; missing predictions show up as total_predictions == 0.
    if stats is None:
        print("⚠️  Aucun résultat trouvé en BDD pour cette course : comparaison impossible.")
        return

    if not stats["total_predictions"]:
        print("⚠️  Aucune prédiction trouvée pour aujourd'hui en BDD.")
        print("   Vérifiez que multi_scraper_v5.py a bien tourné ce matin.")

    print_report(stats)
    path = save_report_json(stats, today)
    print(f"📁 Rapport sauvegardé : {path}")


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()