Merge pull request 'Sprint 6-7 — ML Upgrade: Ensemble XGBoost+LightGBM+MLP + Optuna' (#1) from feature/ml-upgrade-ensemble into master
This commit was merged in pull request #1.
This commit is contained in:
241
combined_api.py
241
combined_api.py
@@ -3519,7 +3519,6 @@ def brave_search():
|
||||
return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
|
||||
@app.route("/turf/api/predictions_analysis", methods=["GET"])
|
||||
def api_predictions_analysis():
|
||||
"""Analyse des predictions vs resultats reels"""
|
||||
@@ -3533,13 +3532,25 @@ def api_predictions_analysis():
|
||||
cursor = conn.cursor()
|
||||
|
||||
stats = {
|
||||
"canalturf": {"total": 0, "top1_pct": 0, "top3_pct": 0, "top5_pct": 0, "ze2_pct": 0},
|
||||
"scoring": {"total": 0, "top1_pct": 0, "top3_pct": 0, "top5_pct": 0, "ze2_pct": 0},
|
||||
"canalturf": {
|
||||
"total": 0,
|
||||
"top1_pct": 0,
|
||||
"top3_pct": 0,
|
||||
"top5_pct": 0,
|
||||
"ze2_pct": 0,
|
||||
},
|
||||
"scoring": {
|
||||
"total": 0,
|
||||
"top1_pct": 0,
|
||||
"top3_pct": 0,
|
||||
"top5_pct": 0,
|
||||
"ze2_pct": 0,
|
||||
},
|
||||
}
|
||||
|
||||
for source in ["canalturf", "scoring"]:
|
||||
pred_table = "predictions" if source == "canalturf" else "scoring"
|
||||
pred_col = "predicted_1" if source == "canalturf" else "horse_number"
|
||||
pred_col = "predicted_1" if source == "canalturf" else "horse_number"
|
||||
try:
|
||||
cursor.execute(
|
||||
f"""
|
||||
@@ -3566,16 +3577,16 @@ def api_predictions_analysis():
|
||||
top1_hit = top3_hit = 0
|
||||
total = len(races)
|
||||
for race, data in races.items():
|
||||
actual = set(data["actual"][:3])
|
||||
pred_top1 = data["predicted"][0] if data["predicted"] else None
|
||||
actual_top1 = data["actual"][0] if data["actual"] else None
|
||||
actual = set(data["actual"][:3])
|
||||
pred_top1 = data["predicted"][0] if data["predicted"] else None
|
||||
actual_top1 = data["actual"][0] if data["actual"] else None
|
||||
if pred_top1 and actual_top1 and pred_top1 == actual_top1:
|
||||
top1_hit += 1
|
||||
if len(set(data["predicted"][:3]) & actual) >= 1:
|
||||
top3_hit += 1
|
||||
|
||||
if total > 0:
|
||||
stats[source]["total"] = total
|
||||
stats[source]["total"] = total
|
||||
stats[source]["top1_pct"] = round(top1_hit / total * 100, 1)
|
||||
stats[source]["top3_pct"] = round(top3_hit / total * 100, 1)
|
||||
except Exception as e:
|
||||
@@ -3585,5 +3596,219 @@ def api_predictions_analysis():
|
||||
return jsonify({"stats": stats, "period": {"start": start_date, "end": end_date}})
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# /api/v1/predictions — Ensemble model endpoint (Sprint 6-7 ML Upgrade)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
_predict_v2 = None


def _load_predict_v2():
    """Lazily import and cache the ``predict_v2`` ensemble module.

    The module is loaded from its absolute file path on first use and stored
    in the module-level ``_predict_v2`` global. When the import fails the
    error is logged and the cache stays empty, so the next call tries again.

    Returns:
        The loaded ``predict_v2`` module, or ``None`` if it could not be
        imported.
    """
    global _predict_v2
    if _predict_v2 is not None:
        return _predict_v2

    try:
        import importlib.util

        spec = importlib.util.spec_from_file_location(
            "predict_v2", "/home/h3r7/turf_saas/predict_v2.py"
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        _predict_v2 = mod
    except Exception as e:
        import logging

        # Best-effort import: callers turn a None result into an HTTP error.
        # logging.exception keeps the traceback, which the plain message lost.
        logging.exception("[v1/predictions] predict_v2 import failed: %s", e)
    return _predict_v2
|
||||
|
||||
|
||||
@app.route("/api/v1/predictions", methods=["GET"])
@app.route("/turf/api/v1/predictions", methods=["GET"])
def api_v1_predictions():
    """
    Ensemble ML predictions using XGBoost + LightGBM + MLP (Optuna-tuned).

    Query params:
    - date: YYYY-MM-DD (default: latest date_programme in pmu_partants,
      falling back to today when the table has none)
    - reunion: int (default: all)
    - course: int (default: all)

    Responses:
    - 200: predictions grouped by race, or an empty "predictions" list when
      no partants match the filters
    - 400: "reunion" or "course" is not an integer
    - 503: the predict_v2 module or the ensemble model file is unavailable
    """
    import time as _time

    t0 = _time.perf_counter()

    mod = _load_predict_v2()
    if mod is None:
        # Graceful fallback: point the client at the legacy ml_predictions endpoint
        return jsonify(
            {
                "error": "Ensemble model not available yet",
                "fallback": "/api/ml_predictions",
                "message": "Model is still training. Use /api/ml_predictions for legacy XGBoost predictions.",
            }
        ), 503

    ensemble = mod.load_ensemble()
    if ensemble is None:
        return jsonify(
            {
                "error": "Ensemble model file not found",
                "model_path": str(mod.ENSEMBLE_PATH),
                "message": "Run train_ensemble.py to generate the model.",
                "fallback": "/api/ml_predictions",
            }
        ), 503

    date_param = request.args.get("date", None)
    reunion_param = request.args.get("reunion", None)
    course_param = request.args.get("course", None)

    # Reject malformed numeric filters up front instead of failing with a 500.
    try:
        reunion_num = int(reunion_param) if reunion_param else None
        course_num = int(course_param) if course_param else None
    except ValueError:
        return jsonify(
            {"error": "Query parameters 'reunion' and 'course' must be integers"}
        ), 400

    conn = sqlite3.connect("/home/h3r7/turf_saas/turf.db")
    try:
        conn.row_factory = sqlite3.Row

        # Determine date to use
        if date_param:
            date_used = date_param
        else:
            row = conn.execute(
                "SELECT MAX(date_programme) as d FROM pmu_partants"
            ).fetchone()
            date_used = (
                row["d"] if row and row["d"] else datetime.now().strftime("%Y-%m-%d")
            )

        # Build query. Only fixed clause strings are interpolated into the SQL;
        # every user-supplied value is passed as a bound parameter.
        where_clauses = ["p.date_programme = ?"]
        params = [date_used]
        if reunion_num is not None:
            where_clauses.append("p.num_reunion = ?")
            params.append(reunion_num)
        if course_num is not None:
            where_clauses.append("p.num_course = ?")
            params.append(course_num)

        query = f"""
            SELECT p.*, c.distance, c.discipline, c.specialite,
                   c.nb_declares_partants, c.montant_prix, c.penetrometre_intitule,
                   c.libelle as course_libelle, c.libelle_court as hippodrome,
                   c.heure_depart_str, c.parcours
            FROM pmu_partants p
            LEFT JOIN pmu_courses c ON p.date_programme = c.date_programme
                AND p.num_reunion = c.num_reunion AND p.num_course = c.num_course
            WHERE {" AND ".join(where_clauses)}
            ORDER BY p.num_reunion, p.num_course, p.num_pmu
        """
        rows = conn.execute(query, params).fetchall()
    finally:
        # Always release the connection, including when a query raises.
        conn.close()

    if not rows:
        return jsonify(
            {
                "date": date_used,
                "model_version": mod.get_model_version(),
                "predictions": [],
                "message": f"No partants found for date {date_used}",
            }
        )

    # Convert to list of dicts
    partants = [dict(r) for r in rows]

    # Run ensemble prediction
    preds = mod.predict_top3(partants, model=ensemble)

    # Index the first partant of each race once; it carries the race metadata.
    first_partant = {}
    for p in partants:
        first_partant.setdefault((p.get("num_reunion"), p.get("num_course")), p)

    # Group by race
    races = {}
    for pred in preds:
        key = f"R{pred.get('num_reunion', 0)}C{pred.get('num_course', 0)}"
        if key not in races:
            meta = first_partant.get((pred.get("num_reunion"), pred.get("num_course")))
            if meta is None:
                # No matching partant: the prediction cannot be attached to a race.
                continue
            races[key] = {
                "reunion": pred.get("num_reunion"),
                "course": pred.get("num_course"),
                "label": key,
                "race_name": meta.get("course_libelle", ""),
                "hippodrome": meta.get("hippodrome", ""),
                "heure": meta.get("heure_depart_str", ""),
                "discipline": meta.get("discipline", ""),
                "distance": meta.get("distance", 0),
                "horses": [],
            }
        races[key]["horses"].append(pred)

    latency_ms = (_time.perf_counter() - t0) * 1000

    return jsonify(
        {
            "date": date_used,
            "model_version": mod.get_model_version(),
            "latency_ms": round(latency_ms, 1),
            "total_horses": len(preds),
            "races": list(races.values()),
        }
    )
|
||||
|
||||
|
||||
@app.route("/api/v1/model/invalidate-cache", methods=["POST"])
@app.route("/turf/api/v1/model/invalidate-cache", methods=["POST"])
def api_v1_invalidate_cache():
    """Ask predict_v2 to reload the ensemble model on the next prediction.

    Responds with status "ok" once the cache has been invalidated, or with
    HTTP 500 and status "error" when the predict_v2 module is not loaded.
    """
    predict_module = _load_predict_v2()
    if not predict_module:
        failure = {"status": "error", "message": "predict_v2 module not loaded"}
        return jsonify(failure), 500

    predict_module.invalidate_model_cache()
    success = {"status": "ok", "message": "Model cache invalidated"}
    return jsonify(success)
|
||||
|
||||
|
||||
@app.route("/api/v1/model/status", methods=["GET"])
@app.route("/turf/api/v1/model/status", methods=["GET"])
def api_v1_model_status():
    """Return ensemble model status and version.

    The JSON response always contains "ensemble_available" and
    "ensemble_path". When the model file exists it also contains
    "last_trained" (file modification time) and, if predict_v2 can be
    loaded, "model_version". When the benchmark report exists and parses,
    a "benchmark" summary is included; a broken report is logged and
    omitted rather than failing the request.
    """
    import json as _json
    import logging as _logging
    from pathlib import Path as _Path

    ensemble_path = _Path("/home/h3r7/turf_saas/models/ensemble_top3.pkl")
    benchmark_path = _Path("/home/h3r7/turf_saas/models/benchmark_report.json")

    ensemble_exists = ensemble_path.exists()
    status = {
        "ensemble_available": ensemble_exists,
        "ensemble_path": str(ensemble_path),
    }
    if ensemble_exists:
        mtime = ensemble_path.stat().st_mtime
        status["last_trained"] = datetime.fromtimestamp(mtime).isoformat()

    if benchmark_path.exists():
        try:
            with open(benchmark_path, encoding="utf-8") as f:
                report = _json.load(f)
            status["benchmark"] = {
                "baseline_precision_at3": report.get("baseline", {}).get(
                    "precision_at3"
                ),
                "ensemble_precision_at3": report.get("ensemble", {}).get(
                    "precision_at3"
                ),
                "delta": report.get("delta_precision_at3"),
                "deployed": report.get("deploy"),
                "run_date": report.get("run_date"),
            }
        except Exception as e:
            # The benchmark section is optional: keep serving the status, but
            # leave a trace so an unreadable report does not go unnoticed.
            _logging.warning(
                "[v1/model/status] could not read benchmark report %s: %s",
                benchmark_path,
                e,
            )

    mod = _load_predict_v2()
    if mod and ensemble_exists:
        status["model_version"] = mod.get_model_version()

    return jsonify(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=8790, debug=False)
|
||||
|
||||
174
models/benchmark_report.json
Normal file
174
models/benchmark_report.json
Normal file
@@ -0,0 +1,174 @@
|
||||
{
|
||||
"run_date": "2026-04-25T19:09:46.629142",
|
||||
"dataset": {
|
||||
"db_path": "/home/h3r7/turf_saas/turf.db",
|
||||
"total_rows": 10899,
|
||||
"train_rows": 8719,
|
||||
"holdout_rows": 2180,
|
||||
"train_date_range": [
|
||||
"2026-03-31",
|
||||
"2026-04-19"
|
||||
],
|
||||
"holdout_date_range": [
|
||||
"2026-04-19",
|
||||
"2026-04-24"
|
||||
]
|
||||
},
|
||||
"baseline": {
|
||||
"model": "XGBoost (baseline)",
|
||||
"precision_at3": 0.5286821705426358,
|
||||
"auc": 0.7254057665061495
|
||||
},
|
||||
"individual_models": {
|
||||
"xgboost": {
|
||||
"model": "xgboost",
|
||||
"auc": 0.7856,
|
||||
"accuracy": 0.6917,
|
||||
"precision": 0.4865,
|
||||
"recall": 0.7229,
|
||||
"precision_at3": 0.5783,
|
||||
"latency_ms_per_row": 0.0112
|
||||
},
|
||||
"lightgbm": {
|
||||
"model": "lightgbm",
|
||||
"auc": 0.7833,
|
||||
"accuracy": 0.6995,
|
||||
"precision": 0.4951,
|
||||
"recall": 0.709,
|
||||
"precision_at3": 0.5736,
|
||||
"latency_ms_per_row": 0.0041
|
||||
},
|
||||
"mlp": {
|
||||
"model": "mlp",
|
||||
"auc": 0.7743,
|
||||
"accuracy": 0.7445,
|
||||
"precision": 0.5743,
|
||||
"recall": 0.5325,
|
||||
"precision_at3": 0.5643,
|
||||
"latency_ms_per_row": 0.0052
|
||||
}
|
||||
},
|
||||
"ensemble": {
|
||||
"model": "ensemble",
|
||||
"auc": 0.784,
|
||||
"accuracy": 0.7147,
|
||||
"precision": 0.5142,
|
||||
"recall": 0.6718,
|
||||
"precision_at3": 0.5814,
|
||||
"latency_ms_per_row": 0.0208
|
||||
},
|
||||
"delta_precision_at3": 0.0527,
|
||||
"deploy": true,
|
||||
"optuna": {
|
||||
"n_trials": 100,
|
||||
"xgboost_best_params": {
|
||||
"n_estimators": 141,
|
||||
"max_depth": 5,
|
||||
"learning_rate": 0.016298172447266404,
|
||||
"subsample": 0.7660470794373848,
|
||||
"colsample_bytree": 0.471124415020467,
|
||||
"min_child_weight": 14,
|
||||
"reg_alpha": 1.9364166463791586,
|
||||
"reg_lambda": 6.018030083488602,
|
||||
"gamma": 4.614943551368141
|
||||
},
|
||||
"lightgbm_best_params": {
|
||||
"n_estimators": 186,
|
||||
"max_depth": 4,
|
||||
"learning_rate": 0.012915117465216954,
|
||||
"num_leaves": 141,
|
||||
"subsample": 0.6193119116922561,
|
||||
"colsample_bytree": 0.539310022549326,
|
||||
"min_child_samples": 9,
|
||||
"reg_alpha": 0.6864583098112754,
|
||||
"reg_lambda": 0.0549259590914184
|
||||
}
|
||||
},
|
||||
"features": {
|
||||
"total": 43,
|
||||
"selected_by_shap": 31,
|
||||
"feature_list": [
|
||||
"age",
|
||||
"sexe_enc",
|
||||
"nombre_courses",
|
||||
"nombre_victoires",
|
||||
"nombre_places",
|
||||
"tx_victoire",
|
||||
"tx_place",
|
||||
"forme_recente",
|
||||
"tendance_num",
|
||||
"gains_annee_en_cours",
|
||||
"cote_direct",
|
||||
"cote_reference",
|
||||
"distance",
|
||||
"nb_partants",
|
||||
"discipline_enc",
|
||||
"specialite_enc",
|
||||
"oeilleres_enc",
|
||||
"tendance_cote_enc",
|
||||
"penetrometre_intitule_enc",
|
||||
"form_1",
|
||||
"form_2",
|
||||
"form_3",
|
||||
"form_4",
|
||||
"form_5",
|
||||
"form_weighted",
|
||||
"form_avg",
|
||||
"form_best",
|
||||
"form_worst",
|
||||
"win_ratio",
|
||||
"place_ratio",
|
||||
"implied_prob",
|
||||
"win_rate_adj",
|
||||
"place_rate_adj",
|
||||
"earnings_per_race",
|
||||
"cote_diff",
|
||||
"cote_ratio",
|
||||
"rang_cote",
|
||||
"ratio_cote_field",
|
||||
"distance_cat",
|
||||
"age_win_interact",
|
||||
"is_favorite",
|
||||
"poids",
|
||||
"prize_norm"
|
||||
],
|
||||
"shap_selected": [
|
||||
"rang_cote",
|
||||
"implied_prob",
|
||||
"cote_direct",
|
||||
"ratio_cote_field",
|
||||
"nb_partants",
|
||||
"cote_diff",
|
||||
"cote_ratio",
|
||||
"specialite_enc",
|
||||
"earnings_per_race",
|
||||
"nombre_courses",
|
||||
"cote_reference",
|
||||
"distance",
|
||||
"discipline_enc",
|
||||
"is_favorite",
|
||||
"prize_norm",
|
||||
"win_ratio",
|
||||
"place_rate_adj",
|
||||
"gains_annee_en_cours",
|
||||
"poids",
|
||||
"tx_place",
|
||||
"penetrometre_intitule_enc",
|
||||
"age_win_interact",
|
||||
"nombre_places",
|
||||
"tendance_num",
|
||||
"age",
|
||||
"form_avg",
|
||||
"form_weighted",
|
||||
"place_ratio",
|
||||
"form_3",
|
||||
"oeilleres_enc",
|
||||
"form_5"
|
||||
]
|
||||
},
|
||||
"ensemble_weights": {
|
||||
"xgboost": 0.23161801824035544,
|
||||
"lightgbm": 0.23415467282905,
|
||||
"mlp": 0.21290370528252356
|
||||
}
|
||||
}
|
||||
68
models/benchmark_report.md
Normal file
68
models/benchmark_report.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# Benchmark ML Ensemble — Turf Prédictions
|
||||
|
||||
**Date:** 2026-04-25
|
||||
**Dataset:** 10,899 partants
|
||||
**Holdout:** 2,180 lignes (2026-04-19 → 2026-04-24)
|
||||
|
||||
## Résultats
|
||||
|
||||
| Modèle | Precision@3 | AUC | Latence/prédiction |
|
||||
|--------|-------------|-----|-------------------|
|
||||
| XGBoost (baseline) | 0.5287 | 0.7254 | — |
|
||||
| xgboost | 0.5783 | 0.7856 | 0.01 ms |
|
||||
| lightgbm | 0.5736 | 0.7833 | 0.00 ms |
|
||||
| mlp | 0.5643 | 0.7743 | 0.01 ms |
|
||||
| **Ensemble** | **0.5814** | **0.7840** | **0.02 ms** |
|
||||
|
||||
## Décision de déploiement
|
||||
|
||||
- Delta Precision@3 : **+0.0527** (+5.3 points de pourcentage, écart absolu)
|
||||
- Seuil requis : **+5%**
|
||||
- Résultat : **✅ DEPLOIEMENT RECOMMANDE**
|
||||
|
||||
## Optimisation Optuna
|
||||
|
||||
- Trials XGBoost : 100
|
||||
- Trials LightGBM : 100
|
||||
- Pruning : MedianPruner
|
||||
|
||||
### Meilleurs hyperparamètres XGBoost
|
||||
```json
|
||||
{
|
||||
"n_estimators": 141,
|
||||
"max_depth": 5,
|
||||
"learning_rate": 0.016298172447266404,
|
||||
"subsample": 0.7660470794373848,
|
||||
"colsample_bytree": 0.471124415020467,
|
||||
"min_child_weight": 14,
|
||||
"reg_alpha": 1.9364166463791586,
|
||||
"reg_lambda": 6.018030083488602,
|
||||
"gamma": 4.614943551368141
|
||||
}
|
||||
```
|
||||
|
||||
### Meilleurs hyperparamètres LightGBM
|
||||
```json
|
||||
{
|
||||
"n_estimators": 186,
|
||||
"max_depth": 4,
|
||||
"learning_rate": 0.012915117465216954,
|
||||
"num_leaves": 141,
|
||||
"subsample": 0.6193119116922561,
|
||||
"colsample_bytree": 0.539310022549326,
|
||||
"min_child_samples": 9,
|
||||
"reg_alpha": 0.6864583098112754,
|
||||
"reg_lambda": 0.0549259590914184
|
||||
}
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
- Total features : 43
|
||||
- Retenues par SHAP : 31
|
||||
|
||||
## Poids de l'ensemble
|
||||
|
||||
- xgboost : 0.2316
|
||||
- lightgbm : 0.2342
|
||||
- mlp : 0.2129
|
||||
387
predict_v2.py
Normal file
387
predict_v2.py
Normal file
@@ -0,0 +1,387 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ensemble prediction module for /api/v1/predictions.
|
||||
|
||||
Loads the trained ensemble model and provides a high-level predict_top3()
|
||||
function compatible with the existing combined_api.py interface.
|
||||
|
||||
Cache: model is loaded once at import time (or on first call).
|
||||
Invalidation: reload if models/ensemble_top3.pkl mtime changes.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MODELS_DIR = Path("/home/h3r7/turf_saas/models")
|
||||
ENSEMBLE_PATH = MODELS_DIR / "ensemble_top3.pkl"
|
||||
|
||||
# ── Cache ─────────────────────────────────────────────────────────────────────
|
||||
_model_cache = {
|
||||
"ensemble": None,
|
||||
"mtime": None,
|
||||
"lock": threading.Lock(),
|
||||
}
|
||||
|
||||
# ── Feature list (must match train_ensemble.py FEATURE_COLS) ─────────────────
|
||||
FEATURE_COLS = [
|
||||
"age",
|
||||
"sexe_enc",
|
||||
"nombre_courses",
|
||||
"nombre_victoires",
|
||||
"nombre_places",
|
||||
"tx_victoire",
|
||||
"tx_place",
|
||||
"forme_recente",
|
||||
"tendance_num",
|
||||
"gains_annee_en_cours",
|
||||
"cote_direct",
|
||||
"cote_reference",
|
||||
"distance",
|
||||
"nb_partants",
|
||||
"discipline_enc",
|
||||
"specialite_enc",
|
||||
"oeilleres_enc",
|
||||
"tendance_cote_enc",
|
||||
"penetrometre_intitule_enc",
|
||||
"form_1",
|
||||
"form_2",
|
||||
"form_3",
|
||||
"form_4",
|
||||
"form_5",
|
||||
"form_weighted",
|
||||
"form_avg",
|
||||
"form_best",
|
||||
"form_worst",
|
||||
"win_ratio",
|
||||
"place_ratio",
|
||||
"implied_prob",
|
||||
"win_rate_adj",
|
||||
"place_rate_adj",
|
||||
"earnings_per_race",
|
||||
"cote_diff",
|
||||
"cote_ratio",
|
||||
"rang_cote",
|
||||
"ratio_cote_field",
|
||||
"distance_cat",
|
||||
"age_win_interact",
|
||||
"is_favorite",
|
||||
"poids",
|
||||
"prize_norm",
|
||||
]
|
||||
|
||||
|
||||
# ── Encoders (built per-prediction batch for live data) ──────────────────────
|
||||
def _fit_encoder(values, default):
    """Fit a LabelEncoder on the string form of ``values`` plus ``default``.

    Falsy entries are replaced by ``default`` before fitting, and ``default``
    is always part of the fitted classes.
    """
    labels = {str(item) if item else default for item in values}
    encoder = LabelEncoder()
    encoder.fit([*labels, default])
    return encoder
|
||||
|
||||
|
||||
def _safe_transform(le: LabelEncoder, value, default: str):
    """Encode ``value`` with ``le``, using ``default`` for falsy or unseen values.

    Returns the encoded label as a plain ``int``.
    """
    label = default if not value else str(value)
    is_known = label in le.classes_
    encoded = le.transform([label if is_known else default])
    return int(encoded[0])
|
||||
|
||||
|
||||
# ── Model loading with auto-invalidation ─────────────────────────────────────
|
||||
def load_ensemble(force: bool = False) -> Optional[object]:
    """Return the cached ensemble model, (re)loading it from disk when needed.

    The pickle at ``ENSEMBLE_PATH`` is read when ``force`` is set, when
    nothing is cached yet, or when the file's modification time differs from
    the cached one. The whole check runs under the cache lock.

    Returns:
        The ensemble object, or ``None`` when the file is missing or cannot be
        unpickled.
    """
    with _model_cache["lock"]:
        if not ENSEMBLE_PATH.exists():
            return None

        current_mtime = ENSEMBLE_PATH.stat().st_mtime
        cache_is_fresh = (
            not force
            and _model_cache["ensemble"] is not None
            and current_mtime == _model_cache["mtime"]
        )
        if cache_is_fresh:
            return _model_cache["ensemble"]

        try:
            # NOTE(review): pickle.load executes code embedded in the file, so
            # the models directory must only be writable by trusted jobs.
            with open(ENSEMBLE_PATH, "rb") as fh:
                _model_cache["ensemble"] = pickle.load(fh)
            _model_cache["mtime"] = current_mtime
            logger.info(f"[predict_v2] Loaded ensemble model from {ENSEMBLE_PATH}")
        except Exception as e:
            logger.error(f"[predict_v2] Failed to load ensemble: {e}")
            return None

        return _model_cache["ensemble"]
|
||||
|
||||
|
||||
def invalidate_model_cache():
    """Clear the cached modification time so the next load re-reads the file."""
    cache_lock = _model_cache["lock"]
    with cache_lock:
        _model_cache.update(mtime=None)
|
||||
|
||||
|
||||
# ── Feature engineering for live pmu_partants rows ───────────────────────────
|
||||
def _parse_musique(musique) -> list:
    """Extract the first five finishing positions from a "musique" string.

    Parenthesised numbers (e.g. ``(23)``) are removed first, then the first
    five remaining digit groups are returned as ints, padded with zeros to a
    length of five.

    Args:
        musique: Raw form string; may be ``None``, NaN, ``pd.NA`` or empty.

    Returns:
        A list of exactly five ints; all zeros when nothing can be parsed.
    """
    blank = [0, 0, 0, 0, 0]

    # Detect missing values explicitly: `not pd.NA` raises TypeError, and
    # checking pd.isna() on the stringified value can never be true.
    if musique is None or musique is pd.NA:
        return blank
    if isinstance(musique, float) and musique != musique:  # NaN
        return blank
    if not musique:
        return blank

    try:
        clean = re.sub(r"\(\d+\)", "", str(musique))
        positions = [int(n) for n in re.findall(r"\d+", clean)[:5]]
        return positions + [0] * (5 - len(positions))
    except Exception:
        return blank
|
||||
|
||||
|
||||
def build_feature_df(partants: list) -> pd.DataFrame:
    """
    Convert a list of pmu_partants dicts to a feature DataFrame.

    Expected keys (same as pmu_partants columns):
        date_programme, num_reunion, num_course, num_pmu,
        age, sexe, musique, nombre_courses, nombre_victoires, nombre_places,
        gains_annee_en_cours, handicap_poids, oeilleres, cote_direct,
        cote_reference, tendance_cote, favoris, tx_victoire, tx_place,
        forme_recente, tendance_forme, indicateur_inedit,
        distance, discipline, specialite, nb_declares_partants,
        montant_prix, penetrometre_intitule

    Any missing key is tolerated: categorical columns fall back to their
    default label, ``musique`` to an empty form line, and numeric columns
    to 0.

    Returns:
        The input rows as a DataFrame with the engineered feature columns
        added, or an empty DataFrame when ``partants`` is empty.
    """
    if not partants:
        return pd.DataFrame()

    df = pd.DataFrame(partants)

    def _column(name, default):
        # Batch column when present, otherwise a constant column aligned to df.
        if name in df.columns:
            return df[name]
        return pd.Series([default] * len(df), index=df.index)

    # ── Categorical encoders fitted on this batch ─────────────────────────────
    # NOTE(review): the encoders are fitted per call, so the integer codes
    # depend on the categories present in the batch — confirm this matches
    # the encoding used at training time.
    categorical = (
        ("sexe", "U"),
        ("oeilleres", "SANS"),
        ("discipline", "UNKNOWN"),
        ("specialite", "UNKNOWN"),
        ("tendance_cote", "STABLE"),
        ("penetrometre_intitule", "BON"),
    )
    for name, default in categorical:
        raw = _column(name, default)
        encoder = _fit_encoder(raw, default)
        df[f"{name}_enc"] = raw.apply(
            lambda v, enc=encoder, dflt=default: _safe_transform(enc, v, dflt)
        )

    # ── Musique ────────────────────────────────────────────────────────────────
    music_parsed = _column("musique", None).apply(_parse_musique)
    for i in range(5):
        df[f"form_{i + 1}"] = music_parsed.apply(lambda x, i=i: x[i])
    weights = np.array([0.4, 0.25, 0.15, 0.12, 0.08])
    df["form_weighted"] = music_parsed.apply(
        lambda x: sum(w * v for w, v in zip(weights, x))
    )
    df["form_avg"] = music_parsed.apply(np.mean)
    df["form_best"] = music_parsed.apply(min)
    df["form_worst"] = music_parsed.apply(max)

    # ── Numeric features ───────────────────────────────────────────────────────
    for col in [
        "nombre_courses",
        "nombre_victoires",
        "nombre_places",
        "tx_victoire",
        "tx_place",
        "forme_recente",
        "tendance_forme",
        "gains_annee_en_cours",
        "cote_direct",
        "cote_reference",
        "distance",
        "handicap_poids",
        "age",
        "montant_prix",
        "nb_declares_partants",
    ]:
        if col not in df.columns:
            df[col] = 0.0
        # Non-numeric and missing values both become 0.
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

    df["tendance_num"] = df["tendance_forme"].fillna(0)
    # Zero race counts are replaced by 1 to avoid dividing by zero.
    df["win_ratio"] = df["nombre_victoires"] / df["nombre_courses"].replace(0, 1)
    df["place_ratio"] = df["nombre_places"] / df["nombre_courses"].replace(0, 1)
    df["implied_prob"] = 1.0 / df["cote_direct"].replace(0, np.nan)
    df["win_rate_adj"] = df["tx_victoire"] * np.log1p(df["nombre_courses"])
    df["place_rate_adj"] = df["tx_place"] * np.log1p(df["nombre_courses"])
    df["earnings_per_race"] = df["gains_annee_en_cours"] / df["nombre_courses"].replace(
        0, 1
    )
    df["cote_diff"] = (df["cote_direct"] - df["cote_reference"]).fillna(0)
    df["cote_ratio"] = (
        df["cote_direct"] / df["cote_reference"].replace(0, np.nan)
    ).fillna(1)

    # ── Per-race rank features ─────────────────────────────────────────────────
    if "num_reunion" in df.columns and "num_course" in df.columns:
        grp = ["date_programme", "num_reunion", "num_course"]
        # Some fields may be missing
        for g in grp:
            if g not in df.columns:
                df[g] = 0
        df["rang_cote"] = df.groupby(grp)["cote_direct"].rank(
            method="min", na_option="bottom"
        )
        race_mean = df.groupby(grp)["cote_direct"].transform("mean")
        df["ratio_cote_field"] = df["cote_direct"] / race_mean.replace(0, np.nan)
        df["nb_partants"] = df.groupby(grp)["cote_direct"].transform("count")
    else:
        df["rang_cote"] = 1.0
        df["ratio_cote_field"] = 1.0
        df["nb_partants"] = _column("nb_declares_partants", 10)

    # NOTE(review): "distance" and "handicap_poids" were already zero-filled in
    # the numeric loop above, so the fillna(1600) / fillna(60) calls below never
    # substitute anything; a distance of 0 falls outside the bins and yields
    # NaN. Left as-is — confirm against the training pipeline before changing.
    df["distance_cat"] = pd.cut(
        df["distance"].fillna(1600),
        bins=[0, 1400, 1800, 2200, 2600, 10000],
        labels=[1, 2, 3, 4, 5],
    ).astype(float)
    df["age_win_interact"] = df["age"] * df["tx_victoire"]
    df["is_favorite"] = _column("favoris", 0).fillna(0).astype(int)
    df["poids"] = df["handicap_poids"].fillna(60)
    df["prize_norm"] = np.log1p(df["montant_prix"].fillna(0))

    return df
|
||||
|
||||
|
||||
# ── Main prediction function ───────────────────────────────────────────────────
|
||||
def predict_top3(partants: list, model=None) -> list:
    """
    Given a list of partant dicts (from pmu_partants), return predictions.

    Args:
        partants: Rows to score; may cover one race or several.
        model: Object exposing ``predict_proba``; loaded via
            ``load_ensemble()`` when omitted.

    Returns:
        A list of dicts {horse_name, num_pmu, num_reunion, num_course,
        prob_top3, prob_top1, ml_score, recommendation, is_value_bet,
        model_version} sorted by prob_top3 descending. The three
        highest-scored horses of each race additionally carry
        ``predicted_rank`` (1-3).

        Falls back to an empty list if the model is not available, the input
        is empty, or scoring fails.
    """
    t_start = time.perf_counter()

    if model is None:
        model = load_ensemble()
    if model is None:
        logger.warning("[predict_v2] Ensemble model not available — no predictions")
        return []

    df = build_feature_df(partants)
    if df.empty:
        return []

    available = [c for c in FEATURE_COLS if c in df.columns]
    X = df[available].fillna(0)

    try:
        proba = model.predict_proba(X)[:, 1]
    except Exception as e:
        logger.error(f"[predict_v2] predict_proba failed: {e}")
        return []

    latency_ms = (time.perf_counter() - t_start) * 1000

    # Each record is paired with its race identity so ranks can be assigned
    # per race after the global sort.
    scored = []
    for i, (p, row) in enumerate(zip(proba, partants)):
        race_id = (
            row.get("date_programme"),
            row.get("num_reunion"),
            row.get("num_course"),
        )
        scored.append(
            (
                race_id,
                {
                    "horse_name": row.get("nom", row.get("horse_name", f"H{i}")),
                    "num_pmu": row.get("num_pmu", i + 1),
                    "num_reunion": row.get("num_reunion"),
                    "num_course": row.get("num_course"),
                    "prob_top3": round(float(p) * 100, 1),
                    # approx top1 from top3 score (divide by ~2.5 empirically)
                    "prob_top1": round(float(p) / 2.5 * 100, 1),
                    "ml_score": round(float(p) * 100, 1),
                    "recommendation": "top3"
                    if p >= 0.40
                    else ("watch" if p >= 0.28 else "pass"),
                    "is_value_bet": int(
                        p >= 0.35 and float(row.get("cote_direct", 0) or 0) > 10
                    ),
                    "model_version": getattr(model, "version", "ensemble_v1"),
                },
            )
        )

    scored.sort(key=lambda item: item[1]["prob_top3"], reverse=True)

    # Mark the top-3 predicted horses of every race. Ranking the whole batch
    # would flag only three horses in total when several races are scored in
    # one call.
    ranked_in_race = {}
    results = []
    for race_id, record in scored:
        rank = ranked_in_race.get(race_id, 0) + 1
        if rank <= 3:
            record["predicted_rank"] = rank
            ranked_in_race[race_id] = rank
        results.append(record)

    if results:
        logger.info(
            f"[predict_v2] {len(results)} horses predicted in {latency_ms:.1f} ms "
            f"({latency_ms / len(results):.2f} ms/horse)"
        )

    return results
|
||||
|
||||
|
||||
# ── API-compatible wrapper keeping model_version & structure ──────────────────
|
||||
def get_model_version() -> str:
    """Return the loaded ensemble's ``version`` attribute.

    Yields "ensemble_v1" when the model has no such attribute and
    "ensemble_v1_not_loaded" when no model could be loaded.
    """
    ensemble = load_ensemble()
    return (
        "ensemble_v1_not_loaded"
        if ensemble is None
        else getattr(ensemble, "version", "ensemble_v1")
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Quick self-test: score the latest programme's race R1C1 and print the
    # five highest-scored horses.
    import sqlite3
    from contextlib import closing

    # One query is enough: the cursor supplies both the rows and the column
    # names (a Connection object has no `description` attribute).
    with closing(sqlite3.connect("/home/h3r7/turf_saas/turf.db")) as conn:
        cur = conn.execute(
            """SELECT p.*, c.distance, c.discipline, c.specialite,
                      c.nb_declares_partants, c.montant_prix, c.penetrometre_intitule
               FROM pmu_partants p
               LEFT JOIN pmu_courses c ON p.date_programme=c.date_programme
                   AND p.num_reunion=c.num_reunion AND p.num_course=c.num_course
               WHERE p.date_programme=(SELECT MAX(date_programme) FROM pmu_partants)
                   AND p.num_reunion=1 AND p.num_course=1
               LIMIT 20"""
        )
        cols = [d[0] for d in cur.description]
        rows = cur.fetchall()

    if not rows:
        print("No data found for self-test")
    else:
        partants = [dict(zip(cols, row)) for row in rows]
        preds = predict_top3(partants)
        print(f"Self-test: {len(preds)} predictions")
        for p in preds[:5]:
            print(
                f"  {p['horse_name']:20s} prob_top3={p['prob_top3']}%  rec={p['recommendation']}"
            )
|
||||
12
pytest.ini
Normal file
12
pytest.ini
Normal file
@@ -0,0 +1,12 @@
|
||||
[pytest]
|
||||
asyncio_mode = auto
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
addopts = --tb=short -v
|
||||
markers =
|
||||
e2e: Tests End-to-End Playwright
|
||||
load: Tests de charge Locust
|
||||
security: Tests de sécurité
|
||||
smoke: Tests rapides de smoke
|
||||
182
rebuild_ensemble.py
Normal file
182
rebuild_ensemble.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rebuild ensemble using known best Optuna params (from completed study).
|
||||
Skips the 100-trial Optuna search and goes straight to training + pickling.
|
||||
"""
|
||||
import sys
|
||||
sys.path.insert(0, '/home/h3r7/turf_saas')
|
||||
|
||||
from train_ensemble import (
|
||||
load_data, engineer_features, temporal_split, get_features_and_target,
|
||||
evaluate_baseline, train_xgboost, train_lightgbm, train_mlp,
|
||||
shap_feature_selection, compute_ensemble_weights,
|
||||
evaluate_model, compute_precision_at3, TurfEnsemble,
|
||||
MODELS_DIR, DEPLOY_THRESHOLD, _write_markdown_report
|
||||
)
|
||||
import json, pickle, numpy as np
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# SQLite database the training rows are loaded from.
DB_PATH = "/home/h3r7/turf_saas/turf.db"

# Best hyper-parameters kept from the completed 100-trial Optuna study, so the
# search does not have to be repeated when the ensemble is rebuilt.
XGB_BEST = {
    "n_estimators": 141,
    "max_depth": 5,
    "learning_rate": 0.016298172447266404,
    "subsample": 0.7660470794373848,
    "colsample_bytree": 0.471124415020467,
    "min_child_weight": 14,
    "reg_alpha": 1.9364166463791586,
    "reg_lambda": 6.018030083488602,
    "gamma": 4.614943551368141,
}
LGB_BEST = {
    "n_estimators": 186,
    "max_depth": 4,
    "learning_rate": 0.012915117465216954,
    "num_leaves": 141,
    "subsample": 0.6193119116922561,
    "colsample_bytree": 0.539310022549326,
    "min_child_samples": 9,
    "reg_alpha": 0.6864583098112754,
    "reg_lambda": 0.0549259590914184,
}

print("=" * 65)
print("TURF ENSEMBLE REBUILD (using pre-computed Optuna params)")
print("=" * 65)

# --- Step 1: raw rows + engineered features ---------------------------------
print("\n[1/7] Loading data...")
df = engineer_features(load_data(DB_PATH))

# --- Step 2: temporal train / holdout split ----------------------------------
print("\n[2/7] Temporal split...")
train_df, holdout_df = temporal_split(df)
X_train, y_train, feat_cols = get_features_and_target(train_df)
X_holdout, y_holdout, _ = get_features_and_target(holdout_df)

# The trailing 15% of the training rows are set aside as a validation slice;
# the models below are fitted on the leading 85% only.
n = len(X_train)
n_val = int(n * 0.15)
X_tr, y_tr = X_train.iloc[:n - n_val], y_train.iloc[:n - n_val]
X_val, y_val = X_train.iloc[n - n_val:], y_train.iloc[n - n_val:]

# --- Step 3: reference metrics of the currently saved XGBoost model ----------
print("\n[3/7] Evaluating baseline XGBoost...")
baseline = evaluate_baseline(holdout_df, "/home/h3r7/turf_saas/xgboost_models.pkl")
print(f" Baseline P@3={baseline['precision_at3']:.4f} AUC={baseline['auc']:.4f}")

# --- Step 4: fit the three base models ---------------------------------------
print("\n[4/7] Training models with best params...")
print(" XGBoost...")
xgb_model = train_xgboost(X_tr, y_tr, XGB_BEST)
print(" LightGBM...")
lgb_model = train_lightgbm(X_tr, y_tr, LGB_BEST)
print(" MLP...")
# The MLP is fitted on the bare array rather than on the DataFrame.
mlp_model = train_mlp(X_tr.values, y_tr)

# --- Step 5: SHAP-based feature ranking --------------------------------------
print("\n[5/7] SHAP analysis...")
selected_features, shap_df = shap_feature_selection(xgb_model, X_tr)

print("\n[6/7] Computing ensemble weights...")
|
||||
class WrappedMLP:
    """Adapter giving an array-fed pipeline a DataFrame-based ``predict_proba``.

    Attributes:
        pipeline: fitted estimator exposing ``predict_proba(array)``.
        feature_cols: ordered column names the estimator expects.
    """

    def __init__(self, pipeline, cols):
        self.pipeline = pipeline
        self.feature_cols = cols

    def predict_proba(self, X):
        """Return ``pipeline.predict_proba`` for the known columns of ``X``.

        Columns are taken in ``feature_cols`` order; names absent from ``X``
        are silently skipped, and the selection is passed as a bare array
        (``.values``).
        """
        # The per-call ``import pandas`` of the original was never used and
        # has been dropped.
        available = [c for c in self.feature_cols if c in X.columns]
        return self.pipeline.predict_proba(X[available].values)
|
||||
|
||||
class WrappedTree:
    """Adapter restricting a tree model's input to its known feature columns.

    Attributes:
        model: fitted estimator exposing ``predict_proba(DataFrame)``.
        feature_cols: ordered column names the estimator expects.
    """

    def __init__(self, model, cols):
        self.feature_cols = cols
        self.model = model

    def predict_proba(self, X):
        """Return ``model.predict_proba`` on the known columns present in ``X``.

        Columns follow ``feature_cols`` order; names missing from ``X`` are
        skipped. The selection is passed on as a DataFrame.
        """
        present = []
        for column in self.feature_cols:
            if column in X.columns:
                present.append(column)
        subset = X[present]
        return self.model.predict_proba(subset)
|
||||
|
||||
# Column-aware adapters: every base model gets the same DataFrame-in interface.
wrapped_xgb = WrappedTree(xgb_model, feat_cols)
wrapped_lgb = WrappedTree(lgb_model, feat_cols)
wrapped_mlp = WrappedMLP(mlp_model, feat_cols)
model_dict = {
    "xgboost": wrapped_xgb,
    "lightgbm": wrapped_lgb,
    "mlp": wrapped_mlp,
}

# Weights are derived from the validation slice, not from the holdout.
weights = compute_ensemble_weights(model_dict, X_val, y_val, feat_cols)
print(" Weights:", weights)

print("\n[7/7] Evaluating + saving ensemble...")
# The ensemble receives the raw (unwrapped) models.
ensemble = TurfEnsemble(xgb_model, lgb_model, mlp_model, weights, feat_cols)

# Holdout metrics, first per base model, then for the ensemble itself.
results = {}
for name, wrapped in model_dict.items():
    res = evaluate_model(wrapped, X_holdout, y_holdout, holdout_df, name)
    results[name] = res
    print(f" {name:12s} P@3={res['precision_at3']:.4f} AUC={res['auc']:.4f}")

ens_res = evaluate_model(ensemble, X_holdout, y_holdout, holdout_df, "ensemble")
results["ensemble"] = ens_res
print(f" {'ensemble':12s} P@3={ens_res['precision_at3']:.4f} AUC={ens_res['auc']:.4f}")

# Deployment decision: Precision@3 gain over the baseline vs. DEPLOY_THRESHOLD.
delta = ens_res["precision_at3"] - baseline["precision_at3"]
deploy = delta >= DEPLOY_THRESHOLD
print(f"\n Delta: {delta:+.4f} ({delta*100:+.1f}%) Deploy={'YES' if deploy else 'NO'}")

# Save ensemble
# NOTE(review): the pickle is written whatever the value of `deploy` — confirm
# that overwriting the artefact on a NO verdict is intended.
ensemble_path = MODELS_DIR / "ensemble_top3.pkl"
with open(ensemble_path, "wb") as f:
    pickle.dump(ensemble, f)
print(f"\n ✅ ensemble_top3.pkl saved ({ensemble_path.stat().st_size//1024} KB)")

# Save individual models (tree models share one payload layout)
for name, model in (("xgboost_optimized", xgb_model), ("lightgbm", lgb_model)):
    path = MODELS_DIR / f"{name}_top3.pkl"
    with open(path, "wb") as f:
        pickle.dump({"model": model, "feature_cols": feat_cols}, f)
    print(f" ✅ {name}_top3.pkl saved")

# The MLP payload stores its estimator under the "pipeline" key instead.
mlp_path = MODELS_DIR / "mlp_top3.pkl"
with open(mlp_path, "wb") as f:
    pickle.dump({"pipeline": mlp_model, "feature_cols": feat_cols}, f)
print(" ✅ mlp_top3.pkl saved")

# Benchmark report
report = {
    "run_date": datetime.now().isoformat(),
    "dataset": {
        "db_path": DB_PATH,
        "total_rows": len(df),
        "train_rows": len(X_train),
        "holdout_rows": len(X_holdout),
        "train_date_range": [
            str(train_df["date_programme"].min()),
            str(train_df["date_programme"].max()),
        ],
        "holdout_date_range": [
            str(holdout_df["date_programme"].min()),
            str(holdout_df["date_programme"].max()),
        ],
    },
    "baseline": baseline,
    # Every evaluated model except the ensemble entry added above.
    "individual_models": {
        label: metrics for label, metrics in results.items() if label != "ensemble"
    },
    "ensemble": ens_res,
    "delta_precision_at3": round(delta, 4),
    "deploy": deploy,
    "optuna": {
        # Trial count of the original study; no search is run by this script.
        "n_trials": 100,
        "xgboost_best_params": XGB_BEST,
        "lightgbm_best_params": LGB_BEST,
    },
    "features": {
        "total": len(feat_cols),
        "selected_by_shap": len(selected_features),
        "feature_list": feat_cols,
        "shap_selected": selected_features,
    },
    "ensemble_weights": weights,
}

report_path = MODELS_DIR / "benchmark_report.json"
with open(report_path, "w") as f:
    json.dump(report, f, indent=2)
print(" ✅ benchmark_report.json saved")

md_path = MODELS_DIR / "benchmark_report.md"
_write_markdown_report(report, md_path)
print(" ✅ benchmark_report.md saved")

# Final summary
print("\n" + "=" * 65)
print("DONE")
print(f" Baseline P@3: {baseline['precision_at3']:.4f}")
print(f" Ensemble P@3: {ens_res['precision_at3']:.4f}")
print(f" Delta: {delta:+.4f} ({delta*100:+.1f}%)")
print(f" Deploy: {'✅ YES' if deploy else '❌ NO'}")
print("=" * 65)
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
448
tests/beta_monitor.py
Normal file
448
tests/beta_monitor.py
Normal file
@@ -0,0 +1,448 @@
|
||||
"""
|
||||
Beta Monitoring — SaaS Turf Prédictions IA
|
||||
Sprint 8 — QA, Beta Fermee, Go/No-Go
|
||||
Ticket: HRT-34
|
||||
|
||||
Ce module :
|
||||
- Collecte les feedbacks beta via l'API in-app
|
||||
- Envoie des alertes Telegram en cas d'erreur détectée pendant la beta
|
||||
- Génère le rapport beta final (bugs, UX, NPS)
|
||||
|
||||
Usage :
|
||||
# Démarrer le monitoring beta
|
||||
python tests/beta_monitor.py --watch --interval 60
|
||||
|
||||
# Générer le rapport beta final
|
||||
python tests/beta_monitor.py --report
|
||||
|
||||
# Test d'envoi Telegram
|
||||
python tests/beta_monitor.py --test-telegram
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import sqlite3
|
||||
import requests
|
||||
import argparse
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
# ============================================================
|
||||
# Configuration
|
||||
# ============================================================
|
||||
|
||||
# Base URL of the application under watch.
BASE_URL = os.environ.get("APP_URL", "http://localhost:8792")

# SECURITY: the bot token is a secret and must come from the environment only.
# A real-looking token used to be committed here as the fallback value; it
# should be revoked. With no token configured, send_telegram() simply reports
# that Telegram is not set up and returns False.
TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN", "")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "")  # must be configured

# SQLite database that may contain the `beta_feedback` table.
BETA_DB_PATH = os.environ.get("BETA_DB_PATH", "/home/h3r7/turf_saas/turf_saas.db")

# Output directory for logs and reports (relative to the working directory);
# created at import time.
REPORTS_DIR = Path("tests/reports")
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

# Alert thresholds
ERROR_RATE_THRESHOLD = 0.01  # error rate above 1% -> alert
LATENCY_P95_THRESHOLD_MS = 500  # p95 above 500 ms -> alert
BETA_MIN_USERS = 10  # minimum number of beta users required
NPS_TARGET = 7.0  # target NPS (out of 10)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Alertes Telegram
|
||||
# ============================================================
|
||||
|
||||
|
||||
def send_telegram(message: str, parse_mode: str = "Markdown") -> bool:
    """Post an alert message to the configured Telegram chat.

    Args:
        message: text to send.
        parse_mode: value forwarded as the Bot API ``parse_mode`` field.

    Returns:
        True only when the API answers with HTTP 200. False when the token or
        chat id is not configured, on any other status code, or when the
        request raises.
    """
    if not (TELEGRAM_TOKEN and TELEGRAM_CHAT_ID):
        # Nothing to send to: show the first 100 characters locally instead.
        print(f"⚠️ Telegram non configuré. Message: {message[:100]}")
        return False

    endpoint = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
    payload = {
        "chat_id": TELEGRAM_CHAT_ID,
        "text": message,
        "parse_mode": parse_mode,
    }
    try:
        resp = requests.post(endpoint, json=payload, timeout=10)
        delivered = resp.status_code == 200
        if delivered:
            print("✅ Alerte Telegram envoyée")
        else:
            print(f"❌ Telegram erreur: {resp.status_code} — {resp.text}")
        return delivered
    except Exception as exc:
        # Best effort: a failed alert must never take the monitor down.
        print(f"❌ Telegram exception: {exc}")
        return False
|
||||
|
||||
|
||||
def alert_error(endpoint: str, status_code: int, message: str):
    """Send a Telegram alert describing a failing endpoint.

    Args:
        endpoint: path of the endpoint that failed.
        status_code: status recorded for the check.
        message: error description; only its first 200 characters are sent.
    """
    now = datetime.now().strftime('%H:%M:%S')
    # Empty entries produce the blank lines of the final message.
    lines = [
        "🚨 *ALERTE BETA — SaaS Turf IA*",
        "",
        f"Erreur détectée sur `{endpoint}`",
        f"Status: `{status_code}`",
        f"Message: {message[:200]}",
        f"Heure: {now}",
        "",
        "_Ticket: HRT-34_",
    ]
    send_telegram("\n".join(lines))
|
||||
|
||||
|
||||
def alert_performance(p95_ms: float, error_rate: float):
    """Send a Telegram alert about degraded latency / error rate.

    Args:
        p95_ms: measured p95 latency in milliseconds.
        error_rate: failure ratio in [0, 1]; rendered as a percentage.
    """
    now = datetime.now().strftime('%H:%M:%S')
    error_pct = error_rate * 100
    threshold_pct = ERROR_RATE_THRESHOLD * 100
    # Empty entries produce the blank lines of the final message.
    lines = [
        "⚠️ *ALERTE PERFORMANCE — SaaS Turf IA*",
        "",
        f"p95 latence: `{p95_ms:.0f}ms` (seuil: {LATENCY_P95_THRESHOLD_MS}ms)",
        f"Error rate: `{error_pct:.2f}%` (seuil: {threshold_pct:.1f}%)",
        f"Heure: {now}",
        "",
        "_Ticket: HRT-34_",
    ]
    send_telegram("\n".join(lines))
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Collecte de métriques
|
||||
# ============================================================
|
||||
|
||||
|
||||
class BetaMonitor:
    """Active endpoint monitor used during the closed beta.

    Attributes:
        base_url: application root URL, without trailing slash.
        errors: every failed check result collected so far.
        latencies: every positive latency (ms) collected so far.
        check_count: number of completed ``run_checks`` rounds.
    """

    # Paths probed on every round.
    ENDPOINTS_TO_CHECK = [
        "/api",
        "/api/races",
        "/api/scoring",
        "/dashboard",
        "/",
    ]

    def __init__(self, base_url: str = BASE_URL):
        self.base_url = base_url.rstrip("/")
        self.errors: list[dict] = []
        self.latencies: list[float] = []
        self.check_count = 0

    def check_endpoint(self, path: str) -> dict:
        """GET one endpoint and return a result record.

        The record always has ``path``, ``status``, ``latency_ms``, ``ok`` and
        ``timestamp``. A request that raises yields status 0, latency 0,
        ``ok=False`` and an extra ``error`` key. ``ok`` is true for any status
        below 500, so 4xx answers count as healthy.
        """
        start = time.time()
        try:
            resp = requests.get(f"{self.base_url}{path}", timeout=10)
        except Exception as e:
            # Connection errors and every other failure were handled by two
            # identical branches; a single one is enough.
            return {
                "path": path,
                "status": 0,
                "latency_ms": 0,
                "ok": False,
                "error": str(e),
                "timestamp": datetime.now().isoformat(),
            }
        latency_ms = (time.time() - start) * 1000
        return {
            "path": path,
            "status": resp.status_code,
            "latency_ms": latency_ms,
            "ok": resp.status_code < 500,
            "timestamp": datetime.now().isoformat(),
        }

    def run_checks(self) -> dict:
        """Probe every endpoint once and return a summary of the round.

        Returns:
            dict with ``check_number``, ``timestamp``, ``total_checks``,
            ``failures``, ``error_rate``, ``p95_ms`` and the raw ``results``.
        """
        results = [self.check_endpoint(p) for p in self.ENDPOINTS_TO_CHECK]
        self.check_count += 1

        failures = [r for r in results if not r["ok"]]
        # Failed requests report latency 0 and are left out of the statistics.
        latencies = [r["latency_ms"] for r in results if r["latency_ms"] > 0]

        # Nearest-rank p95; with only a handful of samples this is the maximum.
        if len(latencies) >= 2:
            p95 = sorted(latencies)[int(len(latencies) * 0.95)]
        else:
            p95 = latencies[0] if latencies else 0
        error_rate = len(failures) / len(results) if results else 0

        # Keep the raw data for the final report.
        self.latencies.extend(latencies)
        self.errors.extend(failures)

        return {
            "check_number": self.check_count,
            "timestamp": datetime.now().isoformat(),
            "total_checks": len(results),
            "failures": len(failures),
            "error_rate": error_rate,
            "p95_ms": p95,
            "results": results,
        }

    def watch(self, interval_seconds: int = 60):
        """Run checks forever, alerting on Telegram, until Ctrl+C.

        Each round is printed, appended to ``beta_monitor_log.jsonl`` in
        ``REPORTS_DIR`` and followed by a sleep of ``interval_seconds``.
        On KeyboardInterrupt the beta report is generated.
        """
        print(f"🔍 Beta monitoring démarré — {self.base_url}")
        print(f" Intervalle: {interval_seconds}s")
        print(f" Endpoints: {len(self.ENDPOINTS_TO_CHECK)}")
        print(" Ctrl+C pour arrêter\n")

        consecutive_errors = 0
        log_file = REPORTS_DIR / "beta_monitor_log.jsonl"

        try:
            while True:
                summary = self.run_checks()
                timestamp = datetime.now().strftime("%H:%M:%S")

                status_icon = "✅" if summary["error_rate"] == 0 else "❌"
                print(
                    f"[{timestamp}] {status_icon} "
                    f"Check #{summary['check_number']} — "
                    f"p95={summary['p95_ms']:.0f}ms, "
                    f"errors={summary['failures']}/{summary['total_checks']}"
                )

                # Error alerts fire only from the second failing round in a
                # row, to avoid paging on a single blip.
                if summary["error_rate"] > ERROR_RATE_THRESHOLD:
                    consecutive_errors += 1
                    if consecutive_errors >= 2:
                        for failure in summary["results"]:
                            if not failure["ok"]:
                                alert_error(
                                    failure["path"],
                                    failure.get("status", 0),
                                    failure.get("error", "Non-2xx response"),
                                )
                else:
                    consecutive_errors = 0

                # Latency: warn locally above the threshold, alert above 2x.
                if summary["p95_ms"] > LATENCY_P95_THRESHOLD_MS:
                    print(f"⚠️ Latence p95 élevée: {summary['p95_ms']:.0f}ms")
                    if summary["p95_ms"] > LATENCY_P95_THRESHOLD_MS * 2:
                        alert_performance(summary["p95_ms"], summary["error_rate"])

                # Persist the round for BetaReport.collect_monitor_logs().
                with open(log_file, "a", encoding="utf-8") as f:
                    f.write(json.dumps(summary) + "\n")

                time.sleep(interval_seconds)

        except KeyboardInterrupt:
            print(f"\n⏹️ Monitoring arrêté après {self.check_count} checks")
            # This used to call self.generate_report(), which does not exist
            # on this class and raised AttributeError on every Ctrl+C. The
            # report generator lives in BetaReport.
            BetaReport(self.base_url).generate()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Rapport beta final
|
||||
# ============================================================
|
||||
|
||||
|
||||
class BetaReport:
    """Builds the closed-beta report from stored feedback and monitor logs."""

    def __init__(self, base_url: str = BASE_URL):
        self.base_url = base_url
        # Used to give each report file a unique name.
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def collect_feedback_from_db(self) -> list[dict]:
        """Return all rows of ``beta_feedback`` as dicts, newest first.

        Returns an empty list when the table does not exist or when the
        database cannot be read (the error is printed, not raised).
        """
        conn = None
        try:
            conn = sqlite3.connect(BETA_DB_PATH)
            c = conn.cursor()
            c.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='beta_feedback'"
            )
            if not c.fetchone():
                return []
            c.execute("SELECT * FROM beta_feedback ORDER BY created_at DESC")
            # Column names are read while the connection is still open.
            columns = [col[0] for col in c.description]
            return [dict(zip(columns, row)) for row in c.fetchall()]
        except sqlite3.Error as e:
            print(f"⚠️ Impossible de lire beta_feedback: {e}")
            return []
        finally:
            # Closed on every path; the original leaked it when a query failed.
            if conn is not None:
                conn.close()

    def collect_monitor_logs(self) -> list[dict]:
        """Return the parsed entries of ``beta_monitor_log.jsonl``.

        Lines that are not valid JSON are skipped; a missing file yields [].
        """
        log_file = REPORTS_DIR / "beta_monitor_log.jsonl"
        if not log_file.exists():
            return []
        entries = []
        with open(log_file, encoding="utf-8") as f:
            for line in f:
                try:
                    entries.append(json.loads(line))
                except ValueError:
                    # json.JSONDecodeError is a ValueError: ignore only
                    # malformed lines instead of swallowing every exception.
                    continue
        return entries

    def generate(self) -> str:
        """Build the report, save it under ``REPORTS_DIR``, print and return it.

        The verdict is GO only when at least 5 feedbacks were received, the
        average NPS reaches ``NPS_TARGET`` and no critical/high bug is open.
        Performance figures are reported but are not part of the verdict.
        """
        feedbacks = self.collect_feedback_from_db()
        monitor_logs = self.collect_monitor_logs()

        # NPS from the feedbacks that carry a score.
        nps_scores = [
            f.get("nps_score") for f in feedbacks if f.get("nps_score") is not None
        ]
        avg_nps = sum(nps_scores) / len(nps_scores) if nps_scores else None

        # Monitoring statistics (all zero when there is no log).
        all_latencies = []
        total_errors = 0
        total_checks = 0
        for entry in monitor_logs:
            all_latencies.extend(
                r["latency_ms"]
                for r in entry.get("results", [])
                if r.get("latency_ms", 0) > 0
            )
            total_errors += entry.get("failures", 0)
            total_checks += entry.get("total_checks", 0)
        avg_latency = sum(all_latencies) / len(all_latencies) if all_latencies else 0
        overall_error_rate = total_errors / total_checks if total_checks > 0 else 0

        report = []
        report.append("=" * 60)
        report.append("RAPPORT BETA FERMÉE — SaaS Turf Prédictions IA")
        report.append(f"Généré le : {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        report.append("Ticket : HRT-34")
        report.append("=" * 60)
        report.append("")

        # --- 1. participants / NPS ---
        report.append("## 1. PARTICIPANTS BETA")
        report.append(f" Feedbacks reçus : {len(feedbacks)}")
        # `is not None`: an average of exactly 0.0 is a real (bad) score, not
        # "no feedback yet".
        if avg_nps is not None:
            report.append(f" NPS moyen : {avg_nps:.1f}/10")
        else:
            report.append(" NPS moyen : (en attente feedbacks)")
        report.append(f" Cible NPS : ≥ {NPS_TARGET}/10")
        nps_ok = avg_nps is not None and avg_nps >= NPS_TARGET
        if nps_ok:
            nps_status = "✅ OBJECTIF ATTEINT"
        elif avg_nps is None:
            nps_status = "⏳ En attente"
        else:
            nps_status = "❌ OBJECTIF NON ATTEINT"
        report.append(f" Statut NPS : {nps_status}")
        report.append("")

        # --- 2. bugs ---
        report.append("## 2. BUGS SIGNALÉS")
        bugs = [f for f in feedbacks if f.get("type") == "bug"]
        critical_bugs = [b for b in bugs if b.get("severity") in ("critical", "high")]
        no_critical = len(critical_bugs) == 0
        report.append(f" Total bugs : {len(bugs)}")
        report.append(f" Critiques/High : {len(critical_bugs)}")
        if no_critical:
            bug_status = "✅ 0 bug critique"
        else:
            bug_status = f"❌ {len(critical_bugs)} bug(s) critique(s)"
        report.append(f" Statut : {bug_status}")
        report.append("")

        # --- 3. performance ---
        report.append("## 3. PERFORMANCE RÉELLE (monitoring)")
        report.append(f" Checks effectués: {total_checks}")
        report.append(f" Latence moyenne : {avg_latency:.1f}ms")
        report.append(f" Error rate : {overall_error_rate * 100:.2f}%")
        report.append(f" Seuil latence : {LATENCY_P95_THRESHOLD_MS}ms")
        # Without any check there is nothing to judge: previously the zeroed
        # statistics passed both thresholds and were reported as a success,
        # making the "insufficient data" status unreachable.
        perf_ok = (
            total_checks > 0
            and avg_latency < LATENCY_P95_THRESHOLD_MS
            and overall_error_rate < ERROR_RATE_THRESHOLD
        )
        if total_checks == 0:
            perf_status = "⏳ Données insuffisantes"
        elif perf_ok:
            perf_status = "✅ OBJECTIF ATTEINT"
        else:
            perf_status = "❌ OBJECTIF NON ATTEINT"
        report.append(f" Statut : {perf_status}")
        report.append("")

        # --- 4. UX feedback (first five) ---
        report.append("## 4. FEEDBACKS UX")
        ux_feedbacks = [f for f in feedbacks if f.get("type") == "ux"]
        report.append(f" Retours UX : {len(ux_feedbacks)}")
        for fb in ux_feedbacks[:5]:
            # A NULL comment comes back as None; treat it as empty text.
            comment = fb.get("comment") or ""
            report.append(f" - {comment[:100]}")
        report.append("")

        # --- 5. verdict ---
        report.append("## 5. VERDICT BETA FERMÉE")
        # NOTE(review): the threshold here is 5 while BETA_MIN_USERS is 10 —
        # confirm which one is intended.
        users_ok = len(feedbacks) >= 5
        verdict = all([users_ok, nps_ok, no_critical])
        report.append(f" Participants suffisants (≥5) : {'✅' if users_ok else '❌'}")
        report.append(f" NPS ≥ 7/10 : {'✅' if nps_ok else '❌'}")
        report.append(f" 0 bug critique : {'✅' if no_critical else '❌'}")
        report.append("")
        if verdict:
            report.append(" VERDICT GLOBAL : ✅ GO — Beta réussie")
        else:
            report.append(" VERDICT GLOBAL : ❌ NO-GO — Conditions non remplies")
        report.append("=" * 60)

        report_text = "\n".join(report)

        # The text contains accents and emoji: write it as UTF-8 regardless of
        # the platform's default encoding.
        report_file = REPORTS_DIR / f"beta_report_{self.timestamp}.txt"
        with open(report_file, "w", encoding="utf-8") as f:
            f.write(report_text)

        print(report_text)
        print(f"\nRapport sauvegardé : {report_file}")

        return report_text
|
||||
|
||||
|
||||
# ============================================================
|
||||
# CLI
|
||||
# ============================================================
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Exactly one action runs, checked in this order: ``--test-telegram``,
    ``--report``, ``--watch``. Each of them ends the process through
    ``sys.exit``; with no action flag the help text is printed.
    """
    cli = argparse.ArgumentParser(description="Beta Monitor — SaaS Turf IA")
    cli.add_argument("--watch", action="store_true", help="Surveillance continue")
    cli.add_argument(
        "--interval",
        type=int,
        default=60,
        help="Intervalle en secondes (défaut: 60)",
    )
    cli.add_argument(
        "--report",
        action="store_true",
        help="Générer le rapport beta final",
    )
    cli.add_argument(
        "--test-telegram",
        action="store_true",
        help="Tester l'envoi Telegram",
    )
    cli.add_argument(
        "--url",
        default=BASE_URL,
        help=f"URL de l'app (défaut: {BASE_URL})",
    )
    opts = cli.parse_args()

    if opts.test_telegram:
        print("Test d'envoi Telegram...")
        sent = send_telegram(
            "✅ *Test alerte Beta* — SaaS Turf IA\n_Ceci est un test du système d'alertes QA_\nTicket: HRT-34"
        )
        # Exit status mirrors whether the message was delivered.
        sys.exit(0 if sent else 1)

    if opts.report:
        BetaReport(opts.url).generate()
        sys.exit(0)

    if opts.watch:
        BetaMonitor(opts.url).watch(interval_seconds=opts.interval)
        sys.exit(0)

    cli.print_help()


if __name__ == "__main__":
    main()
|
||||
124
tests/conftest.py
Normal file
124
tests/conftest.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
conftest.py — Configuration pytest globale
|
||||
SaaS Turf Prédictions IA — Sprint 8 QA
|
||||
Ticket: HRT-34
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# ============================================================
|
||||
# Répertoires de sortie
|
||||
# ============================================================
|
||||
|
||||
# Output directories (relative to the working directory), created on import.
REPORTS_DIR = Path("tests/reports")
SCREENSHOTS_DIR = Path("tests/screenshots")

for d in (REPORTS_DIR, SCREENSHOTS_DIR):
    d.mkdir(parents=True, exist_ok=True)

# ============================================================
# Environment variables
# ============================================================

# Root URL of the application under test.
BASE_URL = os.getenv("APP_URL", "http://localhost:8792")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Fixtures globales
|
||||
# ============================================================
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def base_url():
    """Root URL of the application under test."""
    return BASE_URL


@pytest.fixture(scope="session")
def event_loop():
    """Single event loop shared by the async tests of the session.

    NOTE(review): recent pytest-asyncio releases discourage overriding this
    fixture — confirm against the pinned plugin version.
    """
    shared_loop = asyncio.get_event_loop_policy().new_event_loop()
    yield shared_loop
    shared_loop.close()


@pytest.fixture(scope="session")
def reports_dir():
    """Directory where test reports are written."""
    return REPORTS_DIR


@pytest.fixture(scope="session")
def screenshots_dir():
    """Directory where failure screenshots are written."""
    return SCREENSHOTS_DIR
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Hook : screenshot automatique sur échec
|
||||
# ============================================================
|
||||
|
||||
|
||||
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Attach a screenshot to the report of any failing test that has a page.

    After the report is built, a failure in the "call" phase triggers a
    capture from the first of the ``page`` / ``context_page`` fixtures the
    test requested. The outcome (saved path or error text) is appended to
    ``report.sections``; capture errors never fail the hook.
    """
    outcome = yield
    report = outcome.get_result()

    # Setup/teardown phases and passing tests need no screenshot.
    if report.when != "call" or not report.failed:
        return

    page = None
    for candidate in ("page", "context_page"):
        if candidate not in item.funcargs:
            continue
        value = item.funcargs[candidate]
        # Some fixtures yield a (page, browser_name) tuple.
        page = value[0] if isinstance(value, tuple) else value
        break

    if page is None:
        return

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_name = item.name.replace("/", "_").replace(":", "_")
    screenshot_path = SCREENSHOTS_DIR / f"FAIL_{safe_name}_{stamp}.png"
    target = str(screenshot_path)

    try:
        if asyncio.iscoroutinefunction(page.screenshot):
            # Async page object: drive the coroutine to completion here.
            loop = asyncio.get_event_loop()
            loop.run_until_complete(page.screenshot(path=target))
        else:
            # Sync page object: direct call.
            page.screenshot(path=target)
        section = ("Screenshot", f"Sauvegardé : {screenshot_path}")
        report.sections.append(section)
    except Exception as exc:
        section = ("Screenshot Error", f"Impossible de capturer : {exc}")
        report.sections.append(section)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Marqueurs personnalisés
|
||||
# ============================================================
|
||||
|
||||
|
||||
def pytest_configure(config):
    """Register the project's custom markers on the pytest config."""
    marker_lines = (
        "e2e: Tests End-to-End Playwright",
        "load: Tests de charge Locust",
        "security: Tests de sécurité",
        "smoke: Tests rapides de smoke (sans infra complète)",
        "beta: Tests spécifiques beta fermée",
        "requires_billing: Nécessite HRT-31 (Billing Stripe)",
        "requires_infra: Nécessite HRT-33 (infra staging)",
    )
    for line in marker_lines:
        config.addinivalue_line("markers", line)
|
||||
333
tests/test_ml_ensemble.py
Normal file
333
tests/test_ml_ensemble.py
Normal file
@@ -0,0 +1,333 @@
|
||||
"""
|
||||
Tests ML Ensemble — HRT-32 Sprint 6-7
|
||||
Tests de régression, benchmark et latence pour le nouveau modèle ensemble.
|
||||
|
||||
Usage:
|
||||
pytest tests/test_ml_ensemble.py -v
|
||||
pytest tests/test_ml_ensemble.py -v -m regression
|
||||
pytest tests/test_ml_ensemble.py -v -m latency
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import pickle
|
||||
import sqlite3
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
# Application URL and database path, overridable through the environment.
# NOTE(review): the default port here is 8790 while the other test modules in
# this change default to 8792 — confirm this is intended.
BASE_URL = os.getenv("APP_URL", "http://localhost:8790")
DB_PATH = os.getenv("DB_PATH", "/home/h3r7/turf_saas/turf.db")

# Artefacts expected in the models directory.
MODELS_DIR = Path("/home/h3r7/turf_saas/models")
ENSEMBLE_PATH = MODELS_DIR.joinpath("ensemble_top3.pkl")
BENCHMARK_PATH = MODELS_DIR.joinpath("benchmark_report.json")
|
||||
|
||||
|
||||
# ─── Fixtures ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def ensemble_model():
    """Unpickle the ensemble once per session; skip if it is not trained yet."""
    if not ENSEMBLE_PATH.exists():
        pytest.skip(
            f"Ensemble model not found at {ENSEMBLE_PATH}. Run train_ensemble.py first."
        )
    # NOTE: unpickling runs arbitrary code — only load artefacts produced by
    # this project's own training scripts.
    with open(ENSEMBLE_PATH, "rb") as handle:
        model = pickle.load(handle)
    return model
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def benchmark_report():
    """Parse the benchmark JSON once per session; skip if it is missing."""
    if not BENCHMARK_PATH.exists():
        pytest.skip(f"Benchmark report not found at {BENCHMARK_PATH}.")
    with open(BENCHMARK_PATH) as handle:
        report = json.load(handle)
    return report
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def holdout_data():
    """Return the last 20% of finished-race rows, in chronological order.

    Rows are runners with a recorded finishing position (``ordre_arrivee > 0``)
    joined to their race, ordered by date / meeting / race / runner number.

    NOTE(review): the cut is made on the row count, so the first holdout race
    may be split in two, and the slice is not guaranteed to match the holdout
    used at training time — confirm against train_ensemble.temporal_split.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        df = pd.read_sql_query(
            """
            SELECT p.*, c.distance, c.discipline, c.specialite,
                   c.nb_declares_partants, c.montant_prix, c.penetrometre_intitule
            FROM pmu_partants p
            LEFT JOIN pmu_courses c ON p.date_programme=c.date_programme
                AND p.num_reunion=c.num_reunion AND p.num_course=c.num_course
            WHERE p.ordre_arrivee > 0
            ORDER BY p.date_programme, p.num_reunion, p.num_course, p.num_pmu
            """,
            conn,
        )
    finally:
        # Close the connection even when the query fails (e.g. missing table);
        # it previously leaked in that case.
        conn.close()
    cutoff = int(len(df) * 0.80)
    return df.iloc[cutoff:].copy()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def predict_v2():
    """Load predict_v2.py from its absolute path and return the module object."""
    from importlib.util import module_from_spec, spec_from_file_location

    module_spec = spec_from_file_location(
        "predict_v2", "/home/h3r7/turf_saas/predict_v2.py"
    )
    module = module_from_spec(module_spec)
    # Executes the module body; the module is not registered in sys.modules.
    module_spec.loader.exec_module(module)
    return module
|
||||
|
||||
|
||||
# ─── Model Existence Tests ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestModelFiles:
    """Checks that the training artefacts are present on disk."""

    def test_ensemble_model_exists(self):
        """The pickled ensemble must exist."""
        found = ENSEMBLE_PATH.exists()
        assert found, f"Ensemble model missing: {ENSEMBLE_PATH}"

    def test_benchmark_report_exists(self):
        """The JSON benchmark report must exist."""
        found = BENCHMARK_PATH.exists()
        assert found, f"Benchmark report missing: {BENCHMARK_PATH}"

    def test_models_dir_contains_expected_files(self):
        """Model, JSON report and Markdown report must all be in MODELS_DIR."""
        for fname in ("ensemble_top3.pkl", "benchmark_report.json", "benchmark_report.md"):
            candidate = MODELS_DIR / fname
            assert candidate.exists(), f"Missing: {candidate}"
|
||||
|
||||
|
||||
# ─── Benchmark Tests ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBenchmark:
    """Sanity checks on the metrics stored in the benchmark report."""

    @pytest.mark.regression
    def test_ensemble_beats_baseline_or_meets_threshold(self, benchmark_report):
        """Ensemble Precision@3 must be at least the baseline's."""
        baseline_p3 = benchmark_report["baseline"]["precision_at3"]
        ensemble_p3 = benchmark_report["ensemble"]["precision_at3"]
        assert not ensemble_p3 < baseline_p3, (
            f"Ensemble Precision@3 {ensemble_p3:.4f} < baseline {baseline_p3:.4f}"
        )

    @pytest.mark.regression
    def test_ensemble_auc_above_random(self, benchmark_report):
        """Ensemble AUC must exceed 0.60 (0.50 being random)."""
        ensemble_auc = benchmark_report["ensemble"]["auc"]
        assert ensemble_auc > 0.60, f"Ensemble AUC {ensemble_auc:.4f} <= 0.60"

    @pytest.mark.regression
    def test_optuna_ran_minimum_trials(self, benchmark_report):
        """The report must declare at least 100 Optuna trials."""
        trials = benchmark_report["optuna"]["n_trials"]
        assert trials >= 100, f"Only {trials} Optuna trials (minimum 100 required)"

    @pytest.mark.regression
    def test_no_precision_regression(self, benchmark_report):
        """Ensemble Precision@3 must stay at or above the 0.30 floor."""
        p_at3 = benchmark_report["ensemble"]["precision_at3"]
        assert p_at3 >= 0.30, (
            f"Precision@3 {p_at3:.4f} is below random baseline (~0.30)"
        )

    def test_benchmark_has_all_required_models(self, benchmark_report):
        """The report must hold an entry for each of the three base models."""
        reported = set(benchmark_report.get("individual_models", {}))
        missing = {"xgboost", "lightgbm", "mlp"} - reported
        assert not missing, f"Missing model benchmarks: {missing}"
|
||||
|
||||
|
||||
# ─── Regression Tests ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPrecisionRegression:
    """Regression checks that score the ensemble on the holdout data."""

    @pytest.mark.regression
    def test_precision_at3_on_holdout(self, ensemble_model, holdout_data):
        """Mean per-race Precision@3 on the holdout data must reach 0.30."""
        from predict_v2 import build_feature_df, FEATURE_COLS

        race_keys = ["date_programme", "num_reunion", "num_course"]

        df = holdout_data.copy()
        # A runner counts as a hit when it finished in the first three places.
        df["top3"] = (df["ordre_arrivee"] <= 3).astype(int)

        feature_df = build_feature_df(df.to_dict("records"))
        available = [c for c in FEATURE_COLS if c in feature_df.columns]
        proba = ensemble_model.predict_proba(feature_df[available].fillna(0))[:, 1]

        # Per-race Precision@3: share of hits among the three highest-scored
        # runners; races with fewer than three runners are left out.
        scored = df[race_keys].copy().assign(proba=proba, actual=df["top3"].values)
        precisions = [
            race.nlargest(3, "proba")["actual"].sum() / 3.0
            for _, race in scored.groupby(race_keys)
            if len(race) >= 3
        ]

        p_at3 = float(np.mean(precisions)) if precisions else 0.0
        print(f"\n  Holdout Precision@3: {p_at3:.4f} over {len(precisions)} races")

        # Must beat random baseline (30%)
        assert p_at3 >= 0.30, f"Holdout Precision@3 {p_at3:.4f} < 0.30"

    @pytest.mark.regression
    def test_no_all_zero_predictions(self, ensemble_model, holdout_data):
        """Predictions on the first 50 holdout rows must be non-zero and varied."""
        from predict_v2 import build_feature_df, FEATURE_COLS

        sample_records = holdout_data.head(50).to_dict("records")
        feature_df = build_feature_df(sample_records)
        available = [c for c in FEATURE_COLS if c in feature_df.columns]

        proba = ensemble_model.predict_proba(feature_df[available].fillna(0))[:, 1]

        highest = proba.max()
        assert highest > 0.01, "All predictions are near 0 — model appears broken"

        spread = proba.std()
        assert spread > 0.01, (
            "All predictions have identical probability — no discrimination"
        )
|
||||
|
||||
|
||||
# ─── Latency Tests ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPredictionLatency:
    """Timing checks on ``ensemble_model.predict_proba``."""

    @pytest.mark.latency
    def test_single_race_latency(self, ensemble_model, holdout_data):
        """Average prediction time for one race must stay under 200 ms."""
        from predict_v2 import build_feature_df, FEATURE_COLS

        race_keys = ["date_programme", "num_reunion", "num_course"]

        # Take one race: the first group in groupby order.
        first_race = holdout_data.groupby(race_keys).first().reset_index().iloc[0]
        same_day = holdout_data["date_programme"] == first_race["date_programme"]
        same_meeting = holdout_data["num_reunion"] == first_race["num_reunion"]
        same_course = holdout_data["num_course"] == first_race["num_course"]
        partants = holdout_data[same_day & same_meeting & same_course].to_dict(
            "records"
        )

        feature_df = build_feature_df(partants)
        available = [c for c in FEATURE_COLS if c in feature_df.columns]
        X = feature_df[available].fillna(0)

        # Warm-up call, kept outside the timed section.
        ensemble_model.predict_proba(X)

        # Timed run: average over repeated calls on the same matrix.
        n_runs = 10
        t0 = time.perf_counter()
        for _ in range(n_runs):
            ensemble_model.predict_proba(X)
        elapsed_ms = (time.perf_counter() - t0) / n_runs * 1000

        print(f"\n  Single-race latency: {elapsed_ms:.2f} ms ({len(partants)} horses)")
        assert elapsed_ms < 200, (
            f"Prediction latency {elapsed_ms:.1f} ms exceeds 200 ms limit"
        )

    @pytest.mark.latency
    def test_full_day_latency(self, ensemble_model, holdout_data):
        """One prediction call over a whole day of runners must take under 5 s."""
        from predict_v2 import build_feature_df, FEATURE_COLS

        # Take one day: the date of the first holdout row.
        day = holdout_data["date_programme"].iloc[0]
        on_that_day = holdout_data["date_programme"] == day
        partants = holdout_data[on_that_day].to_dict("records")

        feature_df = build_feature_df(partants)
        available = [c for c in FEATURE_COLS if c in feature_df.columns]
        X = feature_df[available].fillna(0)

        # Only the prediction call itself is timed; feature building is not.
        t0 = time.perf_counter()
        ensemble_model.predict_proba(X)
        elapsed_ms = (time.perf_counter() - t0) * 1000

        print(
            f"\n  Full day latency: {elapsed_ms:.2f} ms ({len(partants)} horses, {day})"
        )
        assert elapsed_ms < 5000, (
            f"Full-day prediction {elapsed_ms:.0f} ms exceeds 5s limit"
        )
|
||||
|
||||
|
||||
# ─── API Endpoint Tests ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestV1PredictionsAPI:
    """Tests for the /api/v1/predictions and /api/v1/model/status endpoints.

    Every test is skipped when the server at ``BASE_URL`` cannot be reached.
    """

    def _api_available(self):
        """Return True when the model-status endpoint answers within 3 seconds.

        Any HTTP response counts as "available", whatever its status code.
        Only request-level failures (connection refused, timeout, invalid
        URL, ...) are treated as "not running"; other exceptions propagate so
        that genuine bugs are not hidden behind a skip.
        """
        try:
            requests.get(f"{BASE_URL}/api/v1/model/status", timeout=3)
        except requests.exceptions.RequestException:
            return False
        return True

    def _get_or_skip(self, path, timeout):
        """GET ``BASE_URL + path``, skipping the test if the API is down."""
        if not self._api_available():
            pytest.skip("API server not running")
        return requests.get(f"{BASE_URL}{path}", timeout=timeout)

    @pytest.mark.api
    def test_model_status_endpoint(self):
        """GET /api/v1/model/status returns 200 and an ``ensemble_available`` key."""
        resp = self._get_or_skip("/api/v1/model/status", timeout=10)
        assert resp.status_code == 200
        data = resp.json()
        assert "ensemble_available" in data

    @pytest.mark.api
    def test_v1_predictions_no_500(self):
        """GET /api/v1/predictions must not return a 5xx status."""
        resp = self._get_or_skip("/api/v1/predictions", timeout=30)
        assert resp.status_code < 500, (
            f"Server error: {resp.status_code}\n{resp.text[:200]}"
        )

    @pytest.mark.api
    def test_v1_predictions_returns_json(self):
        """GET /api/v1/predictions returns JSON carrying the expected keys."""
        resp = self._get_or_skip("/api/v1/predictions", timeout=30)
        if resp.status_code == 503:
            pytest.skip("Ensemble model not yet deployed")
        assert resp.status_code == 200
        data = resp.json()
        assert "model_version" in data, "Missing model_version in response"
        assert "races" in data or "predictions" in data, (
            "Missing races/predictions in response"
        )

    @pytest.mark.api
    def test_v1_predictions_latency(self):
        """GET /api/v1/predictions must report (or show) a latency under 3 s.

        The API-reported ``latency_ms`` is preferred. When the payload does
        not carry it, the elapsed time measured by ``requests`` is used, so
        the check can no longer pass merely because the field is missing.
        Responses other than 200 and 503 are not checked.
        """
        resp = self._get_or_skip("/api/v1/predictions", timeout=30)
        if resp.status_code == 503:
            pytest.skip("Ensemble model not yet deployed")
        if resp.status_code != 200:
            return

        latency = resp.json().get("latency_ms")
        if latency is None:
            # Fall back to the client-side timing recorded by requests.
            latency = resp.elapsed.total_seconds() * 1000
        assert latency < 3000, f"API latency {latency:.0f} ms > 3000 ms"
|
||||
205
tests/test_smoke.py
Normal file
205
tests/test_smoke.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""
|
||||
Tests de smoke — SaaS Turf Prédictions IA
|
||||
Sprint 8 — QA, Beta Fermee, Go/No-Go
|
||||
Ticket: HRT-34
|
||||
|
||||
Vérifications rapides sur l'état de l'application :
|
||||
- Routes de base accessibles
|
||||
- API répond en JSON valide
|
||||
- Base de données accessible
|
||||
- Pas d'erreurs 5xx sur les routes principales
|
||||
|
||||
Ces tests peuvent tourner SANS infra complète (pas besoin de HRT-31/33).
|
||||
Exécuter sur l'app actuelle en staging ou localhost.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import os
|
||||
import json
|
||||
|
||||
# Base URL of the application under test; the APP_URL environment variable
# overrides the localhost default.
BASE_URL = os.getenv("APP_URL", "http://localhost:8792")

# Public routes that must be reachable without authentication.
PUBLIC_ROUTES_200 = ["/", "/dashboard"]

# API routes that may answer 200 or 401, but never a 5xx.
API_ROUTES_NO_500 = [
    "/api",
    "/api/races",
    "/api/scoring",
    "/api/weather",
    "/api/odds_history",
]
|
||||
|
||||
|
||||
class TestSmoke:
    """Smoke tests: the app answers basic requests correctly.

    Every test is skipped (not failed) when nothing is listening at
    ``BASE_URL``.
    """

    @staticmethod
    def _get_or_skip(route, skip_message=None):
        """GET ``BASE_URL + route``; skip the calling test on connection failure.

        Only the request itself is guarded, so assertion failures in the
        tests can never be confused with connectivity problems.
        """
        try:
            return requests.get(f"{BASE_URL}{route}", timeout=10)
        except requests.exceptions.ConnectionError:
            pytest.skip(skip_message or f"App non accessible sur {BASE_URL}")

    @pytest.mark.smoke
    @pytest.mark.parametrize("route", PUBLIC_ROUTES_200)
    def test_route_publique_accessible(self, route):
        """Public routes must answer 200 (or 304) with a non-empty body."""
        resp = self._get_or_skip(
            route,
            f"App non accessible sur {BASE_URL} — vérifier que le serveur est démarré",
        )
        assert resp.status_code in (200, 304), (
            f"Route publique inaccessible: {route} → {resp.status_code}"
        )
        assert len(resp.content) > 0, f"Réponse vide sur {route}"

    @pytest.mark.smoke
    @pytest.mark.parametrize("route", API_ROUTES_NO_500)
    def test_api_pas_derreur_serveur(self, route):
        """API routes must never answer with a 5xx status."""
        resp = self._get_or_skip(route)
        assert resp.status_code < 500, (
            f"Erreur serveur sur {route}: {resp.status_code}\n{resp.text[:200]}"
        )

    @pytest.mark.smoke
    def test_api_today_retourne_json(self):
        """The main /api endpoint must return valid JSON (a list or a dict)."""
        resp = self._get_or_skip("/api")
        if resp.status_code != 200:
            return
        try:
            data = resp.json()
        except ValueError as e:
            # ValueError is the common base of the stdlib and simplejson
            # decode errors that Response.json() may raise.
            pytest.fail(f"/api ne retourne pas du JSON valide: {e}")
        assert data is not None, "Réponse JSON nulle"
        assert isinstance(data, (list, dict)), (
            f"Type de réponse inattendu: {type(data)}"
        )

    @pytest.mark.smoke
    def test_contenu_html_portail_valide(self):
        """The portal page must be HTML and longer than 500 characters."""
        resp = self._get_or_skip("/")
        if resp.status_code != 200:
            return
        content = resp.text
        lowered = content.lower()
        assert "<html" in lowered or "<!doctype" in lowered, (
            "La page d'accueil ne retourne pas du HTML"
        )
        assert len(content) > 500, (
            f"Page d'accueil trop courte ({len(content)} chars)"
        )

    @pytest.mark.smoke
    def test_headers_securite_presents(self):
        """Warn (without failing) when basic security headers are missing.

        In production (behind Nginx) these headers are expected; when hitting
        Flask directly in development they may be absent, so a missing header
        is reported as a warning rather than as a failure.
        """
        import warnings

        resp = self._get_or_skip("/")
        if resp.status_code != 200:
            return

        security_headers = (
            "X-Content-Type-Options",  # expected value: nosniff
            "X-Frame-Options",  # SAMEORIGIN or DENY
            "X-XSS-Protection",
        )
        missing = [h for h in security_headers if h not in resp.headers]

        if missing:
            # A real warning shows up in pytest's warnings summary, whereas
            # printed output is captured and normally never displayed.
            warnings.warn(
                f"⚠️  Headers sécurité manquants (requis en prod): {missing}",
                UserWarning,
                stacklevel=2,
            )

    @pytest.mark.smoke
    def test_api_races_format_reponse(self):
        """The /api/races endpoint must return structured JSON.

        When the payload is a non-empty list, its first element must expose
        at least one of the key fields, in lower or upper case.
        """
        resp = self._get_or_skip("/api/races")
        if resp.status_code != 200:
            return
        try:
            data = resp.json()
        except ValueError:
            pytest.fail("/api/races ne retourne pas du JSON valide")

        assert isinstance(data, (list, dict)), (
            f"Format inattendu pour /api/races: {type(data)}"
        )
        if isinstance(data, list) and data:
            first = data[0]
            # Check that at least one key field is present.
            expected_fields = ["date", "course", "hippodrome"]
            present = [
                f for f in expected_fields if f in first or f.upper() in first
            ]
            assert present, (
                f"Champs attendus absents de /api/races. Champs présents: {list(first)}"
            )
|
||||
|
||||
|
||||
class TestSmokeDatabase:
    """Smoke tests on the SQLite database.

    Both tests are skipped when the database file does not exist.
    """

    # Location of the SQLite database inspected by these tests.
    DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"

    def _connect_or_skip(self):
        """Open the database, skipping the calling test if the file is absent."""
        import sqlite3

        db_path = self.DB_PATH
        if not os.path.exists(db_path):
            pytest.skip(f"Base de données non trouvée: {db_path}")
        return sqlite3.connect(db_path)

    @pytest.mark.smoke
    def test_base_donnees_accessible(self):
        """The database must open and contain the essential tables."""
        conn = self._connect_or_skip()
        try:
            c = conn.cursor()
            # List every table defined in the database.
            c.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = {row[0] for row in c.fetchall()}
        finally:
            # Release the connection even when the query raises.
            conn.close()

        expected_tables = ["predictions", "results"]
        for table in expected_tables:
            assert table in tables, (
                f"Table manquante dans la BDD: {table}. Tables présentes: {tables}"
            )

    @pytest.mark.smoke
    def test_donnees_predictions_disponibles(self):
        """The predictions table must be queryable.

        An empty table does not fail the test; it only prints a notice that
        the scraper has to be run.
        """
        conn = self._connect_or_skip()
        try:
            c = conn.cursor()
            c.execute("SELECT COUNT(*) FROM predictions")
            count = c.fetchone()[0]
        finally:
            # Release the connection even when the query raises.
            conn.close()

        if count == 0:
            print("⚠️  Aucune prédiction en base — le scraper doit être lancé")
|
||||
1007
train_ensemble.py
Normal file
1007
train_ensemble.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user