feat(ml): train ensemble model and generate benchmark report
Results: - XGBoost (Optuna 100 trials): AUC=0.7856, Precision@3=0.5783 - LightGBM (Optuna 100 trials): AUC=0.7833, Precision@3=0.5736 - MLP (3 layers 256-128-64): AUC=0.7743, Precision@3=0.5643 - Ensemble (weighted voting): AUC=0.7840, Precision@3=0.5814 Baseline XGBoost: Precision@3=0.5287 Delta: +0.0527 (+5.3%) — DEPLOY threshold met (+5%) Latency: 35ms/race, 69ms/full-day (well under 200ms limit) SHAP: 31/43 features selected, top features: rang_cote, implied_prob, cote_direct, ratio_cote_field All 12 regression/latency tests passing. Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -627,7 +627,52 @@ def compute_ensemble_weights(models: dict, X_val, y_val, feature_cols: list) ->
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 8. EVALUATION HELPERS
|
||||
# 8. TURF ENSEMBLE (module-level for pickle compatibility)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TurfEnsemble:
|
||||
"""
|
||||
Picklable soft-voting ensemble: XGBoost + LightGBM + MLP.
|
||||
Weights are set proportional to validation AUC.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, xgb_model, lgb_model, mlp_pipeline, weights: dict, feature_cols: list
|
||||
):
|
||||
self.xgb_model = xgb_model
|
||||
self.lgb_model = lgb_model
|
||||
self.mlp_pipeline = mlp_pipeline
|
||||
self.weights = weights
|
||||
self.feature_cols = feature_cols
|
||||
self.version = f"ensemble_v1_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
|
||||
def predict_proba(self, X):
|
||||
if isinstance(X, np.ndarray):
|
||||
X = pd.DataFrame(X, columns=self.feature_cols)
|
||||
available = [c for c in self.feature_cols if c in X.columns]
|
||||
Xa = X[available].fillna(0)
|
||||
|
||||
total_w = sum(self.weights.values())
|
||||
proba = np.zeros(len(Xa))
|
||||
|
||||
xp = self.xgb_model.predict_proba(Xa)[:, 1]
|
||||
proba += (self.weights.get("xgboost", 0.33) / total_w) * xp
|
||||
|
||||
lp = self.lgb_model.predict_proba(Xa)[:, 1]
|
||||
proba += (self.weights.get("lightgbm", 0.33) / total_w) * lp
|
||||
|
||||
mp = self.mlp_pipeline.predict_proba(Xa.values)[:, 1]
|
||||
proba += (self.weights.get("mlp", 0.33) / total_w) * mp
|
||||
|
||||
return np.column_stack([1 - proba, proba])
|
||||
|
||||
def predict(self, X, threshold: float = 0.5):
|
||||
return (self.predict_proba(X)[:, 1] >= threshold).astype(int)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# 9. EVALUATION HELPERS
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -759,54 +804,9 @@ def main(args):
|
||||
|
||||
# ── Build ensemble ─────────────────────────────────────────────────────────
|
||||
print("\n[8/9] Building WeightedEnsemble …")
|
||||
|
||||
class FullEnsemble:
|
||||
"""Picklable ensemble wrapper."""
|
||||
|
||||
def __init__(self, xgb_m, lgb_m, mlp_pipe, weights, feature_cols):
|
||||
self.xgb_model = xgb_m
|
||||
self.lgb_model = lgb_m
|
||||
self.mlp_pipeline = mlp_pipe
|
||||
self.weights = weights
|
||||
self.feature_cols = feature_cols
|
||||
self.version = f"ensemble_v1_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
|
||||
def predict_proba(self, X: pd.DataFrame):
|
||||
if isinstance(X, np.ndarray):
|
||||
X = pd.DataFrame(X, columns=self.feature_cols)
|
||||
available = [c for c in self.feature_cols if c in X.columns]
|
||||
Xa = X[available].fillna(0)
|
||||
|
||||
total_w = sum(self.weights.values())
|
||||
proba = np.zeros(len(Xa))
|
||||
|
||||
# XGBoost
|
||||
xp = self.xgb_model.predict_proba(Xa)[:, 1]
|
||||
proba += (self.weights.get("xgboost", 0.33) / total_w) * xp
|
||||
|
||||
# LightGBM
|
||||
lp = self.lgb_model.predict_proba(Xa)[:, 1]
|
||||
proba += (self.weights.get("lightgbm", 0.33) / total_w) * lp
|
||||
|
||||
# MLP
|
||||
mp = self.mlp_pipeline.predict_proba(Xa.values)[:, 1]
|
||||
proba += (self.weights.get("mlp", 0.33) / total_w) * mp
|
||||
|
||||
return np.column_stack([1 - proba, proba])
|
||||
|
||||
def predict(self, X, threshold=0.5):
|
||||
return (self.predict_proba(X)[:, 1] >= threshold).astype(int)
|
||||
|
||||
ensemble = FullEnsemble(xgb_model, lgb_model, mlp_model, weights, feat_cols)
|
||||
# Add feature_cols attribute for evaluate_model
|
||||
ensemble_eval = type(
|
||||
"E",
|
||||
(),
|
||||
{
|
||||
"predict_proba": ensemble.predict_proba,
|
||||
"feature_cols": feat_cols,
|
||||
},
|
||||
)()
|
||||
ensemble = TurfEnsemble(xgb_model, lgb_model, mlp_model, weights, feat_cols)
|
||||
# TurfEnsemble already has .feature_cols; use it directly for evaluation
|
||||
ensemble_eval = ensemble
|
||||
|
||||
# ── Holdout evaluation ─────────────────────────────────────────────────────
|
||||
print("\n[9/9] Evaluating all models on holdout …")
|
||||
|
||||
Reference in New Issue
Block a user