Compare commits
14 Commits
feature/HR
...
feature/HR
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ec024d8236 | ||
|
|
225295030b | ||
|
|
86e85aa1c6 | ||
| 5aa6013c52 | |||
|
|
4b4323f707 | ||
|
|
356bdf5bec | ||
|
|
f9a45e6deb | ||
|
|
cfc0f038f9 | ||
|
|
c999285895 | ||
| 837a0845ec | |||
|
|
4bf458f1b8 | ||
|
|
099286b078 | ||
|
|
7f5573f076 | ||
|
|
82d6bdafba |
@@ -22,8 +22,14 @@ from auth import jwt_required_middleware, plan_required, free_daily_limit_check
|
||||
predictions_bp = Blueprint("v1_predictions", __name__, url_prefix="/api/v1/predictions")
|
||||
|
||||
|
||||
def _fetch_ml_predictions(conn, date: str, limit: int = None, offset: int = 0):
|
||||
"""Shared helper — returns rows from ml_predictions_cache."""
|
||||
def _fetch_ml_predictions(
|
||||
conn, date: str, limit: int = None, offset: int = 0, include_weather: bool = False
|
||||
):
|
||||
"""Shared helper — returns rows from ml_predictions_cache.
|
||||
|
||||
include_weather=True adds terrain_condition and weather_impact columns
|
||||
via LEFT JOIN on pmu_meteo (premium routes only).
|
||||
"""
|
||||
if not table_exists(conn, "ml_predictions_cache"):
|
||||
return [], 0
|
||||
|
||||
@@ -33,13 +39,35 @@ def _fetch_ml_predictions(conn, date: str, limit: int = None, offset: int = 0):
|
||||
).fetchone()
|
||||
total = count_row["cnt"] if count_row else 0
|
||||
|
||||
sql = """SELECT
|
||||
race_label, hippodrome, discipline, distance, heure,
|
||||
horse_name, horse_number, odds, prob_top1, prob_top3,
|
||||
ml_score, recommendation, is_value_bet, risque_label, risque_score
|
||||
FROM ml_predictions_cache
|
||||
WHERE date = ?
|
||||
ORDER BY ml_score DESC"""
|
||||
if (
|
||||
include_weather
|
||||
and table_exists(conn, "pmu_meteo")
|
||||
and table_exists(conn, "pmu_courses")
|
||||
):
|
||||
sql = """SELECT
|
||||
m.race_label, m.hippodrome, m.discipline, m.distance, m.heure,
|
||||
m.horse_name, m.horse_number, m.odds, m.prob_top1, m.prob_top3,
|
||||
m.ml_score, m.recommendation, m.is_value_bet, m.risque_label, m.risque_score,
|
||||
c.penetrometre_intitule,
|
||||
mt.nebulositecode, mt.nebulosite_court, mt.temperature, mt.force_vent
|
||||
FROM ml_predictions_cache m
|
||||
LEFT JOIN pmu_courses c
|
||||
ON c.date_programme = m.date
|
||||
AND c.num_reunion = m.num_reunion
|
||||
AND c.num_course = m.num_course
|
||||
LEFT JOIN pmu_meteo mt
|
||||
ON mt.date_programme = m.date
|
||||
AND mt.num_reunion = m.num_reunion
|
||||
WHERE m.date = ?
|
||||
ORDER BY m.ml_score DESC"""
|
||||
else:
|
||||
sql = """SELECT
|
||||
race_label, hippodrome, discipline, distance, heure,
|
||||
horse_name, horse_number, odds, prob_top1, prob_top3,
|
||||
ml_score, recommendation, is_value_bet, risque_label, risque_score
|
||||
FROM ml_predictions_cache
|
||||
WHERE date = ?
|
||||
ORDER BY ml_score DESC"""
|
||||
params = [date]
|
||||
|
||||
if limit is not None:
|
||||
@@ -47,7 +75,42 @@ def _fetch_ml_predictions(conn, date: str, limit: int = None, offset: int = 0):
|
||||
params += [limit, offset]
|
||||
|
||||
rows = conn.execute(sql, params).fetchall()
|
||||
return [dict(r) for r in rows], total
|
||||
|
||||
results = []
|
||||
for r in rows:
|
||||
row_dict = dict(r)
|
||||
if include_weather:
|
||||
# Compute derived fields from raw columns
|
||||
penetrometre = row_dict.pop("penetrometre_intitule", None) or ""
|
||||
# Import inline to avoid circular dependency at module level
|
||||
from scoring_v2 import get_terrain_condition, compute_weather_impact
|
||||
|
||||
terrain_condition = (
|
||||
get_terrain_condition(penetrometre) if penetrometre else "inconnu"
|
||||
)
|
||||
weather_data = None
|
||||
if (
|
||||
row_dict.get("nebulositecode") is not None
|
||||
or row_dict.get("temperature") is not None
|
||||
):
|
||||
weather_data = {
|
||||
"nebulositecode": row_dict.pop("nebulositecode", None),
|
||||
"nebulosite_court": row_dict.pop("nebulosite_court", None),
|
||||
"temperature": row_dict.pop("temperature", None),
|
||||
"force_vent": row_dict.pop("force_vent", None),
|
||||
}
|
||||
else:
|
||||
# Remove raw meteo columns even if NULL
|
||||
row_dict.pop("nebulositecode", None)
|
||||
row_dict.pop("nebulosite_court", None)
|
||||
row_dict.pop("temperature", None)
|
||||
row_dict.pop("force_vent", None)
|
||||
weather_impact = compute_weather_impact(weather_data, terrain_condition)
|
||||
row_dict["terrain_condition"] = terrain_condition
|
||||
row_dict["weather_impact"] = weather_impact
|
||||
results.append(row_dict)
|
||||
|
||||
return results, total
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────
|
||||
@@ -145,7 +208,7 @@ def predictions_all():
|
||||
conn = get_db()
|
||||
try:
|
||||
predictions, total = _fetch_ml_predictions(
|
||||
conn, date_param, limit=limit, offset=offset
|
||||
conn, date_param, limit=limit, offset=offset, include_weather=True
|
||||
)
|
||||
pagination = paginate_query(predictions, total, limit, offset)
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ def valuebets():
|
||||
default: 0
|
||||
responses:
|
||||
200:
|
||||
description: Value bets du jour
|
||||
description: Value bets du jour avec météo et terrain (HRT-83)
|
||||
401:
|
||||
description: Token invalide
|
||||
403:
|
||||
@@ -69,7 +69,7 @@ def valuebets():
|
||||
|
||||
conn = get_db()
|
||||
try:
|
||||
rows = []
|
||||
rows_raw = []
|
||||
total = 0
|
||||
|
||||
if table_exists(conn, "ml_predictions_cache"):
|
||||
@@ -81,18 +81,73 @@ def valuebets():
|
||||
).fetchone()
|
||||
total = count_row["cnt"] if count_row else 0
|
||||
|
||||
rows = conn.execute(
|
||||
"""SELECT race_label, hippodrome, discipline, distance, heure,
|
||||
horse_name, horse_number, odds, prob_top1, prob_top3,
|
||||
ml_score, recommendation, risque_label, risque_score
|
||||
FROM ml_predictions_cache
|
||||
WHERE date = ? AND is_value_bet = 1 AND odds >= ?
|
||||
ORDER BY ml_score DESC
|
||||
LIMIT ? OFFSET ?""",
|
||||
(date_param, min_odds, limit, offset),
|
||||
).fetchall()
|
||||
# LEFT JOIN pmu_courses (terrain) + pmu_meteo (météo) — HRT-83
|
||||
has_courses = table_exists(conn, "pmu_courses")
|
||||
has_meteo = table_exists(conn, "pmu_meteo")
|
||||
|
||||
if has_courses and has_meteo:
|
||||
rows_raw = conn.execute(
|
||||
"""SELECT m.race_label, m.hippodrome, m.discipline, m.distance, m.heure,
|
||||
m.horse_name, m.horse_number, m.odds, m.prob_top1, m.prob_top3,
|
||||
m.ml_score, m.recommendation, m.risque_label, m.risque_score,
|
||||
c.penetrometre_intitule,
|
||||
mt.nebulositecode, mt.nebulosite_court,
|
||||
mt.temperature, mt.force_vent
|
||||
FROM ml_predictions_cache m
|
||||
LEFT JOIN pmu_courses c
|
||||
ON c.date_programme = m.date
|
||||
AND c.num_reunion = m.num_reunion
|
||||
AND c.num_course = m.num_course
|
||||
LEFT JOIN pmu_meteo mt
|
||||
ON mt.date_programme = m.date
|
||||
AND mt.num_reunion = m.num_reunion
|
||||
WHERE m.date = ? AND m.is_value_bet = 1 AND m.odds >= ?
|
||||
ORDER BY m.ml_score DESC
|
||||
LIMIT ? OFFSET ?""",
|
||||
(date_param, min_odds, limit, offset),
|
||||
).fetchall()
|
||||
else:
|
||||
rows_raw = conn.execute(
|
||||
"""SELECT race_label, hippodrome, discipline, distance, heure,
|
||||
horse_name, horse_number, odds, prob_top1, prob_top3,
|
||||
ml_score, recommendation, risque_label, risque_score
|
||||
FROM ml_predictions_cache
|
||||
WHERE date = ? AND is_value_bet = 1 AND odds >= ?
|
||||
ORDER BY ml_score DESC
|
||||
LIMIT ? OFFSET ?""",
|
||||
(date_param, min_odds, limit, offset),
|
||||
).fetchall()
|
||||
|
||||
from scoring_v2 import get_terrain_condition, compute_weather_impact
|
||||
|
||||
valuebets_list = []
|
||||
for r in rows_raw:
|
||||
row_dict = dict(r)
|
||||
penetrometre = row_dict.pop("penetrometre_intitule", None) or ""
|
||||
terrain_condition = (
|
||||
get_terrain_condition(penetrometre) if penetrometre else "inconnu"
|
||||
)
|
||||
weather_data = None
|
||||
if (
|
||||
row_dict.get("nebulositecode") is not None
|
||||
or row_dict.get("temperature") is not None
|
||||
):
|
||||
weather_data = {
|
||||
"nebulositecode": row_dict.pop("nebulositecode", None),
|
||||
"nebulosite_court": row_dict.pop("nebulosite_court", None),
|
||||
"temperature": row_dict.pop("temperature", None),
|
||||
"force_vent": row_dict.pop("force_vent", None),
|
||||
}
|
||||
else:
|
||||
row_dict.pop("nebulositecode", None)
|
||||
row_dict.pop("nebulosite_court", None)
|
||||
row_dict.pop("temperature", None)
|
||||
row_dict.pop("force_vent", None)
|
||||
weather_impact = compute_weather_impact(weather_data, terrain_condition)
|
||||
row_dict["terrain_condition"] = terrain_condition
|
||||
row_dict["weather_impact"] = weather_impact
|
||||
valuebets_list.append(row_dict)
|
||||
|
||||
valuebets_list = [dict(r) for r in rows]
|
||||
pagination = paginate_query(valuebets_list, total, limit, offset)
|
||||
|
||||
return jsonify(
|
||||
|
||||
21
infra/turf-saas-leadhunter.service
Normal file
21
infra/turf-saas-leadhunter.service
Normal file
@@ -0,0 +1,21 @@
|
||||
[Unit]
|
||||
Description=H3R7Tech LeadHunter API (Port 8775)
|
||||
Documentation=https://portal-kolifee.duckdns.org
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=h3r7
|
||||
WorkingDirectory=/home/h3r7/turf_saas
|
||||
|
||||
# Charger les variables d'environnement depuis /home/h3r7/.env
|
||||
# (notamment GOOGLE_PLACES_API_KEY)
|
||||
EnvironmentFile=/home/h3r7/.env
|
||||
|
||||
ExecStart=/home/h3r7/turf_saas/venv/bin/python3 /home/h3r7/turf_saas/leadhunter_api.py
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
Environment=PYTHONPATH=/home/h3r7/turf_saas
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
303
leadhunter_api.py
Normal file
303
leadhunter_api.py
Normal file
@@ -0,0 +1,303 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
H3R7Tech — LeadHunter API
|
||||
===========================
|
||||
Service Flask sur port 8775 exposant les endpoints LeadHunter.
|
||||
|
||||
Endpoints :
|
||||
GET /api/leads — Liste les leads (filtres: status, limit, offset)
|
||||
POST /api/leads/scrape — Lance un job de scraping asynchrone
|
||||
GET /api/leads/stats — Statistiques globales du CRM
|
||||
GET /api/leads/export — Export CSV des leads
|
||||
PATCH /api/leads/<id>/status — Met à jour le statut d'un lead
|
||||
|
||||
Port : 8775 (8769 occupé par depenses_trello/app.py, 8770 occupé par turf_scraper/crm_api.py — corrigé HRT-66)
|
||||
|
||||
Auteur: H3R7Tech Backend Engineer
|
||||
Issue: HRT-66
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from flask import Flask, jsonify, request, Response
|
||||
from flask_cors import CORS
|
||||
|
||||
# Import des modules LeadHunter
|
||||
from leadhunter_crm import (
|
||||
init_db,
|
||||
insert_leads,
|
||||
get_leads,
|
||||
get_lead_by_id,
|
||||
update_lead_status,
|
||||
get_stats,
|
||||
export_csv,
|
||||
VALID_STATUSES,
|
||||
DB_PATH,
|
||||
)
|
||||
from leadhunter_scraper import run_scraping, GOOGLE_PLACES_API_KEY
|
||||
from leadhunter_scorer import LeadScorer
|
||||
|
||||
# ─── Assertions au démarrage ─────────────────────────────────────────────────
|
||||
# Vérification obligatoire : la clé API doit être présente au démarrage
|
||||
assert os.environ.get("GOOGLE_PLACES_API_KEY"), (
|
||||
"GOOGLE_PLACES_API_KEY manquante. "
|
||||
"Ajouter dans /home/h3r7/.env : export GOOGLE_PLACES_API_KEY=xxx"
|
||||
)
|
||||
|
||||
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||
logger = logging.getLogger("leadhunter.api")
|
||||
|
||||
_handler = RotatingFileHandler(
|
||||
"/home/h3r7/leadhunter.log",
|
||||
maxBytes=5 * 1024 * 1024,
|
||||
backupCount=3,
|
||||
)
|
||||
_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||
)
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
logger.addHandler(_handler)
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
|
||||
# ─── App Flask ───────────────────────────────────────────────────────────────
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
# Scorer singleton
|
||||
scorer = LeadScorer()
|
||||
|
||||
# État global du job de scraping (simple flag — pas de celery nécessaire pour le POC)
|
||||
_scrape_job = {
|
||||
"running": False,
|
||||
"last_run": None,
|
||||
"last_count": 0,
|
||||
"last_error": None,
|
||||
}
|
||||
_scrape_lock = threading.Lock()
|
||||
|
||||
# ─── Init DB ─────────────────────────────────────────────────────────────────
|
||||
init_db(DB_PATH)
|
||||
logger.info("LeadHunter API démarrée — DB initialisée.")
|
||||
|
||||
|
||||
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _run_scrape_job(max_leads: int, use_google: bool, use_osm: bool) -> None:
|
||||
"""Job de scraping exécuté dans un thread séparé."""
|
||||
with _scrape_lock:
|
||||
_scrape_job["running"] = True
|
||||
_scrape_job["last_error"] = None
|
||||
|
||||
try:
|
||||
leads_raw = run_scraping(
|
||||
max_leads=max_leads,
|
||||
use_google=use_google,
|
||||
use_osm=use_osm,
|
||||
)
|
||||
leads_scored = scorer.score_leads(leads_raw)
|
||||
inserted_ids = insert_leads(leads_scored)
|
||||
|
||||
with _scrape_lock:
|
||||
_scrape_job["last_count"] = len(inserted_ids)
|
||||
from datetime import datetime
|
||||
|
||||
_scrape_job["last_run"] = datetime.utcnow().isoformat() + "Z"
|
||||
|
||||
logger.info(f"Scrape job terminé : {len(inserted_ids)} leads insérés.")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Scrape job erreur : {e}")
|
||||
with _scrape_lock:
|
||||
_scrape_job["last_error"] = str(e)
|
||||
|
||||
finally:
|
||||
with _scrape_lock:
|
||||
_scrape_job["running"] = False
|
||||
|
||||
|
||||
# ─── Routes ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@app.route("/api/leads", methods=["GET"])
|
||||
def api_get_leads():
|
||||
"""
|
||||
Liste les leads du CRM.
|
||||
|
||||
Query params :
|
||||
- status (str, optional) : filtre sur new/contacted/closed/rejected
|
||||
- limit (int, default=50) : pagination
|
||||
- offset (int, default=0) : pagination
|
||||
"""
|
||||
status = request.args.get("status")
|
||||
try:
|
||||
limit = int(request.args.get("limit", 50))
|
||||
offset = int(request.args.get("offset", 0))
|
||||
except ValueError:
|
||||
return jsonify({"error": "limit et offset doivent être des entiers"}), 400
|
||||
|
||||
if status and status not in VALID_STATUSES:
|
||||
return jsonify(
|
||||
{"error": f"status invalide. Valeurs acceptées : {VALID_STATUSES}"}
|
||||
), 400
|
||||
|
||||
leads = get_leads(status=status, limit=limit, offset=offset)
|
||||
return jsonify(
|
||||
{
|
||||
"leads": leads,
|
||||
"count": len(leads),
|
||||
"limit": limit,
|
||||
"offset": offset,
|
||||
"status_filter": status,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.route("/api/leads/scrape", methods=["POST"])
|
||||
def api_scrape():
|
||||
"""
|
||||
Lance un job de scraping asynchrone.
|
||||
|
||||
Body JSON (optionnel) :
|
||||
- max_leads (int, default=100)
|
||||
- use_google (bool, default=true)
|
||||
- use_osm (bool, default=true)
|
||||
|
||||
Retourne immédiatement avec le statut du job.
|
||||
"""
|
||||
with _scrape_lock:
|
||||
if _scrape_job["running"]:
|
||||
return jsonify(
|
||||
{
|
||||
"status": "already_running",
|
||||
"message": "Un job de scraping est déjà en cours.",
|
||||
}
|
||||
), 409
|
||||
|
||||
body = request.get_json(silent=True) or {}
|
||||
max_leads = int(body.get("max_leads", 100))
|
||||
use_google = bool(body.get("use_google", True))
|
||||
use_osm = bool(body.get("use_osm", True))
|
||||
|
||||
thread = threading.Thread(
|
||||
target=_run_scrape_job,
|
||||
args=(max_leads, use_google, use_osm),
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
|
||||
logger.info(
|
||||
f"Job de scraping lancé (max_leads={max_leads}, "
|
||||
f"use_google={use_google}, use_osm={use_osm})"
|
||||
)
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"status": "started",
|
||||
"message": "Job de scraping démarré en arrière-plan.",
|
||||
"params": {
|
||||
"max_leads": max_leads,
|
||||
"use_google": use_google,
|
||||
"use_osm": use_osm,
|
||||
},
|
||||
}
|
||||
), 202
|
||||
|
||||
|
||||
@app.route("/api/leads/scrape/status", methods=["GET"])
|
||||
def api_scrape_status():
|
||||
"""Retourne l'état courant du job de scraping."""
|
||||
with _scrape_lock:
|
||||
return jsonify(dict(_scrape_job))
|
||||
|
||||
|
||||
@app.route("/api/leads/stats", methods=["GET"])
|
||||
def api_stats():
|
||||
"""
|
||||
Statistiques globales du CRM LeadHunter.
|
||||
|
||||
Retourne : total, by_status, by_source, avg_score, top_leads_count
|
||||
"""
|
||||
stats = get_stats()
|
||||
if not stats:
|
||||
return jsonify({"error": "Impossible de calculer les statistiques"}), 500
|
||||
return jsonify(stats)
|
||||
|
||||
|
||||
@app.route("/api/leads/export", methods=["GET"])
|
||||
def api_export():
|
||||
"""
|
||||
Export CSV de tous les leads (ou filtrés par status).
|
||||
|
||||
Query params :
|
||||
- status (str, optional)
|
||||
"""
|
||||
status = request.args.get("status")
|
||||
if status and status not in VALID_STATUSES:
|
||||
return jsonify({"error": f"status invalide : {VALID_STATUSES}"}), 400
|
||||
|
||||
csv_content = export_csv(status=status)
|
||||
filename = f"leadhunter_leads{'_' + status if status else ''}.csv"
|
||||
|
||||
return Response(
|
||||
csv_content,
|
||||
mimetype="text/csv",
|
||||
headers={
|
||||
"Content-Disposition": f"attachment; filename={filename}",
|
||||
"Content-Type": "text/csv; charset=utf-8",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.route("/api/leads/<int:lead_id>/status", methods=["PATCH"])
|
||||
def api_update_status(lead_id: int):
|
||||
"""
|
||||
Met à jour le statut d'un lead.
|
||||
|
||||
Body JSON :
|
||||
- status (str) : new | contacted | closed | rejected
|
||||
"""
|
||||
body = request.get_json(silent=True)
|
||||
if not body or "status" not in body:
|
||||
return jsonify({"error": "Body JSON requis avec le champ 'status'"}), 400
|
||||
|
||||
new_status = body["status"]
|
||||
if new_status not in VALID_STATUSES:
|
||||
return jsonify({"error": f"status invalide. Valeurs : {VALID_STATUSES}"}), 400
|
||||
|
||||
lead = get_lead_by_id(lead_id)
|
||||
if not lead:
|
||||
return jsonify({"error": f"Lead id={lead_id} introuvable"}), 404
|
||||
|
||||
success = update_lead_status(lead_id, new_status)
|
||||
if not success:
|
||||
return jsonify({"error": "Mise à jour échouée"}), 500
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"success": True,
|
||||
"lead_id": lead_id,
|
||||
"new_status": new_status,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@app.route("/health", methods=["GET"])
|
||||
def health():
|
||||
"""Healthcheck pour systemd / monitoring."""
|
||||
return jsonify(
|
||||
{
|
||||
"status": "ok",
|
||||
"service": "leadhunter-api",
|
||||
"port": 8775,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# ─── Entrypoint ──────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=8775, debug=False)
|
||||
349
leadhunter_crm.py
Normal file
349
leadhunter_crm.py
Normal file
@@ -0,0 +1,349 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
H3R7Tech — LeadHunter CRM (SQLite)
|
||||
=====================================
|
||||
Couche de persistance SQLite pour les leads LeadHunter.
|
||||
|
||||
Schéma validé CTO (HRT-66) :
|
||||
CREATE TABLE leads (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source TEXT NOT NULL, -- 'google_places' ou 'osm'
|
||||
name TEXT NOT NULL,
|
||||
address TEXT,
|
||||
phone TEXT,
|
||||
rating REAL,
|
||||
reviews_count INTEGER,
|
||||
website TEXT,
|
||||
score INTEGER,
|
||||
rgpd_ok BOOLEAN DEFAULT 1,
|
||||
scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
status TEXT DEFAULT 'new' -- new, contacted, closed, rejected
|
||||
);
|
||||
|
||||
Auteur: H3R7Tech Backend Engineer
|
||||
Issue: HRT-66
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import logging
|
||||
import csv
|
||||
import io
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from typing import Optional
|
||||
|
||||
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||
logger = logging.getLogger("leadhunter.crm")
|
||||
|
||||
_handler = RotatingFileHandler(
|
||||
"/home/h3r7/leadhunter.log",
|
||||
maxBytes=5 * 1024 * 1024,
|
||||
backupCount=3,
|
||||
)
|
||||
_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||
)
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
logger.addHandler(_handler)
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
|
||||
# ─── Chemin DB ───────────────────────────────────────────────────────────────
|
||||
DB_PATH = "/home/h3r7/leadhunter.db"
|
||||
|
||||
# Statuts valides pour un lead
|
||||
VALID_STATUSES = {"new", "contacted", "closed", "rejected"}
|
||||
|
||||
|
||||
# ─── Initialisation ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def init_db(db_path: str = DB_PATH) -> None:
|
||||
"""
|
||||
Crée la base SQLite et la table leads si elle n'existe pas.
|
||||
Idempotent — peut être appelé au démarrage de l'API.
|
||||
"""
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS leads (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source TEXT NOT NULL,
|
||||
name TEXT NOT NULL,
|
||||
address TEXT,
|
||||
phone TEXT,
|
||||
rating REAL,
|
||||
reviews_count INTEGER,
|
||||
website TEXT,
|
||||
score INTEGER,
|
||||
rgpd_ok BOOLEAN DEFAULT 1,
|
||||
scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
status TEXT DEFAULT 'new'
|
||||
)
|
||||
""")
|
||||
conn.commit()
|
||||
logger.info(f"DB initialisée : {db_path}")
|
||||
|
||||
|
||||
# ─── Context manager ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _get_conn(db_path: str = DB_PATH):
|
||||
"""Fournit une connexion SQLite avec row_factory."""
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
try:
|
||||
yield conn
|
||||
conn.commit()
|
||||
except Exception as e:
|
||||
conn.rollback()
|
||||
logger.warning(f"DB transaction rollback : {e}")
|
||||
raise
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
# ─── CRUD ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def insert_lead(lead: dict, db_path: str = DB_PATH) -> Optional[int]:
|
||||
"""
|
||||
Insère un lead normalisé dans la DB.
|
||||
|
||||
Args:
|
||||
lead: dict avec les champs normalisés (source, name, address, ...)
|
||||
db_path: chemin vers la DB SQLite.
|
||||
|
||||
Returns:
|
||||
L'id SQLite du lead inséré, ou None en cas d'erreur.
|
||||
"""
|
||||
try:
|
||||
with _get_conn(db_path) as conn:
|
||||
cursor = conn.execute(
|
||||
"""
|
||||
INSERT INTO leads
|
||||
(source, name, address, phone, rating, reviews_count,
|
||||
website, score, rgpd_ok, status)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
lead.get("source", "unknown"),
|
||||
lead.get("name", ""),
|
||||
lead.get("address", ""),
|
||||
lead.get("phone", ""),
|
||||
lead.get("rating"),
|
||||
lead.get("reviews_count"),
|
||||
lead.get("website", ""),
|
||||
lead.get("score"),
|
||||
1 if lead.get("rgpd_ok", True) else 0,
|
||||
lead.get("status", "new"),
|
||||
),
|
||||
)
|
||||
lead_id = cursor.lastrowid
|
||||
logger.info(f"Lead inséré id={lead_id} : {lead.get('name')}")
|
||||
return lead_id
|
||||
except Exception as e:
|
||||
logger.warning(f"insert_lead error : {e}")
|
||||
return None
|
||||
|
||||
|
||||
def insert_leads(leads: list[dict], db_path: str = DB_PATH) -> list[int]:
|
||||
"""
|
||||
Insère une liste de leads en batch.
|
||||
|
||||
Returns:
|
||||
Liste des ids insérés.
|
||||
"""
|
||||
ids = []
|
||||
for lead in leads:
|
||||
lead_id = insert_lead(lead, db_path)
|
||||
if lead_id is not None:
|
||||
ids.append(lead_id)
|
||||
logger.info(f"insert_leads : {len(ids)}/{len(leads)} insérés.")
|
||||
return ids
|
||||
|
||||
|
||||
def get_leads(
|
||||
status: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
db_path: str = DB_PATH,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Récupère les leads avec filtre optionnel sur le statut.
|
||||
|
||||
Args:
|
||||
status: filtre sur le champ 'status' (new, contacted, closed, rejected).
|
||||
limit: pagination — nombre de résultats max.
|
||||
offset: pagination — décalage.
|
||||
|
||||
Returns:
|
||||
Liste de dicts (tous les champs de la table leads).
|
||||
"""
|
||||
try:
|
||||
with _get_conn(db_path) as conn:
|
||||
if status:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM leads WHERE status = ? ORDER BY score DESC, scraped_at DESC LIMIT ? OFFSET ?",
|
||||
(status, limit, offset),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM leads ORDER BY score DESC, scraped_at DESC LIMIT ? OFFSET ?",
|
||||
(limit, offset),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
except Exception as e:
|
||||
logger.warning(f"get_leads error : {e}")
|
||||
return []
|
||||
|
||||
|
||||
def get_lead_by_id(lead_id: int, db_path: str = DB_PATH) -> Optional[dict]:
|
||||
"""Récupère un lead par son id."""
|
||||
try:
|
||||
with _get_conn(db_path) as conn:
|
||||
row = conn.execute(
|
||||
"SELECT * FROM leads WHERE id = ?", (lead_id,)
|
||||
).fetchone()
|
||||
return dict(row) if row else None
|
||||
except Exception as e:
|
||||
logger.warning(f"get_lead_by_id error : {e}")
|
||||
return None
|
||||
|
||||
|
||||
def update_lead_status(lead_id: int, status: str, db_path: str = DB_PATH) -> bool:
|
||||
"""
|
||||
Met à jour le statut d'un lead.
|
||||
|
||||
Args:
|
||||
lead_id: id du lead.
|
||||
status: nouveau statut ('new', 'contacted', 'closed', 'rejected').
|
||||
|
||||
Returns:
|
||||
True si mise à jour réussie, False sinon.
|
||||
"""
|
||||
if status not in VALID_STATUSES:
|
||||
logger.warning(f"update_lead_status : statut invalide '{status}'")
|
||||
return False
|
||||
try:
|
||||
with _get_conn(db_path) as conn:
|
||||
conn.execute(
|
||||
"UPDATE leads SET status = ? WHERE id = ?",
|
||||
(status, lead_id),
|
||||
)
|
||||
logger.info(f"Lead id={lead_id} statut → {status}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning(f"update_lead_status error : {e}")
|
||||
return False
|
||||
|
||||
|
||||
def get_stats(db_path: str = DB_PATH) -> dict:
|
||||
"""
|
||||
Retourne les statistiques globales du CRM.
|
||||
|
||||
Returns:
|
||||
Dict avec total, by_status, by_source, avg_score, top_leads_count
|
||||
"""
|
||||
try:
|
||||
with _get_conn(db_path) as conn:
|
||||
total = conn.execute("SELECT COUNT(*) FROM leads").fetchone()[0]
|
||||
|
||||
by_status_rows = conn.execute(
|
||||
"SELECT status, COUNT(*) as cnt FROM leads GROUP BY status"
|
||||
).fetchall()
|
||||
by_status = {r["status"]: r["cnt"] for r in by_status_rows}
|
||||
|
||||
by_source_rows = conn.execute(
|
||||
"SELECT source, COUNT(*) as cnt FROM leads GROUP BY source"
|
||||
).fetchall()
|
||||
by_source = {r["source"]: r["cnt"] for r in by_source_rows}
|
||||
|
||||
avg_score_row = conn.execute(
|
||||
"SELECT AVG(score) FROM leads WHERE score IS NOT NULL"
|
||||
).fetchone()
|
||||
avg_score = round(avg_score_row[0] or 0, 2)
|
||||
|
||||
# Leads "chauds" = score ≥ 5
|
||||
top_count = conn.execute(
|
||||
"SELECT COUNT(*) FROM leads WHERE score >= 5"
|
||||
).fetchone()[0]
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
"by_status": by_status,
|
||||
"by_source": by_source,
|
||||
"avg_score": avg_score,
|
||||
"top_leads_count": top_count,
|
||||
"generated_at": datetime.utcnow().isoformat() + "Z",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(f"get_stats error : {e}")
|
||||
return {}
|
||||
|
||||
|
||||
def export_csv(
|
||||
status: Optional[str] = None,
|
||||
db_path: str = DB_PATH,
|
||||
) -> str:
|
||||
"""
|
||||
Exporte les leads en CSV (string).
|
||||
|
||||
Args:
|
||||
status: filtre optionnel sur le statut.
|
||||
|
||||
Returns:
|
||||
Contenu CSV en string UTF-8.
|
||||
"""
|
||||
leads = get_leads(status=status, limit=10000, db_path=db_path)
|
||||
|
||||
output = io.StringIO()
|
||||
fieldnames = [
|
||||
"id",
|
||||
"source",
|
||||
"name",
|
||||
"address",
|
||||
"phone",
|
||||
"rating",
|
||||
"reviews_count",
|
||||
"website",
|
||||
"score",
|
||||
"rgpd_ok",
|
||||
"scraped_at",
|
||||
"status",
|
||||
]
|
||||
writer = csv.DictWriter(output, fieldnames=fieldnames, extrasaction="ignore")
|
||||
writer.writeheader()
|
||||
writer.writerows(leads)
|
||||
|
||||
logger.info(f"export_csv : {len(leads)} leads exportés.")
|
||||
return output.getvalue()
|
||||
|
||||
|
||||
# ─── CLI (debug) ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
init_db()
|
||||
|
||||
# Test insertion
|
||||
test_lead = {
|
||||
"source": "google_places",
|
||||
"name": "Restaurant Test",
|
||||
"address": "10 rue de la Paix, 59000 Lille",
|
||||
"phone": "+33 3 20 00 00 01",
|
||||
"rating": 4.5,
|
||||
"reviews_count": 120,
|
||||
"website": "",
|
||||
"score": 8,
|
||||
"rgpd_ok": True,
|
||||
"status": "new",
|
||||
}
|
||||
lead_id = insert_lead(test_lead)
|
||||
print(f"Lead inséré : id={lead_id}")
|
||||
|
||||
leads = get_leads()
|
||||
print(f"Leads en DB : {len(leads)}")
|
||||
|
||||
stats = get_stats()
|
||||
print(f"Stats : {stats}")
|
||||
193
leadhunter_scorer.py
Normal file
193
leadhunter_scorer.py
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
H3R7Tech — LeadHunter Scorer
|
||||
================================
|
||||
Moteur de scoring des leads restaurants MEL.
|
||||
|
||||
Critères (ordre de priorité métier) :
|
||||
1. [+3] Site web absent ← CRITIQUE : raison d'être du produit
|
||||
2. [+2] Nombre d'avis élevé (≥ 50) : forte activité = bon prospect de vente
|
||||
3. [+2] Note Google élevée (≥ 4.0) : établissement sérieux
|
||||
4. [+1] Téléphone présent : facilite la prise de contact
|
||||
5. [-1] Note faible (< 3.0) : risque reputationnel pour la prestation web
|
||||
|
||||
Score maximum théorique : 8
|
||||
Score minimum : 0 (leads avec site web ne doivent pas passer ici)
|
||||
|
||||
Auteur: H3R7Tech Backend Engineer
|
||||
Issue: HRT-66
|
||||
"""
|
||||
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||
logger = logging.getLogger("leadhunter.scorer")
|
||||
|
||||
_handler = RotatingFileHandler(
|
||||
"/home/h3r7/leadhunter.log",
|
||||
maxBytes=5 * 1024 * 1024,
|
||||
backupCount=3,
|
||||
)
|
||||
_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||
)
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
logger.addHandler(_handler)
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
|
||||
|
||||
# ─── Scorer ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class LeadScorer:
|
||||
"""
|
||||
Calcule le score de priorité d'un lead.
|
||||
|
||||
Le score sert à trier les leads dans le CRM :
|
||||
- Score élevé = prospect chaud (sans site + actif + bien noté)
|
||||
- Score faible = prospect froid (peut être ignoré ou traité en dernier)
|
||||
"""
|
||||
|
||||
def _calculate_score(self, lead: dict) -> int:
|
||||
"""
|
||||
Calcule le score d'un lead.
|
||||
|
||||
Args:
|
||||
lead: dict avec les champs normalisés du scraper
|
||||
(name, website, rating, reviews_count, phone, ...)
|
||||
|
||||
Returns:
|
||||
Score entier (0–8)
|
||||
"""
|
||||
score = 0
|
||||
|
||||
# ── Critère 1 : site web absent [CRITIQUE — logique métier centrale] ──
|
||||
# C'est le critère n°1 : on cherche des restaurants SANS site web
|
||||
# pour leur proposer une création de site à 800–1500€.
|
||||
website = lead.get("website", "")
|
||||
if not website or not website.strip():
|
||||
score += 3
|
||||
logger.debug(f"{lead.get('name')}: +3 (site web absent)")
|
||||
else:
|
||||
# Si le lead a un site web, score = 0 immédiatement.
|
||||
# Ce cas ne devrait pas se produire (filtre scraper),
|
||||
# mais on reste défensif.
|
||||
logger.warning(
|
||||
f"{lead.get('name')}: site web présent ({website}), "
|
||||
"lead ignoré pour scoring."
|
||||
)
|
||||
return 0
|
||||
|
||||
# ── Critère 2 : nombre d'avis élevé (≥ 50) ──────────────────────────
|
||||
reviews = lead.get("reviews_count")
|
||||
if reviews is not None:
|
||||
try:
|
||||
reviews = int(reviews)
|
||||
if reviews >= 50:
|
||||
score += 2
|
||||
logger.debug(f"{lead.get('name')}: +2 (avis ≥ 50 : {reviews})")
|
||||
except (TypeError, ValueError) as e:
|
||||
logger.warning(f"reviews_count invalide pour {lead.get('name')}: {e}")
|
||||
|
||||
# ── Critère 3 : bonne note Google (≥ 4.0) ───────────────────────────
|
||||
rating = lead.get("rating")
|
||||
if rating is not None:
|
||||
try:
|
||||
rating = float(rating)
|
||||
if rating >= 4.0:
|
||||
score += 2
|
||||
logger.debug(f"{lead.get('name')}: +2 (note ≥ 4.0 : {rating})")
|
||||
elif rating < 3.0:
|
||||
score -= 1
|
||||
logger.debug(f"{lead.get('name')}: -1 (note < 3.0 : {rating})")
|
||||
except (TypeError, ValueError) as e:
|
||||
logger.warning(f"rating invalide pour {lead.get('name')}: {e}")
|
||||
|
||||
# ── Critère 4 : téléphone présent ────────────────────────────────────
|
||||
phone = lead.get("phone", "")
|
||||
if phone and phone.strip():
|
||||
score += 1
|
||||
logger.debug(f"{lead.get('name')}: +1 (téléphone présent)")
|
||||
|
||||
# Plancher à 0
|
||||
score = max(0, score)
|
||||
logger.info(f"Score calculé pour '{lead.get('name')}' : {score}/8")
|
||||
return score
|
||||
|
||||
def score_lead(self, lead: dict) -> dict:
|
||||
"""
|
||||
Enrichit un lead avec son score.
|
||||
|
||||
Args:
|
||||
lead: dict normalisé du scraper.
|
||||
|
||||
Returns:
|
||||
Même dict avec le champ 'score' ajouté/mis à jour.
|
||||
"""
|
||||
lead = dict(lead) # copie défensive
|
||||
lead["score"] = self._calculate_score(lead)
|
||||
return lead
|
||||
|
||||
def score_leads(self, leads: list[dict]) -> list[dict]:
|
||||
"""
|
||||
Score et trie une liste de leads (score décroissant).
|
||||
|
||||
Args:
|
||||
leads: liste de dicts normalisés.
|
||||
|
||||
Returns:
|
||||
Liste triée par score décroissant.
|
||||
"""
|
||||
scored = [self.score_lead(lead) for lead in leads]
|
||||
scored.sort(key=lambda l: l.get("score", 0), reverse=True)
|
||||
logger.info(
|
||||
f"score_leads terminé : {len(scored)} leads scorés. "
|
||||
f"Score max = {scored[0]['score'] if scored else 0}, "
|
||||
f"Score min = {scored[-1]['score'] if scored else 0}"
|
||||
)
|
||||
return scored
|
||||
|
||||
|
||||
# ─── CLI (debug) ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Exemple de test rapide sans appel API
|
||||
test_leads = [
|
||||
{
|
||||
"name": "Restaurant A",
|
||||
"website": "",
|
||||
"rating": 4.5,
|
||||
"reviews_count": 120,
|
||||
"phone": "+33 3 20 00 00 01",
|
||||
},
|
||||
{
|
||||
"name": "Restaurant B",
|
||||
"website": "",
|
||||
"rating": 3.8,
|
||||
"reviews_count": 30,
|
||||
"phone": "",
|
||||
},
|
||||
{
|
||||
"name": "Café C",
|
||||
"website": "",
|
||||
"rating": 2.5,
|
||||
"reviews_count": 5,
|
||||
"phone": "+33 3 20 00 00 03",
|
||||
},
|
||||
{
|
||||
"name": "Bar D avec site",
|
||||
"website": "https://bar-d.fr",
|
||||
"rating": 4.2,
|
||||
"reviews_count": 80,
|
||||
"phone": "+33 3 20 00 00 04",
|
||||
},
|
||||
]
|
||||
|
||||
scorer = LeadScorer()
|
||||
results = scorer.score_leads(test_leads)
|
||||
|
||||
print("\n=== Résultats scoring ===")
|
||||
for r in results:
|
||||
print(f" [{r['score']:2d}/8] {r['name']}")
|
||||
397
leadhunter_scraper.py
Normal file
397
leadhunter_scraper.py
Normal file
@@ -0,0 +1,397 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
H3R7Tech — LeadHunter Scraper
|
||||
================================
|
||||
Agent de scraping pour la détection de restaurants sans site web
|
||||
dans la MEL (Métropole Européenne de Lille).
|
||||
|
||||
Sources :
|
||||
- Google Places API (primary)
|
||||
- OpenStreetMap / Overpass API (fallback)
|
||||
|
||||
Quota Google Places Free Tier :
|
||||
- 28 500 requêtes/mois ≈ 950/jour
|
||||
- Compteur persistent dans /home/h3r7/leadhunter_quota.json
|
||||
|
||||
Auteur: H3R7Tech Backend Engineer
|
||||
Issue: HRT-66
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
import requests
|
||||
from datetime import date, datetime
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||
logger = logging.getLogger("leadhunter.scraper")
|
||||
|
||||
_handler = RotatingFileHandler(
|
||||
"/home/h3r7/leadhunter.log",
|
||||
maxBytes=5 * 1024 * 1024, # 5 MB
|
||||
backupCount=3,
|
||||
)
|
||||
_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||
)
|
||||
logger.setLevel(logging.INFO)
|
||||
if not logger.handlers:
|
||||
logger.addHandler(_handler)
|
||||
logger.addHandler(logging.StreamHandler())
|
||||
|
||||
# ─── Configuration ───────────────────────────────────────────────────────────
|
||||
GOOGLE_PLACES_API_KEY = os.environ.get("GOOGLE_PLACES_API_KEY")
|
||||
|
||||
# Quota journalier Google Places Free Tier
|
||||
DAILY_QUOTA_FILE = "/home/h3r7/leadhunter_quota.json"
|
||||
DAILY_QUOTA_LIMIT = 900 # marge de sécurité vs les 950 théoriques
|
||||
|
||||
# Délai entre requêtes Places pour éviter rate-limiting
|
||||
PLACES_SLEEP_S = 0.5
|
||||
|
||||
# Bounding box MEL (Métropole Européenne de Lille)
|
||||
MEL_CENTER_LAT = 50.6292
|
||||
MEL_CENTER_LNG = 3.0573
|
||||
MEL_RADIUS_M = 20000 # 20 km autour de Lille
|
||||
|
||||
# Types de lieux ciblés
|
||||
TARGET_TYPES = ["restaurant", "cafe", "bar", "bakery", "food"]
|
||||
|
||||
# Overpass API endpoint
|
||||
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
||||
|
||||
# Requête Overpass MEL — bounding box directe (50.4,2.8,50.8,3.3) couvrant la MEL
|
||||
# Fix HRT-72 : la résolution area["name"=...] échoue silencieusement sur l'API Overpass publique
|
||||
OVERPASS_MEL_QUERY = """
|
||||
[out:json][timeout:60];
|
||||
(
|
||||
node["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3);
|
||||
way["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3);
|
||||
);
|
||||
out center 200;
|
||||
"""
|
||||
|
||||
|
||||
# ─── Quota Manager ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _load_quota() -> dict:
|
||||
"""Charge le compteur quotidien depuis le fichier JSON."""
|
||||
today = str(date.today())
|
||||
if os.path.exists(DAILY_QUOTA_FILE):
|
||||
try:
|
||||
with open(DAILY_QUOTA_FILE, "r") as f:
|
||||
data = json.load(f)
|
||||
if data.get("date") == today:
|
||||
return data
|
||||
except Exception as e:
|
||||
logger.warning(f"Impossible de lire le fichier quota : {e}")
|
||||
return {"date": today, "count": 0}
|
||||
|
||||
|
||||
def _save_quota(data: dict) -> None:
|
||||
"""Persiste le compteur quotidien."""
|
||||
try:
|
||||
with open(DAILY_QUOTA_FILE, "w") as f:
|
||||
json.dump(data, f)
|
||||
except Exception as e:
|
||||
logger.warning(f"Impossible d'écrire le fichier quota : {e}")
|
||||
|
||||
|
||||
def _increment_quota(n: int = 1) -> int:
|
||||
"""Incrémente le compteur et retourne le total du jour."""
|
||||
quota = _load_quota()
|
||||
quota["count"] += n
|
||||
_save_quota(quota)
|
||||
return quota["count"]
|
||||
|
||||
|
||||
def _quota_remaining() -> int:
|
||||
"""Retourne le nombre de requêtes restantes pour aujourd'hui."""
|
||||
quota = _load_quota()
|
||||
return max(0, DAILY_QUOTA_LIMIT - quota["count"])
|
||||
|
||||
|
||||
# ─── Google Places Scraper ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class GooglePlacesScraper:
|
||||
"""
|
||||
Scraping via Google Places API (Nearby Search + Place Details).
|
||||
Filtre les lieux sans site web côté API.
|
||||
"""
|
||||
|
||||
BASE_URL = "https://maps.googleapis.com/maps/api/place"
|
||||
|
||||
def __init__(self):
|
||||
if not GOOGLE_PLACES_API_KEY:
|
||||
raise EnvironmentError(
|
||||
"GOOGLE_PLACES_API_KEY non définie. "
|
||||
"Ajouter dans /home/h3r7/.env et relancer."
|
||||
)
|
||||
self.api_key = GOOGLE_PLACES_API_KEY
|
||||
|
||||
def _nearby_search(self, place_type: str, page_token: str = None) -> dict:
|
||||
"""Appel Nearby Search — 1 requête comptabilisée."""
|
||||
params = {
|
||||
"key": self.api_key,
|
||||
"location": f"{MEL_CENTER_LAT},{MEL_CENTER_LNG}",
|
||||
"radius": MEL_RADIUS_M,
|
||||
"type": place_type,
|
||||
}
|
||||
if page_token:
|
||||
params["pagetoken"] = page_token
|
||||
|
||||
_increment_quota()
|
||||
time.sleep(PLACES_SLEEP_S)
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
f"{self.BASE_URL}/nearbysearch/json",
|
||||
params=params,
|
||||
timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
except Exception as e:
|
||||
logger.warning(f"NearbySearch error (type={place_type}): {e}")
|
||||
return {}
|
||||
|
||||
def _place_details(self, place_id: str) -> dict:
|
||||
"""Place Details pour récupérer website, phone, rating, etc. — 1 requête."""
|
||||
params = {
|
||||
"key": self.api_key,
|
||||
"place_id": place_id,
|
||||
"fields": "name,formatted_address,formatted_phone_number,website,rating,user_ratings_total",
|
||||
}
|
||||
|
||||
_increment_quota()
|
||||
time.sleep(PLACES_SLEEP_S)
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
f"{self.BASE_URL}/details/json",
|
||||
params=params,
|
||||
timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("result", {})
|
||||
except Exception as e:
|
||||
logger.warning(f"PlaceDetails error (place_id={place_id}): {e}")
|
||||
return {}
|
||||
|
||||
def scrape(self, max_leads: int = 50) -> list[dict]:
|
||||
"""
|
||||
Scrape les restaurants/cafés/bars MEL sans site web.
|
||||
|
||||
Retourne une liste de dicts normalisés compatibles LeadHunter CRM :
|
||||
source, name, address, phone, rating, reviews_count, website, rgpd_ok
|
||||
"""
|
||||
leads = []
|
||||
seen_ids = set()
|
||||
|
||||
for place_type in TARGET_TYPES:
|
||||
if _quota_remaining() < 10:
|
||||
logger.warning(
|
||||
"Quota journalier presque épuisé — arrêt scraping Google Places."
|
||||
)
|
||||
break
|
||||
|
||||
logger.info(f"Scraping Google Places — type={place_type}")
|
||||
page_token = None
|
||||
|
||||
while True:
|
||||
if _quota_remaining() < 5:
|
||||
logger.warning("Quota insuffisant pour continuer la pagination.")
|
||||
break
|
||||
|
||||
data = self._nearby_search(place_type, page_token)
|
||||
results = data.get("results", [])
|
||||
|
||||
for place in results:
|
||||
if len(leads) >= max_leads:
|
||||
break
|
||||
|
||||
place_id = place.get("place_id", "")
|
||||
if not place_id or place_id in seen_ids:
|
||||
continue
|
||||
seen_ids.add(place_id)
|
||||
|
||||
if _quota_remaining() < 2:
|
||||
logger.warning("Quota épuisé avant details.")
|
||||
break
|
||||
|
||||
details = self._place_details(place_id)
|
||||
|
||||
# Filtre : on ne garde que les lieux SANS site web
|
||||
if details.get("website"):
|
||||
continue
|
||||
|
||||
lead = {
|
||||
"source": "google_places",
|
||||
"name": details.get("name") or place.get("name", ""),
|
||||
"address": details.get("formatted_address")
|
||||
or place.get("vicinity", ""),
|
||||
"phone": details.get("formatted_phone_number", ""),
|
||||
"rating": details.get("rating") or place.get("rating"),
|
||||
"reviews_count": details.get("user_ratings_total")
|
||||
or place.get("user_ratings_total"),
|
||||
"website": "",
|
||||
"rgpd_ok": True, # Données publiques Google Places uniquement
|
||||
}
|
||||
leads.append(lead)
|
||||
logger.info(f"Lead trouvé (Google Places) : {lead['name']}")
|
||||
|
||||
if len(leads) >= max_leads:
|
||||
break
|
||||
|
||||
page_token = data.get("next_page_token")
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
# L'API Google Places nécessite un délai avant d'utiliser next_page_token
|
||||
time.sleep(2)
|
||||
|
||||
logger.info(f"Google Places : {len(leads)} leads collectés.")
|
||||
return leads
|
||||
|
||||
|
||||
# ─── Overpass / OSM Fallback ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class OverpassScraper:
|
||||
"""
|
||||
Fallback OSM via Overpass API.
|
||||
Cible les nœuds/ways dans la boundary MEL sans attribut 'website'.
|
||||
Données publiques ODbL — RGPD OK.
|
||||
"""
|
||||
|
||||
def scrape(self, max_leads: int = 100) -> list[dict]:
|
||||
"""
|
||||
Scrape via Overpass API — retourne des leads normalisés.
|
||||
"""
|
||||
logger.info("Scraping Overpass OSM — boundary MEL")
|
||||
leads = []
|
||||
|
||||
try:
|
||||
resp = requests.post(
|
||||
OVERPASS_URL,
|
||||
data={"data": OVERPASS_MEL_QUERY},
|
||||
headers={
|
||||
"Content-Type": "application/x-www-form-urlencoded", # Fix HRT-72 Bug2
|
||||
"User-Agent": "H3R7Tech-LeadHunter/1.0 (contact@h3r7tech.fr)", # Fix HRT-72 Bug3: overpass-api.de blocks python-requests UA
|
||||
},
|
||||
timeout=90,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
except Exception as e:
|
||||
logger.warning(f"Overpass API error : {e}")
|
||||
return []
|
||||
|
||||
elements = data.get("elements", [])
|
||||
logger.info(f"Overpass : {len(elements)} éléments bruts reçus.")
|
||||
|
||||
for el in elements[:max_leads]:
|
||||
tags = el.get("tags", {})
|
||||
|
||||
# Coordonnées (pour les ways, Overpass retourne 'center')
|
||||
lat = el.get("lat") or (el.get("center") or {}).get("lat")
|
||||
lon = el.get("lon") or (el.get("center") or {}).get("lon")
|
||||
|
||||
name = tags.get("name", "")
|
||||
if not name:
|
||||
continue # Ignorer les lieux sans nom
|
||||
|
||||
addr_parts = [
|
||||
tags.get("addr:housenumber", ""),
|
||||
tags.get("addr:street", ""),
|
||||
tags.get("addr:city", ""),
|
||||
tags.get("addr:postcode", ""),
|
||||
]
|
||||
address = " ".join(p for p in addr_parts if p).strip()
|
||||
if not address and lat and lon:
|
||||
address = f"{lat:.4f},{lon:.4f}"
|
||||
|
||||
lead = {
|
||||
"source": "osm",
|
||||
"name": name,
|
||||
"address": address,
|
||||
"phone": tags.get("phone", tags.get("contact:phone", "")),
|
||||
"rating": None,
|
||||
"reviews_count": None,
|
||||
"website": "",
|
||||
"rgpd_ok": True, # Données publiques ODbL
|
||||
}
|
||||
leads.append(lead)
|
||||
logger.info(f"Lead trouvé (OSM) : {lead['name']}")
|
||||
|
||||
logger.info(f"Overpass : {len(leads)} leads collectés.")
|
||||
return leads
|
||||
|
||||
|
||||
# ─── Orchestrateur ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def run_scraping(
|
||||
max_leads: int = 100, use_google: bool = True, use_osm: bool = True
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Lance le scraping Google Places + fallback OSM.
|
||||
|
||||
Args:
|
||||
max_leads: nombre maximum de leads à collecter au total.
|
||||
use_google: activer Google Places (nécessite GOOGLE_PLACES_API_KEY).
|
||||
use_osm: activer le fallback Overpass OSM.
|
||||
|
||||
Returns:
|
||||
Liste de leads normalisés (dédupliqués par nom + adresse).
|
||||
"""
|
||||
all_leads = []
|
||||
seen_keys = set()
|
||||
|
||||
def _dedup_key(lead: dict) -> str:
|
||||
return f"{lead['name'].lower().strip()}|{lead['address'].lower().strip()[:40]}"
|
||||
|
||||
if use_google:
|
||||
try:
|
||||
scraper = GooglePlacesScraper()
|
||||
google_leads = scraper.scrape(max_leads=max_leads)
|
||||
for lead in google_leads:
|
||||
k = _dedup_key(lead)
|
||||
if k not in seen_keys:
|
||||
seen_keys.add(k)
|
||||
all_leads.append(lead)
|
||||
except EnvironmentError as e:
|
||||
logger.warning(f"Google Places désactivé : {e}")
|
||||
use_google = False
|
||||
|
||||
remaining = max_leads - len(all_leads)
|
||||
if use_osm and remaining > 0:
|
||||
osm_leads = OverpassScraper().scrape(max_leads=remaining)
|
||||
for lead in osm_leads:
|
||||
k = _dedup_key(lead)
|
||||
if k not in seen_keys:
|
||||
seen_keys.add(k)
|
||||
all_leads.append(lead)
|
||||
|
||||
logger.info(
|
||||
f"run_scraping terminé — {len(all_leads)} leads uniques "
|
||||
f"(Google={use_google}, OSM={use_osm}). "
|
||||
f"Quota restant aujourd'hui : {_quota_remaining()}"
|
||||
)
|
||||
return all_leads
|
||||
|
||||
|
||||
# ─── CLI (debug) ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
assert GOOGLE_PLACES_API_KEY, (
|
||||
"GOOGLE_PLACES_API_KEY manquante — "
|
||||
"ajouter 'export GOOGLE_PLACES_API_KEY=xxx' dans /home/h3r7/.env"
|
||||
)
|
||||
leads = run_scraping(max_leads=10)
|
||||
for i, l in enumerate(leads, 1):
|
||||
print(f"{i:02d}. [{l['source']}] {l['name']} — {l['address']}")
|
||||
@@ -5,8 +5,11 @@ import json
|
||||
import requests
|
||||
import subprocess
|
||||
import db
|
||||
from middleware import rate_limit_middleware, access_log_middleware
|
||||
|
||||
app = Flask(__name__)
|
||||
rate_limit_middleware(app)
|
||||
access_log_middleware(app)
|
||||
|
||||
DASHBOARD_API_URL = "http://localhost:8791"
|
||||
COMBINED_API_URL = "http://localhost:8790"
|
||||
@@ -740,19 +743,29 @@ def pod_static(filename=""):
|
||||
@app.route("/turf/api/")
|
||||
@app.route("/turf/api/<path:api_path>")
|
||||
def api_proxy(api_path=""):
|
||||
if api_path.startswith("vitesse"):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("n8n-proxy"):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("backtest"):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("stats"):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("predictions_analysis"):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("parisroi"):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("paris"):
|
||||
# Routes servies par combined_api.py (port 8790) :
|
||||
# backtest, stats, paris, parisroi, races, scores, report, ask, brave-search,
|
||||
# execute-sql, send-email, vitesse, n8n-proxy, predictions_analysis, ideas
|
||||
# Fix HRT-73 : alignement complet avec turf_scraper fix #23
|
||||
COMBINED_ROUTES = (
|
||||
"backtest",
|
||||
"stats",
|
||||
"parisroi",
|
||||
"paris",
|
||||
"predictions_analysis",
|
||||
"vitesse",
|
||||
"n8n-proxy",
|
||||
"races",
|
||||
"race/",
|
||||
"scores",
|
||||
"ask",
|
||||
"brave-search",
|
||||
"execute-sql",
|
||||
"send-email",
|
||||
"report",
|
||||
"ideas",
|
||||
)
|
||||
if any(api_path.startswith(r) for r in COMBINED_ROUTES):
|
||||
url = f"{COMBINED_API_URL}/turf/api/{api_path}"
|
||||
elif api_path.startswith("scoring"):
|
||||
url = f"{DASHBOARD_API_URL}/turf/api/{api_path}"
|
||||
@@ -767,11 +780,17 @@ def api_proxy(api_path=""):
|
||||
if fwd_method in ("POST", "PUT", "PATCH")
|
||||
else None
|
||||
)
|
||||
# Forwarder Authorization header (combined_api.py exige Basic h3r7:h3r7 pour parisroi/paris)
|
||||
fwd_headers = {"Content-Type": "application/json"}
|
||||
if request.headers.get("Authorization"):
|
||||
fwd_headers["Authorization"] = request.headers.get("Authorization")
|
||||
incoming_auth = request.headers.get("Authorization")
|
||||
if incoming_auth:
|
||||
fwd_headers["Authorization"] = incoming_auth
|
||||
resp = requests.request(
|
||||
method=fwd_method, url=url, json=fwd_json, timeout=30, headers=fwd_headers
|
||||
method=fwd_method,
|
||||
url=url,
|
||||
json=fwd_json,
|
||||
timeout=30,
|
||||
headers=fwd_headers,
|
||||
)
|
||||
return resp.content, resp.status_code, {"Content-Type": "application/json"}
|
||||
except Exception as e:
|
||||
|
||||
@@ -10,3 +10,4 @@ markers =
|
||||
load: Tests de charge Locust
|
||||
security: Tests de sécurité
|
||||
smoke: Tests rapides de smoke
|
||||
integration: Tests d'intégration DB et pipeline ML
|
||||
|
||||
43
saas_auth.py
43
saas_auth.py
@@ -14,6 +14,18 @@ import time
|
||||
import json
|
||||
from functools import wraps
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
from threading import Lock
|
||||
|
||||
# ─── Rate limiting login ───────────────────────────────────────────────────────
|
||||
_login_attempts: dict = defaultdict(
|
||||
lambda: {"count": 0, "window_start": 0.0, "blocked_until": 0.0}
|
||||
)
|
||||
_login_lock = Lock()
|
||||
|
||||
LOGIN_RATE_MAX = 5 # max tentatives par fenêtre
|
||||
LOGIN_RATE_WINDOW = 300 # 5 minutes (en secondes)
|
||||
LOGIN_BLOCK_DURATION = 900 # 15 min de blocage après dépassement
|
||||
|
||||
# ─── Blacklist mots de passe faibles ─────────────────────────────────────────
|
||||
# HRT-63 — Validation mots de passe faibles
|
||||
@@ -300,6 +312,37 @@ def login():
|
||||
if not email or not password:
|
||||
return jsonify({"error": "Email et mot de passe requis."}), 400
|
||||
|
||||
# ── Rate limit par IP ────────────────────────────────────────
|
||||
ip = request.remote_addr or "unknown"
|
||||
now = time.time()
|
||||
|
||||
with _login_lock:
|
||||
bucket = _login_attempts[ip]
|
||||
# Lever le blocage si la durée est écoulée
|
||||
if now >= bucket["blocked_until"]:
|
||||
if now - bucket["window_start"] >= LOGIN_RATE_WINDOW:
|
||||
bucket["count"] = 0
|
||||
bucket["window_start"] = now
|
||||
bucket["count"] += 1
|
||||
count = bucket["count"]
|
||||
if count > LOGIN_RATE_MAX:
|
||||
bucket["blocked_until"] = now + LOGIN_BLOCK_DURATION
|
||||
retry_after = LOGIN_BLOCK_DURATION
|
||||
blocked = True
|
||||
else:
|
||||
retry_after = int(LOGIN_RATE_WINDOW - (now - bucket["window_start"]))
|
||||
blocked = False
|
||||
else:
|
||||
blocked = True
|
||||
retry_after = int(bucket["blocked_until"] - now)
|
||||
|
||||
if blocked:
|
||||
resp = jsonify({"error": "Trop de tentatives. Réessayez plus tard."})
|
||||
resp.status_code = 429
|
||||
resp.headers["Retry-After"] = str(retry_after)
|
||||
return resp
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
|
||||
pw_hash = hash_password(password)
|
||||
conn = get_db()
|
||||
user = conn.execute(
|
||||
|
||||
479
scoring_v2.py
479
scoring_v2.py
@@ -11,29 +11,34 @@ import re
|
||||
from datetime import datetime
|
||||
|
||||
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
|
||||
HEADERS = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
||||
HEADERS = {"User-Agent": "Mozilla/5.0", "Accept": "application/json"}
|
||||
|
||||
|
||||
def get_cote_from_db(horse_name, date_course):
|
||||
"""Recupere la cote depuis la table predictions (plus recente et non nulle)"""
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
conn.row_factory = sqlite3.Row
|
||||
c = conn.execute("""
|
||||
c = conn.execute(
|
||||
"""
|
||||
SELECT odds FROM predictions
|
||||
WHERE date=? AND horse_name LIKE ? AND odds > 0
|
||||
ORDER BY created_at DESC LIMIT 1
|
||||
""", (date_course, f"%{horse_name}%"))
|
||||
""",
|
||||
(date_course, f"%{horse_name}%"),
|
||||
)
|
||||
r = c.fetchone()
|
||||
conn.close()
|
||||
return r['odds'] if r else 0
|
||||
return r["odds"] if r else 0
|
||||
|
||||
|
||||
def parse_musique(musique):
|
||||
if not musique:
|
||||
return {}
|
||||
clean = re.sub(r'\(\d+\)', '', musique)
|
||||
resultats = re.findall(r'(\d+|D|0)([amphsc]?)', clean)
|
||||
clean = re.sub(r"\(\d+\)", "", musique)
|
||||
resultats = re.findall(r"(\d+|D|0)([amphsc]?)", clean)
|
||||
positions = []
|
||||
for pos, disc in resultats[:10]:
|
||||
positions.append(99 if pos == 'D' else int(pos))
|
||||
positions.append(99 if pos == "D" else int(pos))
|
||||
if not positions:
|
||||
return {}
|
||||
nb_courses = len(positions)
|
||||
@@ -41,222 +46,385 @@ def parse_musique(musique):
|
||||
nb_places = sum(1 for p in positions if 1 <= p <= 3)
|
||||
recentes = [p for p in positions[:3] if p != 99]
|
||||
forme_recente = sum(recentes) / len(recentes) if recentes else 99
|
||||
tendance = (sum(positions[-4:]) / 4 - sum(positions[:4]) / 4) if len(positions) >= 4 else 0
|
||||
tendance = (
|
||||
(sum(positions[-4:]) / 4 - sum(positions[:4]) / 4) if len(positions) >= 4 else 0
|
||||
)
|
||||
return {
|
||||
'forme_recente': round(forme_recente, 1),
|
||||
'tendance': round(tendance, 1),
|
||||
'tx_victoire': round(nb_victoires / nb_courses * 100, 1) if nb_courses else 0,
|
||||
'tx_place': round(nb_places / nb_courses * 100, 1) if nb_courses else 0,
|
||||
"forme_recente": round(forme_recente, 1),
|
||||
"tendance": round(tendance, 1),
|
||||
"tx_victoire": round(nb_victoires / nb_courses * 100, 1) if nb_courses else 0,
|
||||
"tx_place": round(nb_places / nb_courses * 100, 1) if nb_courses else 0,
|
||||
}
|
||||
|
||||
def score_cheval_v2(p, all_participants, today):
|
||||
|
||||
def get_terrain_condition(penetrometre_intitule: str | None) -> str:
|
||||
"""Normalise le pénétromètre PMU en condition terrain standardisée."""
|
||||
if not penetrometre_intitule:
|
||||
return "inconnu"
|
||||
val = penetrometre_intitule.upper()
|
||||
if any(k in val for k in ("TRES BON", "TRÈS BON", "FERME", "FIRM")):
|
||||
return "bon"
|
||||
if any(k in val for k in ("BON", "GOOD", "STANDARD")):
|
||||
return "bon"
|
||||
if any(k in val for k in ("SOUPLE", "YIELDING", "COLLANT")):
|
||||
return "souple"
|
||||
if any(k in val for k in ("LOURD", "HEAVY", "TRES SOUPLE", "TRÈS SOUPLE")):
|
||||
return "lourd"
|
||||
if any(k in val for k in ("SOFT", "MOU")):
|
||||
return "souple"
|
||||
return "inconnu"
|
||||
|
||||
|
||||
def compute_weather_impact(weather_data: dict | None, terrain_condition: str) -> float:
|
||||
"""
|
||||
Calcule un score d'impact météo/terrain sur [−5, +5].
|
||||
weather_data keys attendues : nebulositecode, temperature, force_vent
|
||||
terrain_condition : 'bon' | 'souple' | 'lourd' | 'inconnu'
|
||||
Retourne un delta de score ML (positif = favorable, négatif = défavorable).
|
||||
"""
|
||||
if not weather_data:
|
||||
return 0.0
|
||||
|
||||
delta = 0.0
|
||||
|
||||
# Terrain
|
||||
if terrain_condition == "lourd":
|
||||
delta -= 3.0
|
||||
elif terrain_condition == "souple":
|
||||
delta -= 1.5
|
||||
elif terrain_condition == "bon":
|
||||
delta += 1.0
|
||||
# inconnu → 0
|
||||
|
||||
# Vent
|
||||
force_vent = weather_data.get("force_vent") or 0
|
||||
try:
|
||||
force_vent = float(force_vent)
|
||||
except (TypeError, ValueError):
|
||||
force_vent = 0.0
|
||||
if force_vent >= 50:
|
||||
delta -= 2.0
|
||||
elif force_vent >= 30:
|
||||
delta -= 1.0
|
||||
|
||||
# Températures extrêmes
|
||||
temperature = weather_data.get("temperature")
|
||||
try:
|
||||
temperature = float(temperature) if temperature is not None else None
|
||||
except (TypeError, ValueError):
|
||||
temperature = None
|
||||
if temperature is not None:
|
||||
if temperature <= 0:
|
||||
delta -= 1.0
|
||||
elif temperature >= 35:
|
||||
delta -= 1.0
|
||||
|
||||
return round(max(-5.0, min(5.0, delta)), 2)
|
||||
|
||||
|
||||
def score_cheval_v2(p, all_participants, today, weather_data=None):
|
||||
"""
|
||||
Score un cheval pour le modèle V2.
|
||||
weather_data (optionnel) : dict issu de pmu_meteo pour cette réunion.
|
||||
Backward-compatible : weather_data=None → comportement identique à avant HRT-83.
|
||||
"""
|
||||
score = 0
|
||||
details = {}
|
||||
|
||||
|
||||
# 1. COTE - Essaye PMU API, sinon DB
|
||||
horse_name = p.get('nom', '')
|
||||
horse_name = p.get("nom", "")
|
||||
cote = 0
|
||||
|
||||
|
||||
# Essayer d'abord depuis l'API PMU
|
||||
rapport = p.get('dernierRapportDirect', {})
|
||||
rapport = p.get("dernierRapportDirect", {})
|
||||
if rapport:
|
||||
cote = rapport.get('rapport', 0)
|
||||
cote = rapport.get("rapport", 0)
|
||||
if not cote:
|
||||
rapport_ref = p.get('dernierRapportReference', {})
|
||||
cote = rapport_ref.get('rapport', 0) if rapport_ref else 0
|
||||
|
||||
rapport_ref = p.get("dernierRapportReference", {})
|
||||
cote = rapport_ref.get("rapport", 0) if rapport_ref else 0
|
||||
|
||||
# Fallback: aller chercher dans la DB
|
||||
if not cote or cote == 0:
|
||||
cote = get_cote_from_db(horse_name, today)
|
||||
|
||||
|
||||
# Si toujours pas de cote, utiliser 99 comme valeur par defaut
|
||||
if not cote or cote == 0:
|
||||
cote = 99.0
|
||||
|
||||
|
||||
score_cote = max(2, min(10, 20 / (1 + cote * 0.15))) if cote > 0 else 2
|
||||
score += score_cote
|
||||
details['cote'] = round(cote, 1)
|
||||
details['score_cote'] = round(score_cote, 1)
|
||||
|
||||
details["cote"] = round(cote, 1)
|
||||
details["score_cote"] = round(score_cote, 1)
|
||||
|
||||
# 2. FORME - AUGMENTE a 30 pts
|
||||
musique_stats = parse_musique(p.get('musique', ''))
|
||||
forme = musique_stats.get('forme_recente', 99)
|
||||
score_forme = 30 if forme <= 1 else 25 if forme <= 2 else 20 if forme <= 3 else 15 if forme <= 5 else 8 if forme <= 8 else 0
|
||||
musique_stats = parse_musique(p.get("musique", ""))
|
||||
forme = musique_stats.get("forme_recente", 99)
|
||||
score_forme = (
|
||||
30
|
||||
if forme <= 1
|
||||
else 25
|
||||
if forme <= 2
|
||||
else 20
|
||||
if forme <= 3
|
||||
else 15
|
||||
if forme <= 5
|
||||
else 8
|
||||
if forme <= 8
|
||||
else 0
|
||||
)
|
||||
score += score_forme
|
||||
details['forme_recente'] = forme
|
||||
details['score_forme'] = score_forme
|
||||
|
||||
details["forme_recente"] = forme
|
||||
details["score_forme"] = score_forme
|
||||
|
||||
# 3. TAUX VICTOIRE (15 pts)
|
||||
nb_courses_total = p.get('nombreCourses', 0)
|
||||
nb_victoires_total = p.get('nombreVictoires', 0)
|
||||
nb_courses_total = p.get("nombreCourses", 0)
|
||||
nb_victoires_total = p.get("nombreVictoires", 0)
|
||||
tx_vic = (nb_victoires_total / nb_courses_total * 100) if nb_courses_total else 0
|
||||
score_vic = min(15, tx_vic * 0.5)
|
||||
score += score_vic
|
||||
details['tx_victoire'] = round(tx_vic, 1)
|
||||
details['score_victoire'] = round(score_vic, 1)
|
||||
|
||||
details["tx_victoire"] = round(tx_vic, 1)
|
||||
details["score_victoire"] = round(score_vic, 1)
|
||||
|
||||
# 4. TAUX PLACE (15 pts)
|
||||
nb_places_total = p.get('nombrePlaces', 0)
|
||||
nb_places_total = p.get("nombrePlaces", 0)
|
||||
tx_place = (nb_places_total / nb_courses_total * 100) if nb_courses_total else 0
|
||||
score_place = min(15, tx_place * 0.2)
|
||||
score += score_place
|
||||
details['tx_place'] = round(tx_place, 1)
|
||||
details['score_place'] = round(score_place, 1)
|
||||
|
||||
details["tx_place"] = round(tx_place, 1)
|
||||
details["score_place"] = round(score_place, 1)
|
||||
|
||||
# 5. REDUCTION KM (10 pts)
|
||||
rk = p.get('reductionKilometrique', 0)
|
||||
all_rk = [x.get('reductionKilometrique', 0) for x in all_participants if x.get('reductionKilometrique', 0) > 0]
|
||||
rk = p.get("reductionKilometrique", 0)
|
||||
all_rk = [
|
||||
x.get("reductionKilometrique", 0)
|
||||
for x in all_participants
|
||||
if x.get("reductionKilometrique", 0) > 0
|
||||
]
|
||||
if rk > 0 and all_rk:
|
||||
score_rk = 10 * (1 - (rk - min(all_rk)) / (max(all_rk) - min(all_rk))) if max(all_rk) > min(all_rk) else 5
|
||||
score_rk = (
|
||||
10 * (1 - (rk - min(all_rk)) / (max(all_rk) - min(all_rk)))
|
||||
if max(all_rk) > min(all_rk)
|
||||
else 5
|
||||
)
|
||||
else:
|
||||
score_rk = 0
|
||||
score += score_rk
|
||||
details['rk'] = rk
|
||||
details['score_rk'] = round(score_rk, 1)
|
||||
|
||||
details["rk"] = rk
|
||||
details["score_rk"] = round(score_rk, 1)
|
||||
|
||||
# 6. TENDANCE (10 pts)
|
||||
tendance = musique_stats.get('tendance', 0)
|
||||
tendance = musique_stats.get("tendance", 0)
|
||||
score_tendance = min(10, max(0, 5 + tendance))
|
||||
score += score_tendance
|
||||
details['tendance'] = tendance
|
||||
details['score_tendance'] = round(score_tendance, 1)
|
||||
|
||||
details["tendance"] = tendance
|
||||
details["score_tendance"] = round(score_tendance, 1)
|
||||
|
||||
# 7. AVIS ENTRAINEUR (5 pts)
|
||||
avis = p.get('avisEntraineur', 'NEUTRE')
|
||||
score_avis = {'POSITIF': 5, 'TRES_POSITIF': 5, 'NEUTRE': 2, 'NEGATIF': 0, 'TRES_NEGATIF': 0}.get(avis, 2)
|
||||
avis = p.get("avisEntraineur", "NEUTRE")
|
||||
score_avis = {
|
||||
"POSITIF": 5,
|
||||
"TRES_POSITIF": 5,
|
||||
"NEUTRE": 2,
|
||||
"NEGATIF": 0,
|
||||
"TRES_NEGATIF": 0,
|
||||
}.get(avis, 2)
|
||||
score += score_avis
|
||||
details['avis_entraineur'] = avis
|
||||
details['score_avis'] = score_avis
|
||||
|
||||
details["avis_entraineur"] = avis
|
||||
details["score_avis"] = score_avis
|
||||
|
||||
# 8. BONUS OUTSIDER (5 pts)
|
||||
bonus_outsider = 5 if forme <= 3 and cote >= 10 else 0
|
||||
score += bonus_outsider
|
||||
details['bonus_outsider'] = bonus_outsider
|
||||
|
||||
details["bonus_outsider"] = bonus_outsider
|
||||
|
||||
# Driver change penalty
|
||||
if p.get('driverChange', False):
|
||||
if p.get("driverChange", False):
|
||||
score -= 3
|
||||
details['driver_change'] = True
|
||||
|
||||
details['score_total'] = round(score, 1)
|
||||
details['musique'] = p.get('musique', '')
|
||||
details['nb_victoires'] = nb_victoires_total
|
||||
details['nb_places'] = nb_places_total
|
||||
details['nb_courses'] = nb_courses_total
|
||||
|
||||
details["driver_change"] = True
|
||||
|
||||
# 9. METEO & TERRAIN (HRT-83) — premium feature, weather_data=None → skip
|
||||
penetrometre = p.get("penetrometre_intitule", "") or ""
|
||||
terrain_condition = (
|
||||
get_terrain_condition(penetrometre) if penetrometre else "inconnu"
|
||||
)
|
||||
weather_impact = 0.0
|
||||
if weather_data is not None:
|
||||
weather_impact = compute_weather_impact(weather_data, terrain_condition)
|
||||
score += weather_impact
|
||||
details["terrain_condition"] = terrain_condition
|
||||
details["weather_impact"] = weather_impact
|
||||
|
||||
details["score_total"] = round(score, 1)
|
||||
details["musique"] = p.get("musique", "")
|
||||
details["nb_victoires"] = nb_victoires_total
|
||||
details["nb_places"] = nb_places_total
|
||||
details["nb_courses"] = nb_courses_total
|
||||
|
||||
return round(score, 1), details
|
||||
|
||||
|
||||
def get_ze2sur4_combinaisons(top4):
|
||||
combinaisons = []
|
||||
for i in range(4):
|
||||
for j in range(i+1, 4):
|
||||
for j in range(i + 1, 4):
|
||||
c1 = top4[i]
|
||||
c2 = top4[j]
|
||||
combinaisons.append({
|
||||
'cheval1': c1['nom'],
|
||||
'numero1': c1['numero'],
|
||||
'cheval2': c2['nom'],
|
||||
'numero2': c2['numero'],
|
||||
'mise': 1.0,
|
||||
})
|
||||
combinaisons.append(
|
||||
{
|
||||
"cheval1": c1["nom"],
|
||||
"numero1": c1["numero"],
|
||||
"cheval2": c2["nom"],
|
||||
"numero2": c2["numero"],
|
||||
"mise": 1.0,
|
||||
}
|
||||
)
|
||||
return combinaisons
|
||||
|
||||
|
||||
def build_recommendations_v2(scored_horses):
|
||||
ranked = sorted(scored_horses, key=lambda x: x['score'], reverse=True)
|
||||
ranked = sorted(scored_horses, key=lambda x: x["score"], reverse=True)
|
||||
if len(ranked) < 4:
|
||||
return None
|
||||
|
||||
|
||||
top1, top2, top3, top4 = ranked[0], ranked[1], ranked[2], ranked[3]
|
||||
top4_list = ranked[:4]
|
||||
|
||||
|
||||
def confiance(s):
|
||||
return "FORTE" if s >= 55 else "BONNE" if s >= 45 else "MOYENNE" if s >= 35 else "FAIBLE"
|
||||
|
||||
return (
|
||||
"FORTE"
|
||||
if s >= 55
|
||||
else "BONNE"
|
||||
if s >= 45
|
||||
else "MOYENNE"
|
||||
if s >= 35
|
||||
else "FAIBLE"
|
||||
)
|
||||
|
||||
ze2_combinaisons = get_ze2sur4_combinaisons(top4_list)
|
||||
mise_ze2 = len(ze2_combinaisons) * 1.0
|
||||
|
||||
|
||||
return {
|
||||
'simple_gagnant': {
|
||||
'cheval': top1['nom'], 'numero': top1['numero'], 'cote': top1['details']['cote'],
|
||||
'score': top1['score'], 'confiance': confiance(top1['score']),
|
||||
'mise_suggeree': 2.0, 'gain_potentiel': round(2.0 * top1['details']['cote'], 2)
|
||||
"simple_gagnant": {
|
||||
"cheval": top1["nom"],
|
||||
"numero": top1["numero"],
|
||||
"cote": top1["details"]["cote"],
|
||||
"score": top1["score"],
|
||||
"confiance": confiance(top1["score"]),
|
||||
"mise_suggeree": 2.0,
|
||||
"gain_potentiel": round(2.0 * top1["details"]["cote"], 2),
|
||||
},
|
||||
'ze2_sur_4': {
|
||||
'top4': [{'nom': h['nom'], 'numero': h['numero']} for h in top4_list],
|
||||
'combinaisons': ze2_combinaisons,
|
||||
'mise_totale': mise_ze2,
|
||||
'nb_combinaisons': len(ze2_combinaisons),
|
||||
'confiance': confiance((top1['score'] + top2['score'] + top3['score'] + top4['score']) / 4),
|
||||
'explication': 'Jouer les 6 combinaisons de 2 chevaux parmi les 4 premiers'
|
||||
"ze2_sur_4": {
|
||||
"top4": [{"nom": h["nom"], "numero": h["numero"]} for h in top4_list],
|
||||
"combinaisons": ze2_combinaisons,
|
||||
"mise_totale": mise_ze2,
|
||||
"nb_combinaisons": len(ze2_combinaisons),
|
||||
"confiance": confiance(
|
||||
(top1["score"] + top2["score"] + top3["score"] + top4["score"]) / 4
|
||||
),
|
||||
"explication": "Jouer les 6 combinaisons de 2 chevaux parmi les 4 premiers",
|
||||
},
|
||||
'outsider': _find_outsider(ranked),
|
||||
'budget_total': 2.0 + mise_ze2,
|
||||
"outsider": _find_outsider(ranked),
|
||||
"budget_total": 2.0 + mise_ze2,
|
||||
}
|
||||
|
||||
|
||||
def _find_outsider(ranked):
|
||||
for h in ranked[3:7]:
|
||||
d = h['details']
|
||||
if d['cote'] >= 12 and d['forme_recente'] <= 4 and d['bonus_outsider'] == 5:
|
||||
d = h["details"]
|
||||
if d["cote"] >= 12 and d["forme_recente"] <= 4 and d["bonus_outsider"] == 5:
|
||||
return {
|
||||
'cheval': h['nom'], 'numero': h['numero'], 'cote': d['cote'],
|
||||
'mise_suggeree': 1.0, 'gain_potentiel': round(1.0 * d['cote'], 2)
|
||||
"cheval": h["nom"],
|
||||
"numero": h["numero"],
|
||||
"cote": d["cote"],
|
||||
"mise_suggeree": 1.0,
|
||||
"gain_potentiel": round(1.0 * d["cote"], 2),
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def save_to_db(scored_horses, date_course, hippodrome, libelle):
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
cursor = conn.cursor()
|
||||
|
||||
|
||||
cursor.execute("DELETE FROM scoring WHERE date = ?", (date_course,))
|
||||
|
||||
|
||||
for i, h in enumerate(scored_horses, 1):
|
||||
d = h['details']
|
||||
cursor.execute("""
|
||||
d = h["details"]
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO scoring (date, race_name, horse_number, horse_name, score,
|
||||
score_cote, score_forme, score_victoire, score_place, score_rk,
|
||||
score_tendance, score_avis, cote, forme_recente, tx_victoire, tx_place,
|
||||
avis_entraineur, musique, rang_scoring, scoring_version)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'v2')
|
||||
""", (date_course, libelle, h['numero'], h['nom'], h['score'],
|
||||
d.get('score_cote', 0), d.get('score_forme', 0), d.get('score_victoire', 0),
|
||||
d.get('score_place', 0), d.get('score_rk', 0), d.get('score_tendance', 0),
|
||||
d.get('score_avis', 0), d.get('cote', 0), d.get('forme_recente', 0),
|
||||
d.get('tx_victoire', 0), d.get('tx_place', 0), d.get('avis_entraineur', ''),
|
||||
d.get('musique', ''), i))
|
||||
|
||||
""",
|
||||
(
|
||||
date_course,
|
||||
libelle,
|
||||
h["numero"],
|
||||
h["nom"],
|
||||
h["score"],
|
||||
d.get("score_cote", 0),
|
||||
d.get("score_forme", 0),
|
||||
d.get("score_victoire", 0),
|
||||
d.get("score_place", 0),
|
||||
d.get("score_rk", 0),
|
||||
d.get("score_tendance", 0),
|
||||
d.get("score_avis", 0),
|
||||
d.get("cote", 0),
|
||||
d.get("forme_recente", 0),
|
||||
d.get("tx_victoire", 0),
|
||||
d.get("tx_place", 0),
|
||||
d.get("avis_entraineur", ""),
|
||||
d.get("musique", ""),
|
||||
i,
|
||||
),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
print(f"💾 {len(scored_horses)} scores enregistres en BDD pour {date_course}")
|
||||
|
||||
|
||||
def main():
|
||||
today = datetime.now().strftime('%Y-%m-%d')
|
||||
date_pmu = datetime.now().strftime('%d%m%Y')
|
||||
print(f"=== SCORING V2 - ZE2 SUR4 OPTIMISE === {datetime.now().strftime('%d/%m/%Y %H:%M')} ===")
|
||||
|
||||
today = datetime.now().strftime("%Y-%m-%d")
|
||||
date_pmu = datetime.now().strftime("%d%m%Y")
|
||||
print(
|
||||
f"=== SCORING V2 - ZE2 SUR4 OPTIMISE === {datetime.now().strftime('%d/%m/%Y %H:%M')} ==="
|
||||
)
|
||||
|
||||
try:
|
||||
url = f"https://turfinfo.api.pmu.fr/rest/client/1/programme/{date_pmu}/reunions"
|
||||
r = requests.get(url, headers=HEADERS, timeout=15)
|
||||
reunions = r.json().get('programme', {}).get('reunions', [])
|
||||
reunions = r.json().get("programme", {}).get("reunions", [])
|
||||
except Exception as e:
|
||||
print(f"Erreur: {e}")
|
||||
return
|
||||
|
||||
|
||||
quinte = None
|
||||
for reunion in reunions:
|
||||
for course in reunion.get('courses', []):
|
||||
for course in reunion.get("courses", []):
|
||||
paris_types = [p["typePari"] for p in course.get("paris", [])]
|
||||
if any("QUINTE" in p for p in paris_types) or "PARIS-TURF" in course.get('libelle', ''):
|
||||
quinte = (reunion['numOfficiel'], course['numOrdre'], course.get('libelle', ''),
|
||||
reunion['hippodrome']['libelleCourt'], course.get('heureDepart', 0))
|
||||
if any("QUINTE" in p for p in paris_types) or "PARIS-TURF" in course.get(
|
||||
"libelle", ""
|
||||
):
|
||||
quinte = (
|
||||
reunion["numOfficiel"],
|
||||
course["numOrdre"],
|
||||
course.get("libelle", ""),
|
||||
reunion["hippodrome"]["libelleCourt"],
|
||||
course.get("heureDepart", 0),
|
||||
)
|
||||
break
|
||||
if quinte:
|
||||
break
|
||||
|
||||
|
||||
if not quinte:
|
||||
# Fallback: utiliser la premiere reunion francaise avec predictions
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
conn.row_factory = sqlite3.Row
|
||||
r = conn.execute("""
|
||||
r = conn.execute(
|
||||
"""
|
||||
SELECT r.num_reunion, r.hippodrome_court, c.num_course, c.libelle
|
||||
FROM pmu_courses c
|
||||
JOIN pmu_reunions r ON r.date_programme=c.date_programme AND r.num_reunion=c.num_reunion
|
||||
@@ -264,57 +432,82 @@ def main():
|
||||
AND EXISTS (SELECT 1 FROM predictions p WHERE p.date=? AND p.source='canalturf_partants'
|
||||
AND p.race_name LIKE '%' || c.libelle || '%')
|
||||
ORDER BY c.heure_depart_str ASC LIMIT 1
|
||||
""", (today, today)).fetchone()
|
||||
""",
|
||||
(today, today),
|
||||
).fetchone()
|
||||
conn.close()
|
||||
if r:
|
||||
quinte = (r['num_reunion'], r['num_course'], r['libelle'], r['hippodrome_court'], 0)
|
||||
quinte = (
|
||||
r["num_reunion"],
|
||||
r["num_course"],
|
||||
r["libelle"],
|
||||
r["hippodrome_court"],
|
||||
0,
|
||||
)
|
||||
else:
|
||||
print("Aucune course trouvee")
|
||||
return
|
||||
|
||||
|
||||
num_r, num_c, libelle, hippodrome, heure_ts = quinte
|
||||
heure = datetime.fromtimestamp(heure_ts/1000).strftime('%H:%M') if heure_ts else '13:55'
|
||||
heure = (
|
||||
datetime.fromtimestamp(heure_ts / 1000).strftime("%H:%M")
|
||||
if heure_ts
|
||||
else "13:55"
|
||||
)
|
||||
print(f"Course: {libelle} - {hippodrome} {heure}")
|
||||
|
||||
|
||||
try:
|
||||
url = f"https://turfinfo.api.pmu.fr/rest/client/1/programme/{date_pmu}/R{num_r}/C{num_c}/participants"
|
||||
r = requests.get(url, headers=HEADERS, timeout=15)
|
||||
participants = [p for p in r.json().get('participants', []) if p.get('statut') == 'PARTANT']
|
||||
participants = [
|
||||
p for p in r.json().get("participants", []) if p.get("statut") == "PARTANT"
|
||||
]
|
||||
except Exception as e:
|
||||
print(f"Erreur: {e}")
|
||||
return
|
||||
|
||||
|
||||
scored_horses = []
|
||||
for p in participants:
|
||||
score, details = score_cheval_v2(p, participants, today)
|
||||
scored_horses.append({'nom': p['nom'], 'numero': p['numPmu'], 'score': score, 'details': details})
|
||||
|
||||
ranked = sorted(scored_horses, key=lambda x: x['score'], reverse=True)
|
||||
scored_horses.append(
|
||||
{"nom": p["nom"], "numero": p["numPmu"], "score": score, "details": details}
|
||||
)
|
||||
|
||||
ranked = sorted(scored_horses, key=lambda x: x["score"], reverse=True)
|
||||
print(f"\n=== TOP 4 ===")
|
||||
for i, h in enumerate(ranked[:4], 1):
|
||||
d = h['details']
|
||||
print(f"{i}. #{h['numero']:>2} {h['nom']:<20} Score:{h['score']:.1f} Cote:{d['cote']:.1f}")
|
||||
|
||||
d = h["details"]
|
||||
print(
|
||||
f"{i}. #{h['numero']:>2} {h['nom']:<20} Score:{h['score']:.1f} Cote:{d['cote']:.1f}"
|
||||
)
|
||||
|
||||
save_to_db(ranked, today, hippodrome, libelle)
|
||||
|
||||
|
||||
reco = build_recommendations_v2(scored_horses)
|
||||
if reco:
|
||||
print(f"\n=== RECOMMANDATIONS ===")
|
||||
sg = reco['simple_gagnant']
|
||||
sg = reco["simple_gagnant"]
|
||||
print(f"\n🎯 SIMPLE GAGNANT:")
|
||||
print(f" #{sg['numero']} {sg['cheval']} @ {sg['cote']}/1 (mise {sg['mise_suggeree']}EUR)")
|
||||
|
||||
ze2 = reco['ze2_sur_4']
|
||||
print(
|
||||
f" #{sg['numero']} {sg['cheval']} @ {sg['cote']}/1 (mise {sg['mise_suggeree']}EUR)"
|
||||
)
|
||||
|
||||
ze2 = reco["ze2_sur_4"]
|
||||
print(f"\n🎰 ZE 2 SUR 4 (TOP 4: {', '.join([h['nom'] for h in ze2['top4']])}")
|
||||
print(f" Mise totale: {ze2['mise_totale']}EUR ({ze2['nb_combinaisons']} combis x 1EUR)")
|
||||
print(
|
||||
f" Mise totale: {ze2['mise_totale']}EUR ({ze2['nb_combinaisons']} combis x 1EUR)"
|
||||
)
|
||||
print(f" Confiance: {ze2['confiance']}")
|
||||
print(f" Combinaisons:")
|
||||
for c in ze2['combinaisons']:
|
||||
print(f" {c['numero1']}-{c['cheval1']} + {c['numero2']}-{c['cheval2']}")
|
||||
|
||||
for c in ze2["combinaisons"]:
|
||||
print(
|
||||
f" {c['numero1']}-{c['cheval1']} + {c['numero2']}-{c['cheval2']}"
|
||||
)
|
||||
|
||||
print(f"\n💰 BUDGET TOTAL: {reco['budget_total']}EUR")
|
||||
print(f" - Simple Gagnant: 2EUR")
|
||||
print(f" - ZE 2 sur 4: {ze2['mise_totale']}EUR")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -141,7 +141,7 @@ class TestJWTAuthentication:
|
||||
"invalid_signature_here"
|
||||
)
|
||||
resp = requests.get(
|
||||
f"{BASE_URL}/api/races",
|
||||
f"{BASE_URL}/api/v1/predictions/today",
|
||||
headers={"Authorization": f"Bearer {expired_token}"},
|
||||
timeout=5,
|
||||
)
|
||||
@@ -153,7 +153,7 @@ class TestJWTAuthentication:
|
||||
"""Un token JWT malformé doit être rejeté."""
|
||||
for bad_token in ["not.a.jwt", "Bearer", "null", "undefined", ""]:
|
||||
resp = requests.get(
|
||||
f"{BASE_URL}/api/races",
|
||||
f"{BASE_URL}/api/v1/predictions/today",
|
||||
headers={"Authorization": f"Bearer {bad_token}"},
|
||||
timeout=5,
|
||||
)
|
||||
@@ -163,7 +163,7 @@ class TestJWTAuthentication:
|
||||
|
||||
def test_jwt_sans_token(self):
|
||||
"""Sans token, les routes protégées doivent retourner 401."""
|
||||
resp = requests.get(f"{BASE_URL}/api/export/csv", timeout=5)
|
||||
resp = requests.get(f"{BASE_URL}/api/v1/export/csv", timeout=5)
|
||||
assert resp.status_code in (401, 403), (
|
||||
f"Route protégée accessible sans token: status={resp.status_code}"
|
||||
)
|
||||
@@ -386,6 +386,53 @@ class TestWeakPasswordRejection:
|
||||
assert resp.status_code == 400, (
|
||||
f"Mot de passe sans lettre accepté: status={resp.status_code}"
|
||||
)
|
||||
# === Tests rate limiting login ===
|
||||
|
||||
|
||||
class TestLoginRateLimit:
|
||||
"""Tests rate limiting sur /api/v1/auth/login."""
|
||||
|
||||
TARGET_URL = (
|
||||
os.environ.get("APP_URL", "http://localhost:8792") + "/api/v1/auth/login"
|
||||
)
|
||||
|
||||
def test_login_brute_force_blocked_after_5_attempts(self):
|
||||
"""Après 5 tentatives, le 6ème appel doit retourner 429."""
|
||||
# Utiliser un email unique pour isoler le test
|
||||
email = f"ratelimit_test_{int(time.time())}@h3r7.tech"
|
||||
for i in range(5):
|
||||
resp = requests.post(
|
||||
self.TARGET_URL,
|
||||
json={"email": email, "password": "wrong_password"},
|
||||
timeout=5,
|
||||
)
|
||||
assert resp.status_code in (400, 401), (
|
||||
f"Tentative {i + 1}: status inattendu {resp.status_code}"
|
||||
)
|
||||
# La 6ème tentative doit être bloquée
|
||||
resp = requests.post(
|
||||
self.TARGET_URL,
|
||||
json={"email": email, "password": "wrong_password"},
|
||||
timeout=5,
|
||||
)
|
||||
assert resp.status_code == 429, (
|
||||
f"Rate limit non appliqué après 5 tentatives: got {resp.status_code}"
|
||||
)
|
||||
assert "Retry-After" in resp.headers, "Header Retry-After manquant sur 429"
|
||||
|
||||
def test_login_429_has_retry_after_header(self):
|
||||
"""La réponse 429 doit inclure Retry-After."""
|
||||
email = f"ratelimit_test2_{int(time.time())}@h3r7.tech"
|
||||
for _ in range(6):
|
||||
requests.post(
|
||||
self.TARGET_URL, json={"email": email, "password": "x"}, timeout=5
|
||||
)
|
||||
resp = requests.post(
|
||||
self.TARGET_URL, json={"email": email, "password": "x"}, timeout=5
|
||||
)
|
||||
if resp.status_code == 429:
|
||||
assert "Retry-After" in resp.headers
|
||||
assert int(resp.headers["Retry-After"]) > 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
300
tests/test_ml_cache_integrity.py
Normal file
300
tests/test_ml_cache_integrity.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""
|
||||
test_ml_cache_integrity.py — Test d'intégration : zéro NULL dans ml_predictions_cache
|
||||
SaaS Turf Prédictions IA
|
||||
Ticket: HRT-43 (suite au fix HRT-41 — métadonnées manquantes dans le cache ML)
|
||||
|
||||
Ces tests vérifient que la table ml_predictions_cache ne contient aucune ligne
|
||||
avec des métadonnées NULL (hippodrome, race_label, heure) pour la date courante,
|
||||
après le job ML de 19h30.
|
||||
|
||||
Usage:
|
||||
pytest tests/test_ml_cache_integrity.py -v -m integration
|
||||
pytest tests/test_ml_cache_integrity.py -v -m integration --date 2026-04-26
|
||||
|
||||
Variables d'environnement:
|
||||
TURF_DB_PATH : chemin vers turf.db (défaut: /home/h3r7/turf_scraper/turf.db)
|
||||
TEST_DATE : date cible au format YYYY-MM-DD (défaut: date du jour)
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import pytest
|
||||
from datetime import date, datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Configuration
|
||||
# ============================================================
|
||||
|
||||
DEFAULT_DB_PATH = "/home/h3r7/turf_scraper/turf.db"
|
||||
DB_PATH = os.environ.get("TURF_DB_PATH", DEFAULT_DB_PATH)
|
||||
|
||||
|
||||
def _get_test_date() -> str:
|
||||
"""Retourne la date cible pour les tests (env TEST_DATE ou date du jour)."""
|
||||
env_date = os.environ.get("TEST_DATE", "")
|
||||
if env_date:
|
||||
try:
|
||||
datetime.strptime(env_date, "%Y-%m-%d")
|
||||
return env_date
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
f"TEST_DATE invalide : '{env_date}'. Format attendu : YYYY-MM-DD"
|
||||
)
|
||||
return date.today().isoformat()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Fixture : connexion DB en lecture seule
|
||||
# ============================================================
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def db_connection():
|
||||
"""
|
||||
Connexion SQLite en mode lecture seule (uri=True + ?mode=ro).
|
||||
Garantit qu'aucune modification accidentelle de la DB de prod n'est possible.
|
||||
"""
|
||||
db_path = Path(DB_PATH)
|
||||
if not db_path.exists():
|
||||
pytest.skip(
|
||||
f"Base de données introuvable : {DB_PATH}. "
|
||||
"Définir TURF_DB_PATH ou vérifier le chemin."
|
||||
)
|
||||
|
||||
uri = f"file:{db_path.as_posix()}?mode=ro"
|
||||
conn = sqlite3.connect(uri, uri=True)
|
||||
conn.row_factory = sqlite3.Row
|
||||
yield conn
|
||||
conn.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def target_date():
|
||||
"""Date cible pour les tests (date du jour ou TEST_DATE)."""
|
||||
return _get_test_date()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Tests d'intégration
|
||||
# ============================================================
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
class TestMlCacheNullIntegrity:
|
||||
"""
|
||||
Vérifie qu'après le job ML de 19h30, la table ml_predictions_cache
|
||||
ne contient aucune métadonnée NULL pour la date courante.
|
||||
|
||||
Régression testée : HRT-41 (Fix #17 — métadonnées manquantes dans le cache ML)
|
||||
"""
|
||||
|
||||
def test_table_exists(self, db_connection):
|
||||
"""Vérifie que la table ml_predictions_cache existe dans la DB."""
|
||||
cursor = db_connection.execute(
|
||||
"SELECT name FROM sqlite_master "
|
||||
"WHERE type='table' AND name='ml_predictions_cache'"
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
assert row is not None, (
|
||||
"La table ml_predictions_cache est introuvable dans la base de données. "
|
||||
"Vérifier que le job ML a bien créé la table."
|
||||
)
|
||||
|
||||
def test_rows_exist_for_today(self, db_connection, target_date):
|
||||
"""
|
||||
Vérifie que des prédictions existent pour la date cible.
|
||||
|
||||
Ce test passe en skip si aucune ligne n'existe (ex: avant le job 19h30).
|
||||
Il échoue uniquement si le job a manifestement tourné mais a laissé 0 lignes.
|
||||
"""
|
||||
cursor = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache WHERE date = ?",
|
||||
(target_date,),
|
||||
)
|
||||
count = cursor.fetchone()["cnt"]
|
||||
|
||||
if count == 0:
|
||||
pytest.skip(
|
||||
f"Aucune prédiction en cache pour le {target_date}. "
|
||||
"Ce test doit être exécuté après le job ML de 19h30."
|
||||
)
|
||||
|
||||
def test_zero_null_hippodrome_today(self, db_connection, target_date):
|
||||
"""
|
||||
CRITÈRE D'ACCEPTATION PRINCIPAL (HRT-43) :
|
||||
Vérifie que COUNT(*) WHERE date = today AND hippodrome IS NULL = 0.
|
||||
|
||||
Régression directe du bug HRT-41 : le champ hippodrome était NULL
|
||||
pour toutes les prédictions du cache ML.
|
||||
"""
|
||||
# Vérifier si des données existent avant de tester les NULLs
|
||||
cursor_total = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache WHERE date = ?",
|
||||
(target_date,),
|
||||
)
|
||||
total = cursor_total.fetchone()["cnt"]
|
||||
if total == 0:
|
||||
pytest.skip(
|
||||
f"Aucune prédiction en cache pour le {target_date}. "
|
||||
"Lancer ce test après le job ML de 19h30."
|
||||
)
|
||||
|
||||
cursor = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache "
|
||||
"WHERE date = ? AND hippodrome IS NULL",
|
||||
(target_date,),
|
||||
)
|
||||
null_count = cursor.fetchone()["cnt"]
|
||||
|
||||
assert null_count == 0, (
|
||||
f"RÉGRESSION HRT-41 DÉTECTÉE : {null_count} ligne(s) avec hippodrome IS NULL "
|
||||
f"dans ml_predictions_cache pour le {target_date}. "
|
||||
"Le patch de métadonnées n'a pas été appliqué correctement."
|
||||
)
|
||||
|
||||
def test_zero_null_race_label_today(self, db_connection, target_date):
|
||||
"""
|
||||
Vérifie que COUNT(*) WHERE date = today AND race_label IS NULL = 0.
|
||||
|
||||
Complément du test hippodrome : vérifie que le libellé de course
|
||||
est bien renseigné pour toutes les prédictions.
|
||||
"""
|
||||
cursor_total = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache WHERE date = ?",
|
||||
(target_date,),
|
||||
)
|
||||
total = cursor_total.fetchone()["cnt"]
|
||||
if total == 0:
|
||||
pytest.skip(
|
||||
f"Aucune prédiction en cache pour le {target_date}. "
|
||||
"Lancer ce test après le job ML de 19h30."
|
||||
)
|
||||
|
||||
cursor = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache "
|
||||
"WHERE date = ? AND race_label IS NULL",
|
||||
(target_date,),
|
||||
)
|
||||
null_count = cursor.fetchone()["cnt"]
|
||||
|
||||
assert null_count == 0, (
|
||||
f"ANOMALIE : {null_count} ligne(s) avec race_label IS NULL "
|
||||
f"dans ml_predictions_cache pour le {target_date}. "
|
||||
"Vérifier le pipeline de patch de métadonnées."
|
||||
)
|
||||
|
||||
def test_zero_null_heure_today(self, db_connection, target_date):
|
||||
"""
|
||||
Vérifie que COUNT(*) WHERE date = today AND heure IS NULL = 0.
|
||||
|
||||
Vérifie que l'heure de course est bien renseignée pour toutes les prédictions.
|
||||
"""
|
||||
cursor_total = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache WHERE date = ?",
|
||||
(target_date,),
|
||||
)
|
||||
total = cursor_total.fetchone()["cnt"]
|
||||
if total == 0:
|
||||
pytest.skip(
|
||||
f"Aucune prédiction en cache pour le {target_date}. "
|
||||
"Lancer ce test après le job ML de 19h30."
|
||||
)
|
||||
|
||||
cursor = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache "
|
||||
"WHERE date = ? AND heure IS NULL",
|
||||
(target_date,),
|
||||
)
|
||||
null_count = cursor.fetchone()["cnt"]
|
||||
|
||||
assert null_count == 0, (
|
||||
f"ANOMALIE : {null_count} ligne(s) avec heure IS NULL "
|
||||
f"dans ml_predictions_cache pour le {target_date}. "
|
||||
"Vérifier le pipeline de patch de métadonnées."
|
||||
)
|
||||
|
||||
def test_full_metadata_coverage_today(self, db_connection, target_date):
|
||||
"""
|
||||
Test de couverture globale : aucune des trois colonnes critiques
|
||||
(hippodrome, race_label, heure) n'est NULL pour une même ligne.
|
||||
|
||||
Retourne les 5 premières lignes problématiques pour faciliter le débogage.
|
||||
"""
|
||||
cursor_total = db_connection.execute(
|
||||
"SELECT COUNT(*) as cnt FROM ml_predictions_cache WHERE date = ?",
|
||||
(target_date,),
|
||||
)
|
||||
total = cursor_total.fetchone()["cnt"]
|
||||
if total == 0:
|
||||
pytest.skip(
|
||||
f"Aucune prédiction en cache pour le {target_date}. "
|
||||
"Lancer ce test après le job ML de 19h30."
|
||||
)
|
||||
|
||||
cursor = db_connection.execute(
|
||||
"SELECT id, num_reunion, num_course, horse_name, hippodrome, race_label, heure "
|
||||
"FROM ml_predictions_cache "
|
||||
"WHERE date = ? "
|
||||
"AND (hippodrome IS NULL OR race_label IS NULL OR heure IS NULL) "
|
||||
"LIMIT 5",
|
||||
(target_date,),
|
||||
)
|
||||
bad_rows = cursor.fetchall()
|
||||
|
||||
assert len(bad_rows) == 0, (
|
||||
f"ANOMALIE : {len(bad_rows)} ligne(s) avec au moins une métadonnée NULL "
|
||||
f"(hippodrome, race_label ou heure) pour le {target_date}.\n"
|
||||
"Exemples de lignes affectées :\n"
|
||||
+ "\n".join(
|
||||
f" - id={r['id']} R{r['num_reunion']}C{r['num_course']} "
|
||||
f"{r['horse_name']} | hippodrome={r['hippodrome']!r} "
|
||||
f"race_label={r['race_label']!r} heure={r['heure']!r}"
|
||||
for r in bad_rows
|
||||
)
|
||||
)
|
||||
|
||||
def test_metadata_completeness_summary(self, db_connection, target_date):
|
||||
"""
|
||||
Résumé diagnostique : affiche les statistiques de complétude des métadonnées
|
||||
pour la date cible. Toujours en mode informatif (pas de assertion stricte),
|
||||
utile pour le monitoring et les logs CI.
|
||||
"""
|
||||
cursor = db_connection.execute(
|
||||
"""
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
SUM(CASE WHEN hippodrome IS NULL THEN 1 ELSE 0 END) as null_hippodrome,
|
||||
SUM(CASE WHEN race_label IS NULL THEN 1 ELSE 0 END) as null_race_label,
|
||||
SUM(CASE WHEN heure IS NULL THEN 1 ELSE 0 END) as null_heure,
|
||||
COUNT(DISTINCT hippodrome) as distinct_hippodromes,
|
||||
COUNT(DISTINCT race_label) as distinct_race_labels
|
||||
FROM ml_predictions_cache
|
||||
WHERE date = ?
|
||||
""",
|
||||
(target_date,),
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
total = row["total"]
|
||||
|
||||
if total == 0:
|
||||
pytest.skip(
|
||||
f"Aucune prédiction en cache pour le {target_date}. "
|
||||
"Lancer ce test après le job ML de 19h30."
|
||||
)
|
||||
|
||||
# Afficher les statistiques (visibles avec pytest -v -s)
|
||||
print(f"\n=== Statistiques ml_predictions_cache pour le {target_date} ===")
|
||||
print(f" Total lignes : {total}")
|
||||
print(f" NULL hippodrome : {row['null_hippodrome']}")
|
||||
print(f" NULL race_label : {row['null_race_label']}")
|
||||
print(f" NULL heure : {row['null_heure']}")
|
||||
print(f" Hippodromes distincts: {row['distinct_hippodromes']}")
|
||||
print(f" Race labels distincts: {row['distinct_race_labels']}")
|
||||
|
||||
# L'assertion ici reste stricte pour hippodrome (bug HRT-41 critique)
|
||||
assert row["null_hippodrome"] == 0, (
|
||||
f"RÉGRESSION HRT-41 : {row['null_hippodrome']}/{total} lignes "
|
||||
f"avec hippodrome IS NULL pour le {target_date}."
|
||||
)
|
||||
Reference in New Issue
Block a user