Compare commits
14 Commits
feature/HR
...
feature/HR
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
86e85aa1c6 | ||
| 5aa6013c52 | |||
|
|
4b4323f707 | ||
|
|
356bdf5bec | ||
|
|
f9a45e6deb | ||
|
|
cfc0f038f9 | ||
|
|
c999285895 | ||
|
|
e517741c97 | ||
| 837a0845ec | |||
|
|
4bf458f1b8 | ||
|
|
099286b078 | ||
|
|
d39c7d3319 | ||
|
|
8c5fdf1e9c | ||
|
|
7f5573f076 |
@@ -24,9 +24,9 @@ import os
|
|||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
import stripe
|
import stripe
|
||||||
from flask import Blueprint, g, jsonify, request
|
from flask import Blueprint, jsonify, request
|
||||||
|
|
||||||
from auth import jwt_required_middleware
|
from saas_auth import require_auth as jwt_required_middleware
|
||||||
from billing_db import get_db, migrate_billing_tables
|
from billing_db import get_db, migrate_billing_tables
|
||||||
|
|
||||||
logger = logging.getLogger("turf_saas.billing")
|
logger = logging.getLogger("turf_saas.billing")
|
||||||
@@ -73,18 +73,18 @@ def _sget(obj, key, default=None):
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
def _get_active_subscription(db, user_id: int):
|
def _get_active_subscription(db, user_id):
|
||||||
"""Return the most recent active subscription row for a user."""
|
"""Return the most recent active subscription row for a user."""
|
||||||
return db.execute(
|
return db.execute(
|
||||||
"""SELECT * FROM subscriptions
|
"""SELECT * FROM saas_subscriptions
|
||||||
WHERE user_id = ?
|
WHERE user_id = ?
|
||||||
ORDER BY start_date DESC
|
ORDER BY start_date DESC
|
||||||
LIMIT 1""",
|
LIMIT 1""",
|
||||||
(user_id,),
|
(str(user_id),),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
|
|
||||||
|
|
||||||
def _upsert_subscription(db, user_id: int, **fields):
|
def _upsert_subscription(db, user_id, **fields):
|
||||||
"""
|
"""
|
||||||
Update existing subscription or insert a new one.
|
Update existing subscription or insert a new one.
|
||||||
fields: plan, stripe_customer_id, stripe_subscription_id,
|
fields: plan, stripe_customer_id, stripe_subscription_id,
|
||||||
@@ -95,19 +95,19 @@ def _upsert_subscription(db, user_id: int, **fields):
|
|||||||
# Build SET clause dynamically from provided fields
|
# Build SET clause dynamically from provided fields
|
||||||
set_parts = ", ".join(f"{k} = ?" for k in fields)
|
set_parts = ", ".join(f"{k} = ?" for k in fields)
|
||||||
values = list(fields.values()) + [existing["id"]]
|
values = list(fields.values()) + [existing["id"]]
|
||||||
db.execute(f"UPDATE subscriptions SET {set_parts} WHERE id = ?", values)
|
db.execute(f"UPDATE saas_subscriptions SET {set_parts} WHERE id = ?", values)
|
||||||
else:
|
else:
|
||||||
cols = ", ".join(["user_id"] + list(fields.keys()))
|
cols = ", ".join(["user_id"] + list(fields.keys()))
|
||||||
placeholders = ", ".join(["?"] * (1 + len(fields)))
|
placeholders = ", ".join(["?"] * (1 + len(fields)))
|
||||||
values = [user_id] + list(fields.values())
|
values = [str(user_id)] + list(fields.values())
|
||||||
db.execute(
|
db.execute(
|
||||||
f"INSERT INTO subscriptions ({cols}) VALUES ({placeholders})", values
|
f"INSERT INTO saas_subscriptions ({cols}) VALUES ({placeholders})", values
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _update_user_plan(db, user_id: int, plan: str):
|
def _update_user_plan(db, user_id, plan: str):
|
||||||
"""Sync users.plan field to match active subscription."""
|
"""Sync saas_users.plan field to match active subscription."""
|
||||||
db.execute("UPDATE users SET plan = ? WHERE id = ?", (plan, user_id))
|
db.execute("UPDATE saas_users SET plan = ? WHERE id = ?", (plan, str(user_id)))
|
||||||
|
|
||||||
|
|
||||||
def _get_or_create_stripe_customer(user, db) -> str:
|
def _get_or_create_stripe_customer(user, db) -> str:
|
||||||
@@ -198,7 +198,7 @@ def create_checkout():
|
|||||||
if not price_id:
|
if not price_id:
|
||||||
return jsonify({"error": f"Prix Stripe non configuré pour le plan {plan}"}), 503
|
return jsonify({"error": f"Prix Stripe non configuré pour le plan {plan}"}), 503
|
||||||
|
|
||||||
user = g.current_user
|
user = request.current_user
|
||||||
if user["plan"] == plan:
|
if user["plan"] == plan:
|
||||||
return jsonify({"error": f"Vous êtes déjà sur le plan {plan}"}), 400
|
return jsonify({"error": f"Vous êtes déjà sur le plan {plan}"}), 400
|
||||||
|
|
||||||
@@ -263,7 +263,7 @@ def create_portal():
|
|||||||
if not stripe.api_key:
|
if not stripe.api_key:
|
||||||
return jsonify({"error": "Stripe non configuré"}), 503
|
return jsonify({"error": "Stripe non configuré"}), 503
|
||||||
|
|
||||||
user = g.current_user
|
user = request.current_user
|
||||||
db = get_db()
|
db = get_db()
|
||||||
try:
|
try:
|
||||||
sub = _get_active_subscription(db, user["id"])
|
sub = _get_active_subscription(db, user["id"])
|
||||||
@@ -309,7 +309,7 @@ def billing_status():
|
|||||||
200:
|
200:
|
||||||
description: Subscription status
|
description: Subscription status
|
||||||
"""
|
"""
|
||||||
user = g.current_user
|
user = request.current_user
|
||||||
db = get_db()
|
db = get_db()
|
||||||
try:
|
try:
|
||||||
sub = _get_active_subscription(db, user["id"])
|
sub = _get_active_subscription(db, user["id"])
|
||||||
@@ -428,7 +428,7 @@ def stripe_webhook():
|
|||||||
def _resolve_user_from_customer(db, customer_id: str):
|
def _resolve_user_from_customer(db, customer_id: str):
|
||||||
"""Look up user_id via subscriptions.stripe_customer_id."""
|
"""Look up user_id via subscriptions.stripe_customer_id."""
|
||||||
row = db.execute(
|
row = db.execute(
|
||||||
"SELECT user_id FROM subscriptions WHERE stripe_customer_id = ? LIMIT 1",
|
"SELECT user_id FROM saas_subscriptions WHERE stripe_customer_id = ? LIMIT 1",
|
||||||
(customer_id,),
|
(customer_id,),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
if row:
|
if row:
|
||||||
@@ -465,7 +465,7 @@ def _handle_checkout_completed(db, event):
|
|||||||
user_id = _sget(metadata, "user_id")
|
user_id = _sget(metadata, "user_id")
|
||||||
|
|
||||||
if user_id:
|
if user_id:
|
||||||
user_id = int(user_id)
|
user_id = str(user_id)
|
||||||
else:
|
else:
|
||||||
user_id = _resolve_user_from_customer(db, customer_id)
|
user_id = _resolve_user_from_customer(db, customer_id)
|
||||||
|
|
||||||
@@ -531,7 +531,7 @@ def _handle_subscription_updated(db, event):
|
|||||||
meta = _sget(sub_obj, "metadata") or {}
|
meta = _sget(sub_obj, "metadata") or {}
|
||||||
meta_uid = _sget(meta, "user_id")
|
meta_uid = _sget(meta, "user_id")
|
||||||
if meta_uid:
|
if meta_uid:
|
||||||
user_id = int(meta_uid)
|
user_id = str(meta_uid)
|
||||||
|
|
||||||
if not user_id:
|
if not user_id:
|
||||||
logger.error(
|
logger.error(
|
||||||
@@ -565,7 +565,7 @@ def _handle_subscription_deleted(db, event):
|
|||||||
meta = _sget(sub_obj, "metadata") or {}
|
meta = _sget(sub_obj, "metadata") or {}
|
||||||
meta_uid = _sget(meta, "user_id")
|
meta_uid = _sget(meta, "user_id")
|
||||||
if meta_uid:
|
if meta_uid:
|
||||||
user_id = int(meta_uid)
|
user_id = str(meta_uid)
|
||||||
|
|
||||||
if not user_id:
|
if not user_id:
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|||||||
@@ -76,14 +76,30 @@ def migrate_billing_tables():
|
|||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
stripe_event_id TEXT NOT NULL UNIQUE,
|
stripe_event_id TEXT NOT NULL UNIQUE,
|
||||||
event_type TEXT NOT NULL,
|
event_type TEXT NOT NULL,
|
||||||
user_id INTEGER REFERENCES users(id),
|
user_id TEXT,
|
||||||
payload TEXT,
|
payload TEXT,
|
||||||
processed_at DATETIME NOT NULL DEFAULT (datetime('now'))
|
processed_at DATETIME NOT NULL DEFAULT (datetime('now'))
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_billing_events_user ON billing_events(user_id);
|
CREATE TABLE IF NOT EXISTS saas_subscriptions (
|
||||||
CREATE INDEX IF NOT EXISTS idx_billing_events_type ON billing_events(event_type);
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
CREATE INDEX IF NOT EXISTS idx_subscriptions_stripe ON subscriptions(stripe_subscription_id);
|
user_id TEXT NOT NULL,
|
||||||
|
plan TEXT NOT NULL DEFAULT 'free',
|
||||||
|
start_date DATETIME DEFAULT (datetime('now')),
|
||||||
|
end_date DATETIME,
|
||||||
|
stripe_customer_id TEXT,
|
||||||
|
stripe_subscription_id TEXT,
|
||||||
|
status TEXT NOT NULL DEFAULT 'active',
|
||||||
|
grace_period_end DATETIME,
|
||||||
|
current_period_end DATETIME
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_billing_events_user ON billing_events(user_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_billing_events_type ON billing_events(event_type);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_saas_subs_user ON saas_subscriptions(user_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_saas_subs_customer ON saas_subscriptions(stripe_customer_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_saas_subs_stripe ON saas_subscriptions(stripe_subscription_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_subscriptions_stripe ON subscriptions(stripe_subscription_id);
|
||||||
CREATE INDEX IF NOT EXISTS idx_subscriptions_customer ON subscriptions(stripe_customer_id);
|
CREATE INDEX IF NOT EXISTS idx_subscriptions_customer ON subscriptions(stripe_customer_id);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|||||||
21
infra/turf-saas-leadhunter.service
Normal file
21
infra/turf-saas-leadhunter.service
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=H3R7Tech LeadHunter API (Port 8775)
|
||||||
|
Documentation=https://portal-kolifee.duckdns.org
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=h3r7
|
||||||
|
WorkingDirectory=/home/h3r7/turf_saas
|
||||||
|
|
||||||
|
# Charger les variables d'environnement depuis /home/h3r7/.env
|
||||||
|
# (notamment GOOGLE_PLACES_API_KEY)
|
||||||
|
EnvironmentFile=/home/h3r7/.env
|
||||||
|
|
||||||
|
ExecStart=/home/h3r7/turf_saas/venv/bin/python3 /home/h3r7/turf_saas/leadhunter_api.py
|
||||||
|
Restart=always
|
||||||
|
RestartSec=10
|
||||||
|
Environment=PYTHONPATH=/home/h3r7/turf_saas
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
303
leadhunter_api.py
Normal file
303
leadhunter_api.py
Normal file
@@ -0,0 +1,303 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
H3R7Tech — LeadHunter API
|
||||||
|
===========================
|
||||||
|
Service Flask sur port 8775 exposant les endpoints LeadHunter.
|
||||||
|
|
||||||
|
Endpoints :
|
||||||
|
GET /api/leads — Liste les leads (filtres: status, limit, offset)
|
||||||
|
POST /api/leads/scrape — Lance un job de scraping asynchrone
|
||||||
|
GET /api/leads/stats — Statistiques globales du CRM
|
||||||
|
GET /api/leads/export — Export CSV des leads
|
||||||
|
PATCH /api/leads/<id>/status — Met à jour le statut d'un lead
|
||||||
|
|
||||||
|
Port : 8775 (8769 occupé par depenses_trello/app.py, 8770 occupé par turf_scraper/crm_api.py — corrigé HRT-66)
|
||||||
|
|
||||||
|
Auteur: H3R7Tech Backend Engineer
|
||||||
|
Issue: HRT-66
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import logging
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
from flask import Flask, jsonify, request, Response
|
||||||
|
from flask_cors import CORS
|
||||||
|
|
||||||
|
# Import des modules LeadHunter
|
||||||
|
from leadhunter_crm import (
|
||||||
|
init_db,
|
||||||
|
insert_leads,
|
||||||
|
get_leads,
|
||||||
|
get_lead_by_id,
|
||||||
|
update_lead_status,
|
||||||
|
get_stats,
|
||||||
|
export_csv,
|
||||||
|
VALID_STATUSES,
|
||||||
|
DB_PATH,
|
||||||
|
)
|
||||||
|
from leadhunter_scraper import run_scraping, GOOGLE_PLACES_API_KEY
|
||||||
|
from leadhunter_scorer import LeadScorer
|
||||||
|
|
||||||
|
# ─── Assertions au démarrage ─────────────────────────────────────────────────
|
||||||
|
# Vérification obligatoire : la clé API doit être présente au démarrage
|
||||||
|
assert os.environ.get("GOOGLE_PLACES_API_KEY"), (
|
||||||
|
"GOOGLE_PLACES_API_KEY manquante. "
|
||||||
|
"Ajouter dans /home/h3r7/.env : export GOOGLE_PLACES_API_KEY=xxx"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||||
|
logger = logging.getLogger("leadhunter.api")
|
||||||
|
|
||||||
|
_handler = RotatingFileHandler(
|
||||||
|
"/home/h3r7/leadhunter.log",
|
||||||
|
maxBytes=5 * 1024 * 1024,
|
||||||
|
backupCount=3,
|
||||||
|
)
|
||||||
|
_handler.setFormatter(
|
||||||
|
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||||
|
)
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
if not logger.handlers:
|
||||||
|
logger.addHandler(_handler)
|
||||||
|
logger.addHandler(logging.StreamHandler())
|
||||||
|
|
||||||
|
# ─── App Flask ───────────────────────────────────────────────────────────────
|
||||||
|
app = Flask(__name__)
|
||||||
|
CORS(app)
|
||||||
|
|
||||||
|
# Scorer singleton
|
||||||
|
scorer = LeadScorer()
|
||||||
|
|
||||||
|
# État global du job de scraping (simple flag — pas de celery nécessaire pour le POC)
|
||||||
|
_scrape_job = {
|
||||||
|
"running": False,
|
||||||
|
"last_run": None,
|
||||||
|
"last_count": 0,
|
||||||
|
"last_error": None,
|
||||||
|
}
|
||||||
|
_scrape_lock = threading.Lock()
|
||||||
|
|
||||||
|
# ─── Init DB ─────────────────────────────────────────────────────────────────
|
||||||
|
init_db(DB_PATH)
|
||||||
|
logger.info("LeadHunter API démarrée — DB initialisée.")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _run_scrape_job(max_leads: int, use_google: bool, use_osm: bool) -> None:
|
||||||
|
"""Job de scraping exécuté dans un thread séparé."""
|
||||||
|
with _scrape_lock:
|
||||||
|
_scrape_job["running"] = True
|
||||||
|
_scrape_job["last_error"] = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
leads_raw = run_scraping(
|
||||||
|
max_leads=max_leads,
|
||||||
|
use_google=use_google,
|
||||||
|
use_osm=use_osm,
|
||||||
|
)
|
||||||
|
leads_scored = scorer.score_leads(leads_raw)
|
||||||
|
inserted_ids = insert_leads(leads_scored)
|
||||||
|
|
||||||
|
with _scrape_lock:
|
||||||
|
_scrape_job["last_count"] = len(inserted_ids)
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
_scrape_job["last_run"] = datetime.utcnow().isoformat() + "Z"
|
||||||
|
|
||||||
|
logger.info(f"Scrape job terminé : {len(inserted_ids)} leads insérés.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Scrape job erreur : {e}")
|
||||||
|
with _scrape_lock:
|
||||||
|
_scrape_job["last_error"] = str(e)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
with _scrape_lock:
|
||||||
|
_scrape_job["running"] = False
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Routes ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/leads", methods=["GET"])
|
||||||
|
def api_get_leads():
|
||||||
|
"""
|
||||||
|
Liste les leads du CRM.
|
||||||
|
|
||||||
|
Query params :
|
||||||
|
- status (str, optional) : filtre sur new/contacted/closed/rejected
|
||||||
|
- limit (int, default=50) : pagination
|
||||||
|
- offset (int, default=0) : pagination
|
||||||
|
"""
|
||||||
|
status = request.args.get("status")
|
||||||
|
try:
|
||||||
|
limit = int(request.args.get("limit", 50))
|
||||||
|
offset = int(request.args.get("offset", 0))
|
||||||
|
except ValueError:
|
||||||
|
return jsonify({"error": "limit et offset doivent être des entiers"}), 400
|
||||||
|
|
||||||
|
if status and status not in VALID_STATUSES:
|
||||||
|
return jsonify(
|
||||||
|
{"error": f"status invalide. Valeurs acceptées : {VALID_STATUSES}"}
|
||||||
|
), 400
|
||||||
|
|
||||||
|
leads = get_leads(status=status, limit=limit, offset=offset)
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"leads": leads,
|
||||||
|
"count": len(leads),
|
||||||
|
"limit": limit,
|
||||||
|
"offset": offset,
|
||||||
|
"status_filter": status,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/leads/scrape", methods=["POST"])
|
||||||
|
def api_scrape():
|
||||||
|
"""
|
||||||
|
Lance un job de scraping asynchrone.
|
||||||
|
|
||||||
|
Body JSON (optionnel) :
|
||||||
|
- max_leads (int, default=100)
|
||||||
|
- use_google (bool, default=true)
|
||||||
|
- use_osm (bool, default=true)
|
||||||
|
|
||||||
|
Retourne immédiatement avec le statut du job.
|
||||||
|
"""
|
||||||
|
with _scrape_lock:
|
||||||
|
if _scrape_job["running"]:
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"status": "already_running",
|
||||||
|
"message": "Un job de scraping est déjà en cours.",
|
||||||
|
}
|
||||||
|
), 409
|
||||||
|
|
||||||
|
body = request.get_json(silent=True) or {}
|
||||||
|
max_leads = int(body.get("max_leads", 100))
|
||||||
|
use_google = bool(body.get("use_google", True))
|
||||||
|
use_osm = bool(body.get("use_osm", True))
|
||||||
|
|
||||||
|
thread = threading.Thread(
|
||||||
|
target=_run_scrape_job,
|
||||||
|
args=(max_leads, use_google, use_osm),
|
||||||
|
daemon=True,
|
||||||
|
)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Job de scraping lancé (max_leads={max_leads}, "
|
||||||
|
f"use_google={use_google}, use_osm={use_osm})"
|
||||||
|
)
|
||||||
|
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"status": "started",
|
||||||
|
"message": "Job de scraping démarré en arrière-plan.",
|
||||||
|
"params": {
|
||||||
|
"max_leads": max_leads,
|
||||||
|
"use_google": use_google,
|
||||||
|
"use_osm": use_osm,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
), 202
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/leads/scrape/status", methods=["GET"])
|
||||||
|
def api_scrape_status():
|
||||||
|
"""Retourne l'état courant du job de scraping."""
|
||||||
|
with _scrape_lock:
|
||||||
|
return jsonify(dict(_scrape_job))
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/leads/stats", methods=["GET"])
|
||||||
|
def api_stats():
|
||||||
|
"""
|
||||||
|
Statistiques globales du CRM LeadHunter.
|
||||||
|
|
||||||
|
Retourne : total, by_status, by_source, avg_score, top_leads_count
|
||||||
|
"""
|
||||||
|
stats = get_stats()
|
||||||
|
if not stats:
|
||||||
|
return jsonify({"error": "Impossible de calculer les statistiques"}), 500
|
||||||
|
return jsonify(stats)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/leads/export", methods=["GET"])
|
||||||
|
def api_export():
|
||||||
|
"""
|
||||||
|
Export CSV de tous les leads (ou filtrés par status).
|
||||||
|
|
||||||
|
Query params :
|
||||||
|
- status (str, optional)
|
||||||
|
"""
|
||||||
|
status = request.args.get("status")
|
||||||
|
if status and status not in VALID_STATUSES:
|
||||||
|
return jsonify({"error": f"status invalide : {VALID_STATUSES}"}), 400
|
||||||
|
|
||||||
|
csv_content = export_csv(status=status)
|
||||||
|
filename = f"leadhunter_leads{'_' + status if status else ''}.csv"
|
||||||
|
|
||||||
|
return Response(
|
||||||
|
csv_content,
|
||||||
|
mimetype="text/csv",
|
||||||
|
headers={
|
||||||
|
"Content-Disposition": f"attachment; filename={filename}",
|
||||||
|
"Content-Type": "text/csv; charset=utf-8",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/leads/<int:lead_id>/status", methods=["PATCH"])
|
||||||
|
def api_update_status(lead_id: int):
|
||||||
|
"""
|
||||||
|
Met à jour le statut d'un lead.
|
||||||
|
|
||||||
|
Body JSON :
|
||||||
|
- status (str) : new | contacted | closed | rejected
|
||||||
|
"""
|
||||||
|
body = request.get_json(silent=True)
|
||||||
|
if not body or "status" not in body:
|
||||||
|
return jsonify({"error": "Body JSON requis avec le champ 'status'"}), 400
|
||||||
|
|
||||||
|
new_status = body["status"]
|
||||||
|
if new_status not in VALID_STATUSES:
|
||||||
|
return jsonify({"error": f"status invalide. Valeurs : {VALID_STATUSES}"}), 400
|
||||||
|
|
||||||
|
lead = get_lead_by_id(lead_id)
|
||||||
|
if not lead:
|
||||||
|
return jsonify({"error": f"Lead id={lead_id} introuvable"}), 404
|
||||||
|
|
||||||
|
success = update_lead_status(lead_id, new_status)
|
||||||
|
if not success:
|
||||||
|
return jsonify({"error": "Mise à jour échouée"}), 500
|
||||||
|
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"success": True,
|
||||||
|
"lead_id": lead_id,
|
||||||
|
"new_status": new_status,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/health", methods=["GET"])
|
||||||
|
def health():
|
||||||
|
"""Healthcheck pour systemd / monitoring."""
|
||||||
|
return jsonify(
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"service": "leadhunter-api",
|
||||||
|
"port": 8775,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Entrypoint ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app.run(host="0.0.0.0", port=8775, debug=False)
|
||||||
349
leadhunter_crm.py
Normal file
349
leadhunter_crm.py
Normal file
@@ -0,0 +1,349 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
H3R7Tech — LeadHunter CRM (SQLite)
|
||||||
|
=====================================
|
||||||
|
Couche de persistance SQLite pour les leads LeadHunter.
|
||||||
|
|
||||||
|
Schéma validé CTO (HRT-66) :
|
||||||
|
CREATE TABLE leads (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
source TEXT NOT NULL, -- 'google_places' ou 'osm'
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
address TEXT,
|
||||||
|
phone TEXT,
|
||||||
|
rating REAL,
|
||||||
|
reviews_count INTEGER,
|
||||||
|
website TEXT,
|
||||||
|
score INTEGER,
|
||||||
|
rgpd_ok BOOLEAN DEFAULT 1,
|
||||||
|
scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
status TEXT DEFAULT 'new' -- new, contacted, closed, rejected
|
||||||
|
);
|
||||||
|
|
||||||
|
Auteur: H3R7Tech Backend Engineer
|
||||||
|
Issue: HRT-66
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import logging
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from datetime import datetime
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||||
|
logger = logging.getLogger("leadhunter.crm")
|
||||||
|
|
||||||
|
_handler = RotatingFileHandler(
|
||||||
|
"/home/h3r7/leadhunter.log",
|
||||||
|
maxBytes=5 * 1024 * 1024,
|
||||||
|
backupCount=3,
|
||||||
|
)
|
||||||
|
_handler.setFormatter(
|
||||||
|
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||||
|
)
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
if not logger.handlers:
|
||||||
|
logger.addHandler(_handler)
|
||||||
|
logger.addHandler(logging.StreamHandler())
|
||||||
|
|
||||||
|
# ─── Chemin DB ───────────────────────────────────────────────────────────────
|
||||||
|
DB_PATH = "/home/h3r7/leadhunter.db"
|
||||||
|
|
||||||
|
# Statuts valides pour un lead
|
||||||
|
VALID_STATUSES = {"new", "contacted", "closed", "rejected"}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Initialisation ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def init_db(db_path: str = DB_PATH) -> None:
|
||||||
|
"""
|
||||||
|
Crée la base SQLite et la table leads si elle n'existe pas.
|
||||||
|
Idempotent — peut être appelé au démarrage de l'API.
|
||||||
|
"""
|
||||||
|
with sqlite3.connect(db_path) as conn:
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS leads (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
source TEXT NOT NULL,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
address TEXT,
|
||||||
|
phone TEXT,
|
||||||
|
rating REAL,
|
||||||
|
reviews_count INTEGER,
|
||||||
|
website TEXT,
|
||||||
|
score INTEGER,
|
||||||
|
rgpd_ok BOOLEAN DEFAULT 1,
|
||||||
|
scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
status TEXT DEFAULT 'new'
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
logger.info(f"DB initialisée : {db_path}")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Context manager ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _get_conn(db_path: str = DB_PATH):
|
||||||
|
"""Fournit une connexion SQLite avec row_factory."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
conn.commit()
|
||||||
|
except Exception as e:
|
||||||
|
conn.rollback()
|
||||||
|
logger.warning(f"DB transaction rollback : {e}")
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── CRUD ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def insert_lead(lead: dict, db_path: str = DB_PATH) -> Optional[int]:
|
||||||
|
"""
|
||||||
|
Insère un lead normalisé dans la DB.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
lead: dict avec les champs normalisés (source, name, address, ...)
|
||||||
|
db_path: chemin vers la DB SQLite.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
L'id SQLite du lead inséré, ou None en cas d'erreur.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with _get_conn(db_path) as conn:
|
||||||
|
cursor = conn.execute(
|
||||||
|
"""
|
||||||
|
INSERT INTO leads
|
||||||
|
(source, name, address, phone, rating, reviews_count,
|
||||||
|
website, score, rgpd_ok, status)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
""",
|
||||||
|
(
|
||||||
|
lead.get("source", "unknown"),
|
||||||
|
lead.get("name", ""),
|
||||||
|
lead.get("address", ""),
|
||||||
|
lead.get("phone", ""),
|
||||||
|
lead.get("rating"),
|
||||||
|
lead.get("reviews_count"),
|
||||||
|
lead.get("website", ""),
|
||||||
|
lead.get("score"),
|
||||||
|
1 if lead.get("rgpd_ok", True) else 0,
|
||||||
|
lead.get("status", "new"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
lead_id = cursor.lastrowid
|
||||||
|
logger.info(f"Lead inséré id={lead_id} : {lead.get('name')}")
|
||||||
|
return lead_id
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"insert_lead error : {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def insert_leads(leads: list[dict], db_path: str = DB_PATH) -> list[int]:
|
||||||
|
"""
|
||||||
|
Insère une liste de leads en batch.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Liste des ids insérés.
|
||||||
|
"""
|
||||||
|
ids = []
|
||||||
|
for lead in leads:
|
||||||
|
lead_id = insert_lead(lead, db_path)
|
||||||
|
if lead_id is not None:
|
||||||
|
ids.append(lead_id)
|
||||||
|
logger.info(f"insert_leads : {len(ids)}/{len(leads)} insérés.")
|
||||||
|
return ids
|
||||||
|
|
||||||
|
|
||||||
|
def get_leads(
|
||||||
|
status: Optional[str] = None,
|
||||||
|
limit: int = 100,
|
||||||
|
offset: int = 0,
|
||||||
|
db_path: str = DB_PATH,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Récupère les leads avec filtre optionnel sur le statut.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
status: filtre sur le champ 'status' (new, contacted, closed, rejected).
|
||||||
|
limit: pagination — nombre de résultats max.
|
||||||
|
offset: pagination — décalage.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Liste de dicts (tous les champs de la table leads).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with _get_conn(db_path) as conn:
|
||||||
|
if status:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT * FROM leads WHERE status = ? ORDER BY score DESC, scraped_at DESC LIMIT ? OFFSET ?",
|
||||||
|
(status, limit, offset),
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT * FROM leads ORDER BY score DESC, scraped_at DESC LIMIT ? OFFSET ?",
|
||||||
|
(limit, offset),
|
||||||
|
).fetchall()
|
||||||
|
return [dict(r) for r in rows]
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"get_leads error : {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_lead_by_id(lead_id: int, db_path: str = DB_PATH) -> Optional[dict]:
|
||||||
|
"""Récupère un lead par son id."""
|
||||||
|
try:
|
||||||
|
with _get_conn(db_path) as conn:
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT * FROM leads WHERE id = ?", (lead_id,)
|
||||||
|
).fetchone()
|
||||||
|
return dict(row) if row else None
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"get_lead_by_id error : {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def update_lead_status(lead_id: int, status: str, db_path: str = DB_PATH) -> bool:
|
||||||
|
"""
|
||||||
|
Met à jour le statut d'un lead.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
lead_id: id du lead.
|
||||||
|
status: nouveau statut ('new', 'contacted', 'closed', 'rejected').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True si mise à jour réussie, False sinon.
|
||||||
|
"""
|
||||||
|
if status not in VALID_STATUSES:
|
||||||
|
logger.warning(f"update_lead_status : statut invalide '{status}'")
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
with _get_conn(db_path) as conn:
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE leads SET status = ? WHERE id = ?",
|
||||||
|
(status, lead_id),
|
||||||
|
)
|
||||||
|
logger.info(f"Lead id={lead_id} statut → {status}")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"update_lead_status error : {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_stats(db_path: str = DB_PATH) -> dict:
|
||||||
|
"""
|
||||||
|
Retourne les statistiques globales du CRM.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict avec total, by_status, by_source, avg_score, top_leads_count
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with _get_conn(db_path) as conn:
|
||||||
|
total = conn.execute("SELECT COUNT(*) FROM leads").fetchone()[0]
|
||||||
|
|
||||||
|
by_status_rows = conn.execute(
|
||||||
|
"SELECT status, COUNT(*) as cnt FROM leads GROUP BY status"
|
||||||
|
).fetchall()
|
||||||
|
by_status = {r["status"]: r["cnt"] for r in by_status_rows}
|
||||||
|
|
||||||
|
by_source_rows = conn.execute(
|
||||||
|
"SELECT source, COUNT(*) as cnt FROM leads GROUP BY source"
|
||||||
|
).fetchall()
|
||||||
|
by_source = {r["source"]: r["cnt"] for r in by_source_rows}
|
||||||
|
|
||||||
|
avg_score_row = conn.execute(
|
||||||
|
"SELECT AVG(score) FROM leads WHERE score IS NOT NULL"
|
||||||
|
).fetchone()
|
||||||
|
avg_score = round(avg_score_row[0] or 0, 2)
|
||||||
|
|
||||||
|
# Leads "chauds" = score ≥ 5
|
||||||
|
top_count = conn.execute(
|
||||||
|
"SELECT COUNT(*) FROM leads WHERE score >= 5"
|
||||||
|
).fetchone()[0]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total": total,
|
||||||
|
"by_status": by_status,
|
||||||
|
"by_source": by_source,
|
||||||
|
"avg_score": avg_score,
|
||||||
|
"top_leads_count": top_count,
|
||||||
|
"generated_at": datetime.utcnow().isoformat() + "Z",
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"get_stats error : {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def export_csv(
|
||||||
|
status: Optional[str] = None,
|
||||||
|
db_path: str = DB_PATH,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Exporte les leads en CSV (string).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
status: filtre optionnel sur le statut.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Contenu CSV en string UTF-8.
|
||||||
|
"""
|
||||||
|
leads = get_leads(status=status, limit=10000, db_path=db_path)
|
||||||
|
|
||||||
|
output = io.StringIO()
|
||||||
|
fieldnames = [
|
||||||
|
"id",
|
||||||
|
"source",
|
||||||
|
"name",
|
||||||
|
"address",
|
||||||
|
"phone",
|
||||||
|
"rating",
|
||||||
|
"reviews_count",
|
||||||
|
"website",
|
||||||
|
"score",
|
||||||
|
"rgpd_ok",
|
||||||
|
"scraped_at",
|
||||||
|
"status",
|
||||||
|
]
|
||||||
|
writer = csv.DictWriter(output, fieldnames=fieldnames, extrasaction="ignore")
|
||||||
|
writer.writeheader()
|
||||||
|
writer.writerows(leads)
|
||||||
|
|
||||||
|
logger.info(f"export_csv : {len(leads)} leads exportés.")
|
||||||
|
return output.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
# ─── CLI (debug) ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
init_db()
|
||||||
|
|
||||||
|
# Test insertion
|
||||||
|
test_lead = {
|
||||||
|
"source": "google_places",
|
||||||
|
"name": "Restaurant Test",
|
||||||
|
"address": "10 rue de la Paix, 59000 Lille",
|
||||||
|
"phone": "+33 3 20 00 00 01",
|
||||||
|
"rating": 4.5,
|
||||||
|
"reviews_count": 120,
|
||||||
|
"website": "",
|
||||||
|
"score": 8,
|
||||||
|
"rgpd_ok": True,
|
||||||
|
"status": "new",
|
||||||
|
}
|
||||||
|
lead_id = insert_lead(test_lead)
|
||||||
|
print(f"Lead inséré : id={lead_id}")
|
||||||
|
|
||||||
|
leads = get_leads()
|
||||||
|
print(f"Leads en DB : {len(leads)}")
|
||||||
|
|
||||||
|
stats = get_stats()
|
||||||
|
print(f"Stats : {stats}")
|
||||||
193
leadhunter_scorer.py
Normal file
193
leadhunter_scorer.py
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
H3R7Tech — LeadHunter Scorer
|
||||||
|
================================
|
||||||
|
Moteur de scoring des leads restaurants MEL.
|
||||||
|
|
||||||
|
Critères (ordre de priorité métier) :
|
||||||
|
1. [+3] Site web absent ← CRITIQUE : raison d'être du produit
|
||||||
|
2. [+2] Nombre d'avis élevé (≥ 50) : forte activité = bon prospect de vente
|
||||||
|
3. [+2] Note Google élevée (≥ 4.0) : établissement sérieux
|
||||||
|
4. [+1] Téléphone présent : facilite la prise de contact
|
||||||
|
5. [-1] Note faible (< 3.0) : risque reputationnel pour la prestation web
|
||||||
|
|
||||||
|
Score maximum théorique : 8
|
||||||
|
Score minimum : 0 (leads avec site web ne doivent pas passer ici)
|
||||||
|
|
||||||
|
Auteur: H3R7Tech Backend Engineer
|
||||||
|
Issue: HRT-66
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
|
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||||
|
logger = logging.getLogger("leadhunter.scorer")
|
||||||
|
|
||||||
|
_handler = RotatingFileHandler(
|
||||||
|
"/home/h3r7/leadhunter.log",
|
||||||
|
maxBytes=5 * 1024 * 1024,
|
||||||
|
backupCount=3,
|
||||||
|
)
|
||||||
|
_handler.setFormatter(
|
||||||
|
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||||
|
)
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
if not logger.handlers:
|
||||||
|
logger.addHandler(_handler)
|
||||||
|
logger.addHandler(logging.StreamHandler())
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Scorer ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class LeadScorer:
|
||||||
|
"""
|
||||||
|
Calcule le score de priorité d'un lead.
|
||||||
|
|
||||||
|
Le score sert à trier les leads dans le CRM :
|
||||||
|
- Score élevé = prospect chaud (sans site + actif + bien noté)
|
||||||
|
- Score faible = prospect froid (peut être ignoré ou traité en dernier)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _calculate_score(self, lead: dict) -> int:
|
||||||
|
"""
|
||||||
|
Calcule le score d'un lead.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
lead: dict avec les champs normalisés du scraper
|
||||||
|
(name, website, rating, reviews_count, phone, ...)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Score entier (0–8)
|
||||||
|
"""
|
||||||
|
score = 0
|
||||||
|
|
||||||
|
# ── Critère 1 : site web absent [CRITIQUE — logique métier centrale] ──
|
||||||
|
# C'est le critère n°1 : on cherche des restaurants SANS site web
|
||||||
|
# pour leur proposer une création de site à 800–1500€.
|
||||||
|
website = lead.get("website", "")
|
||||||
|
if not website or not website.strip():
|
||||||
|
score += 3
|
||||||
|
logger.debug(f"{lead.get('name')}: +3 (site web absent)")
|
||||||
|
else:
|
||||||
|
# Si le lead a un site web, score = 0 immédiatement.
|
||||||
|
# Ce cas ne devrait pas se produire (filtre scraper),
|
||||||
|
# mais on reste défensif.
|
||||||
|
logger.warning(
|
||||||
|
f"{lead.get('name')}: site web présent ({website}), "
|
||||||
|
"lead ignoré pour scoring."
|
||||||
|
)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# ── Critère 2 : nombre d'avis élevé (≥ 50) ──────────────────────────
|
||||||
|
reviews = lead.get("reviews_count")
|
||||||
|
if reviews is not None:
|
||||||
|
try:
|
||||||
|
reviews = int(reviews)
|
||||||
|
if reviews >= 50:
|
||||||
|
score += 2
|
||||||
|
logger.debug(f"{lead.get('name')}: +2 (avis ≥ 50 : {reviews})")
|
||||||
|
except (TypeError, ValueError) as e:
|
||||||
|
logger.warning(f"reviews_count invalide pour {lead.get('name')}: {e}")
|
||||||
|
|
||||||
|
# ── Critère 3 : bonne note Google (≥ 4.0) ───────────────────────────
|
||||||
|
rating = lead.get("rating")
|
||||||
|
if rating is not None:
|
||||||
|
try:
|
||||||
|
rating = float(rating)
|
||||||
|
if rating >= 4.0:
|
||||||
|
score += 2
|
||||||
|
logger.debug(f"{lead.get('name')}: +2 (note ≥ 4.0 : {rating})")
|
||||||
|
elif rating < 3.0:
|
||||||
|
score -= 1
|
||||||
|
logger.debug(f"{lead.get('name')}: -1 (note < 3.0 : {rating})")
|
||||||
|
except (TypeError, ValueError) as e:
|
||||||
|
logger.warning(f"rating invalide pour {lead.get('name')}: {e}")
|
||||||
|
|
||||||
|
# ── Critère 4 : téléphone présent ────────────────────────────────────
|
||||||
|
phone = lead.get("phone", "")
|
||||||
|
if phone and phone.strip():
|
||||||
|
score += 1
|
||||||
|
logger.debug(f"{lead.get('name')}: +1 (téléphone présent)")
|
||||||
|
|
||||||
|
# Plancher à 0
|
||||||
|
score = max(0, score)
|
||||||
|
logger.info(f"Score calculé pour '{lead.get('name')}' : {score}/8")
|
||||||
|
return score
|
||||||
|
|
||||||
|
def score_lead(self, lead: dict) -> dict:
|
||||||
|
"""
|
||||||
|
Enrichit un lead avec son score.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
lead: dict normalisé du scraper.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Même dict avec le champ 'score' ajouté/mis à jour.
|
||||||
|
"""
|
||||||
|
lead = dict(lead) # copie défensive
|
||||||
|
lead["score"] = self._calculate_score(lead)
|
||||||
|
return lead
|
||||||
|
|
||||||
|
def score_leads(self, leads: list[dict]) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Score et trie une liste de leads (score décroissant).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
leads: liste de dicts normalisés.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Liste triée par score décroissant.
|
||||||
|
"""
|
||||||
|
scored = [self.score_lead(lead) for lead in leads]
|
||||||
|
scored.sort(key=lambda l: l.get("score", 0), reverse=True)
|
||||||
|
logger.info(
|
||||||
|
f"score_leads terminé : {len(scored)} leads scorés. "
|
||||||
|
f"Score max = {scored[0]['score'] if scored else 0}, "
|
||||||
|
f"Score min = {scored[-1]['score'] if scored else 0}"
|
||||||
|
)
|
||||||
|
return scored
|
||||||
|
|
||||||
|
|
||||||
|
# ─── CLI (debug) ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Exemple de test rapide sans appel API
|
||||||
|
test_leads = [
|
||||||
|
{
|
||||||
|
"name": "Restaurant A",
|
||||||
|
"website": "",
|
||||||
|
"rating": 4.5,
|
||||||
|
"reviews_count": 120,
|
||||||
|
"phone": "+33 3 20 00 00 01",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Restaurant B",
|
||||||
|
"website": "",
|
||||||
|
"rating": 3.8,
|
||||||
|
"reviews_count": 30,
|
||||||
|
"phone": "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Café C",
|
||||||
|
"website": "",
|
||||||
|
"rating": 2.5,
|
||||||
|
"reviews_count": 5,
|
||||||
|
"phone": "+33 3 20 00 00 03",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Bar D avec site",
|
||||||
|
"website": "https://bar-d.fr",
|
||||||
|
"rating": 4.2,
|
||||||
|
"reviews_count": 80,
|
||||||
|
"phone": "+33 3 20 00 00 04",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
scorer = LeadScorer()
|
||||||
|
results = scorer.score_leads(test_leads)
|
||||||
|
|
||||||
|
print("\n=== Résultats scoring ===")
|
||||||
|
for r in results:
|
||||||
|
print(f" [{r['score']:2d}/8] {r['name']}")
|
||||||
397
leadhunter_scraper.py
Normal file
397
leadhunter_scraper.py
Normal file
@@ -0,0 +1,397 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
H3R7Tech — LeadHunter Scraper
|
||||||
|
================================
|
||||||
|
Agent de scraping pour la détection de restaurants sans site web
|
||||||
|
dans la MEL (Métropole Européenne de Lille).
|
||||||
|
|
||||||
|
Sources :
|
||||||
|
- Google Places API (primary)
|
||||||
|
- OpenStreetMap / Overpass API (fallback)
|
||||||
|
|
||||||
|
Quota Google Places Free Tier :
|
||||||
|
- 28 500 requêtes/mois ≈ 950/jour
|
||||||
|
- Compteur persistent dans /home/h3r7/leadhunter_quota.json
|
||||||
|
|
||||||
|
Auteur: H3R7Tech Backend Engineer
|
||||||
|
Issue: HRT-66
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import requests
|
||||||
|
from datetime import date, datetime
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
||||||
|
# ─── Logging ────────────────────────────────────────────────────────────────
|
||||||
|
logger = logging.getLogger("leadhunter.scraper")
|
||||||
|
|
||||||
|
_handler = RotatingFileHandler(
|
||||||
|
"/home/h3r7/leadhunter.log",
|
||||||
|
maxBytes=5 * 1024 * 1024, # 5 MB
|
||||||
|
backupCount=3,
|
||||||
|
)
|
||||||
|
_handler.setFormatter(
|
||||||
|
logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
|
||||||
|
)
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
if not logger.handlers:
|
||||||
|
logger.addHandler(_handler)
|
||||||
|
logger.addHandler(logging.StreamHandler())
|
||||||
|
|
||||||
|
# ─── Configuration ───────────────────────────────────────────────────────────
|
||||||
|
GOOGLE_PLACES_API_KEY = os.environ.get("GOOGLE_PLACES_API_KEY")
|
||||||
|
|
||||||
|
# Quota journalier Google Places Free Tier
|
||||||
|
DAILY_QUOTA_FILE = "/home/h3r7/leadhunter_quota.json"
|
||||||
|
DAILY_QUOTA_LIMIT = 900 # marge de sécurité vs les 950 théoriques
|
||||||
|
|
||||||
|
# Délai entre requêtes Places pour éviter rate-limiting
|
||||||
|
PLACES_SLEEP_S = 0.5
|
||||||
|
|
||||||
|
# Bounding box MEL (Métropole Européenne de Lille)
|
||||||
|
MEL_CENTER_LAT = 50.6292
|
||||||
|
MEL_CENTER_LNG = 3.0573
|
||||||
|
MEL_RADIUS_M = 20000 # 20 km autour de Lille
|
||||||
|
|
||||||
|
# Types de lieux ciblés
|
||||||
|
TARGET_TYPES = ["restaurant", "cafe", "bar", "bakery", "food"]
|
||||||
|
|
||||||
|
# Overpass API endpoint
|
||||||
|
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
||||||
|
|
||||||
|
# Requête Overpass MEL — bounding box directe (50.4,2.8,50.8,3.3) couvrant la MEL
|
||||||
|
# Fix HRT-72 : la résolution area["name"=...] échoue silencieusement sur l'API Overpass publique
|
||||||
|
OVERPASS_MEL_QUERY = """
|
||||||
|
[out:json][timeout:60];
|
||||||
|
(
|
||||||
|
node["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3);
|
||||||
|
way["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3);
|
||||||
|
);
|
||||||
|
out center 200;
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Quota Manager ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _load_quota() -> dict:
|
||||||
|
"""Charge le compteur quotidien depuis le fichier JSON."""
|
||||||
|
today = str(date.today())
|
||||||
|
if os.path.exists(DAILY_QUOTA_FILE):
|
||||||
|
try:
|
||||||
|
with open(DAILY_QUOTA_FILE, "r") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
if data.get("date") == today:
|
||||||
|
return data
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Impossible de lire le fichier quota : {e}")
|
||||||
|
return {"date": today, "count": 0}
|
||||||
|
|
||||||
|
|
||||||
|
def _save_quota(data: dict) -> None:
|
||||||
|
"""Persiste le compteur quotidien."""
|
||||||
|
try:
|
||||||
|
with open(DAILY_QUOTA_FILE, "w") as f:
|
||||||
|
json.dump(data, f)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Impossible d'écrire le fichier quota : {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _increment_quota(n: int = 1) -> int:
|
||||||
|
"""Incrémente le compteur et retourne le total du jour."""
|
||||||
|
quota = _load_quota()
|
||||||
|
quota["count"] += n
|
||||||
|
_save_quota(quota)
|
||||||
|
return quota["count"]
|
||||||
|
|
||||||
|
|
||||||
|
def _quota_remaining() -> int:
|
||||||
|
"""Retourne le nombre de requêtes restantes pour aujourd'hui."""
|
||||||
|
quota = _load_quota()
|
||||||
|
return max(0, DAILY_QUOTA_LIMIT - quota["count"])
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Google Places Scraper ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class GooglePlacesScraper:
|
||||||
|
"""
|
||||||
|
Scraping via Google Places API (Nearby Search + Place Details).
|
||||||
|
Filtre les lieux sans site web côté API.
|
||||||
|
"""
|
||||||
|
|
||||||
|
BASE_URL = "https://maps.googleapis.com/maps/api/place"
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
if not GOOGLE_PLACES_API_KEY:
|
||||||
|
raise EnvironmentError(
|
||||||
|
"GOOGLE_PLACES_API_KEY non définie. "
|
||||||
|
"Ajouter dans /home/h3r7/.env et relancer."
|
||||||
|
)
|
||||||
|
self.api_key = GOOGLE_PLACES_API_KEY
|
||||||
|
|
||||||
|
def _nearby_search(self, place_type: str, page_token: str = None) -> dict:
|
||||||
|
"""Appel Nearby Search — 1 requête comptabilisée."""
|
||||||
|
params = {
|
||||||
|
"key": self.api_key,
|
||||||
|
"location": f"{MEL_CENTER_LAT},{MEL_CENTER_LNG}",
|
||||||
|
"radius": MEL_RADIUS_M,
|
||||||
|
"type": place_type,
|
||||||
|
}
|
||||||
|
if page_token:
|
||||||
|
params["pagetoken"] = page_token
|
||||||
|
|
||||||
|
_increment_quota()
|
||||||
|
time.sleep(PLACES_SLEEP_S)
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = requests.get(
|
||||||
|
f"{self.BASE_URL}/nearbysearch/json",
|
||||||
|
params=params,
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"NearbySearch error (type={place_type}): {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _place_details(self, place_id: str) -> dict:
|
||||||
|
"""Place Details pour récupérer website, phone, rating, etc. — 1 requête."""
|
||||||
|
params = {
|
||||||
|
"key": self.api_key,
|
||||||
|
"place_id": place_id,
|
||||||
|
"fields": "name,formatted_address,formatted_phone_number,website,rating,user_ratings_total",
|
||||||
|
}
|
||||||
|
|
||||||
|
_increment_quota()
|
||||||
|
time.sleep(PLACES_SLEEP_S)
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = requests.get(
|
||||||
|
f"{self.BASE_URL}/details/json",
|
||||||
|
params=params,
|
||||||
|
timeout=10,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json().get("result", {})
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"PlaceDetails error (place_id={place_id}): {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def scrape(self, max_leads: int = 50) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Scrape les restaurants/cafés/bars MEL sans site web.
|
||||||
|
|
||||||
|
Retourne une liste de dicts normalisés compatibles LeadHunter CRM :
|
||||||
|
source, name, address, phone, rating, reviews_count, website, rgpd_ok
|
||||||
|
"""
|
||||||
|
leads = []
|
||||||
|
seen_ids = set()
|
||||||
|
|
||||||
|
for place_type in TARGET_TYPES:
|
||||||
|
if _quota_remaining() < 10:
|
||||||
|
logger.warning(
|
||||||
|
"Quota journalier presque épuisé — arrêt scraping Google Places."
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
logger.info(f"Scraping Google Places — type={place_type}")
|
||||||
|
page_token = None
|
||||||
|
|
||||||
|
while True:
|
||||||
|
if _quota_remaining() < 5:
|
||||||
|
logger.warning("Quota insuffisant pour continuer la pagination.")
|
||||||
|
break
|
||||||
|
|
||||||
|
data = self._nearby_search(place_type, page_token)
|
||||||
|
results = data.get("results", [])
|
||||||
|
|
||||||
|
for place in results:
|
||||||
|
if len(leads) >= max_leads:
|
||||||
|
break
|
||||||
|
|
||||||
|
place_id = place.get("place_id", "")
|
||||||
|
if not place_id or place_id in seen_ids:
|
||||||
|
continue
|
||||||
|
seen_ids.add(place_id)
|
||||||
|
|
||||||
|
if _quota_remaining() < 2:
|
||||||
|
logger.warning("Quota épuisé avant details.")
|
||||||
|
break
|
||||||
|
|
||||||
|
details = self._place_details(place_id)
|
||||||
|
|
||||||
|
# Filtre : on ne garde que les lieux SANS site web
|
||||||
|
if details.get("website"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
lead = {
|
||||||
|
"source": "google_places",
|
||||||
|
"name": details.get("name") or place.get("name", ""),
|
||||||
|
"address": details.get("formatted_address")
|
||||||
|
or place.get("vicinity", ""),
|
||||||
|
"phone": details.get("formatted_phone_number", ""),
|
||||||
|
"rating": details.get("rating") or place.get("rating"),
|
||||||
|
"reviews_count": details.get("user_ratings_total")
|
||||||
|
or place.get("user_ratings_total"),
|
||||||
|
"website": "",
|
||||||
|
"rgpd_ok": True, # Données publiques Google Places uniquement
|
||||||
|
}
|
||||||
|
leads.append(lead)
|
||||||
|
logger.info(f"Lead trouvé (Google Places) : {lead['name']}")
|
||||||
|
|
||||||
|
if len(leads) >= max_leads:
|
||||||
|
break
|
||||||
|
|
||||||
|
page_token = data.get("next_page_token")
|
||||||
|
if not page_token:
|
||||||
|
break
|
||||||
|
|
||||||
|
# L'API Google Places nécessite un délai avant d'utiliser next_page_token
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
logger.info(f"Google Places : {len(leads)} leads collectés.")
|
||||||
|
return leads
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Overpass / OSM Fallback ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class OverpassScraper:
|
||||||
|
"""
|
||||||
|
Fallback OSM via Overpass API.
|
||||||
|
Cible les nœuds/ways dans la boundary MEL sans attribut 'website'.
|
||||||
|
Données publiques ODbL — RGPD OK.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def scrape(self, max_leads: int = 100) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Scrape via Overpass API — retourne des leads normalisés.
|
||||||
|
"""
|
||||||
|
logger.info("Scraping Overpass OSM — boundary MEL")
|
||||||
|
leads = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = requests.post(
|
||||||
|
OVERPASS_URL,
|
||||||
|
data={"data": OVERPASS_MEL_QUERY},
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/x-www-form-urlencoded", # Fix HRT-72 Bug2
|
||||||
|
"User-Agent": "H3R7Tech-LeadHunter/1.0 (contact@h3r7tech.fr)", # Fix HRT-72 Bug3: overpass-api.de blocks python-requests UA
|
||||||
|
},
|
||||||
|
timeout=90,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Overpass API error : {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
elements = data.get("elements", [])
|
||||||
|
logger.info(f"Overpass : {len(elements)} éléments bruts reçus.")
|
||||||
|
|
||||||
|
for el in elements[:max_leads]:
|
||||||
|
tags = el.get("tags", {})
|
||||||
|
|
||||||
|
# Coordonnées (pour les ways, Overpass retourne 'center')
|
||||||
|
lat = el.get("lat") or (el.get("center") or {}).get("lat")
|
||||||
|
lon = el.get("lon") or (el.get("center") or {}).get("lon")
|
||||||
|
|
||||||
|
name = tags.get("name", "")
|
||||||
|
if not name:
|
||||||
|
continue # Ignorer les lieux sans nom
|
||||||
|
|
||||||
|
addr_parts = [
|
||||||
|
tags.get("addr:housenumber", ""),
|
||||||
|
tags.get("addr:street", ""),
|
||||||
|
tags.get("addr:city", ""),
|
||||||
|
tags.get("addr:postcode", ""),
|
||||||
|
]
|
||||||
|
address = " ".join(p for p in addr_parts if p).strip()
|
||||||
|
if not address and lat and lon:
|
||||||
|
address = f"{lat:.4f},{lon:.4f}"
|
||||||
|
|
||||||
|
lead = {
|
||||||
|
"source": "osm",
|
||||||
|
"name": name,
|
||||||
|
"address": address,
|
||||||
|
"phone": tags.get("phone", tags.get("contact:phone", "")),
|
||||||
|
"rating": None,
|
||||||
|
"reviews_count": None,
|
||||||
|
"website": "",
|
||||||
|
"rgpd_ok": True, # Données publiques ODbL
|
||||||
|
}
|
||||||
|
leads.append(lead)
|
||||||
|
logger.info(f"Lead trouvé (OSM) : {lead['name']}")
|
||||||
|
|
||||||
|
logger.info(f"Overpass : {len(leads)} leads collectés.")
|
||||||
|
return leads
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Orchestrateur ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def run_scraping(
|
||||||
|
max_leads: int = 100, use_google: bool = True, use_osm: bool = True
|
||||||
|
) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Lance le scraping Google Places + fallback OSM.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
max_leads: nombre maximum de leads à collecter au total.
|
||||||
|
use_google: activer Google Places (nécessite GOOGLE_PLACES_API_KEY).
|
||||||
|
use_osm: activer le fallback Overpass OSM.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Liste de leads normalisés (dédupliqués par nom + adresse).
|
||||||
|
"""
|
||||||
|
all_leads = []
|
||||||
|
seen_keys = set()
|
||||||
|
|
||||||
|
def _dedup_key(lead: dict) -> str:
|
||||||
|
return f"{lead['name'].lower().strip()}|{lead['address'].lower().strip()[:40]}"
|
||||||
|
|
||||||
|
if use_google:
|
||||||
|
try:
|
||||||
|
scraper = GooglePlacesScraper()
|
||||||
|
google_leads = scraper.scrape(max_leads=max_leads)
|
||||||
|
for lead in google_leads:
|
||||||
|
k = _dedup_key(lead)
|
||||||
|
if k not in seen_keys:
|
||||||
|
seen_keys.add(k)
|
||||||
|
all_leads.append(lead)
|
||||||
|
except EnvironmentError as e:
|
||||||
|
logger.warning(f"Google Places désactivé : {e}")
|
||||||
|
use_google = False
|
||||||
|
|
||||||
|
remaining = max_leads - len(all_leads)
|
||||||
|
if use_osm and remaining > 0:
|
||||||
|
osm_leads = OverpassScraper().scrape(max_leads=remaining)
|
||||||
|
for lead in osm_leads:
|
||||||
|
k = _dedup_key(lead)
|
||||||
|
if k not in seen_keys:
|
||||||
|
seen_keys.add(k)
|
||||||
|
all_leads.append(lead)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"run_scraping terminé — {len(all_leads)} leads uniques "
|
||||||
|
f"(Google={use_google}, OSM={use_osm}). "
|
||||||
|
f"Quota restant aujourd'hui : {_quota_remaining()}"
|
||||||
|
)
|
||||||
|
return all_leads
|
||||||
|
|
||||||
|
|
||||||
|
# ─── CLI (debug) ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
assert GOOGLE_PLACES_API_KEY, (
|
||||||
|
"GOOGLE_PLACES_API_KEY manquante — "
|
||||||
|
"ajouter 'export GOOGLE_PLACES_API_KEY=xxx' dans /home/h3r7/.env"
|
||||||
|
)
|
||||||
|
leads = run_scraping(max_leads=10)
|
||||||
|
for i, l in enumerate(leads, 1):
|
||||||
|
print(f"{i:02d}. [{l['source']}] {l['name']} — {l['address']}")
|
||||||
@@ -15,7 +15,7 @@ import sqlite3
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
||||||
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
|
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
'Accept-Language': 'fr-FR,fr;q=0.9,en;q=0.8',
|
'Accept-Language': 'fr-FR,fr;q=0.9,en;q=0.8',
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ from pathlib import Path
|
|||||||
# ─────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────
|
||||||
# CONFIG
|
# CONFIG
|
||||||
# ─────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────
|
||||||
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
|
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
|
||||||
OUTPUT_DIR = Path("/home/h3r7/turf_scraper")
|
OUTPUT_DIR = Path("/home/h3r7/turf_scraper")
|
||||||
API_BASE = "https://online.turfinfo.api.pmu.fr/rest/client/7"
|
API_BASE = "https://online.turfinfo.api.pmu.fr/rest/client/7"
|
||||||
|
|
||||||
|
|||||||
@@ -5,8 +5,11 @@ import json
|
|||||||
import requests
|
import requests
|
||||||
import subprocess
|
import subprocess
|
||||||
import db
|
import db
|
||||||
|
from middleware import rate_limit_middleware, access_log_middleware
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
rate_limit_middleware(app)
|
||||||
|
access_log_middleware(app)
|
||||||
|
|
||||||
DASHBOARD_API_URL = "http://localhost:8791"
|
DASHBOARD_API_URL = "http://localhost:8791"
|
||||||
COMBINED_API_URL = "http://localhost:8790"
|
COMBINED_API_URL = "http://localhost:8790"
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from flask import Blueprint, request, jsonify
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from .saas_auth import require_auth
|
from saas_auth import require_auth
|
||||||
|
|
||||||
DB_PATH = os.environ.get("TURF_SAAS_DB", "/home/h3r7/turf_saas/turf_saas.db")
|
DB_PATH = os.environ.get("TURF_SAAS_DB", "/home/h3r7/turf_saas/turf_saas.db")
|
||||||
|
|
||||||
@@ -255,3 +255,28 @@ def export_csv():
|
|||||||
"Content-Disposition": f"attachment; filename=turf_ia_{date_param}.csv"
|
"Content-Disposition": f"attachment; filename=turf_ia_{date_param}.csv"
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Billing Blueprint (Stripe) + JWT init — HRT-49 ─────────────────────────
|
||||||
|
# Registers /api/v1/billing/* routes via nested Blueprint (Flask 2.0+)
|
||||||
|
# Also initializes JWTManager on the Flask app (required for jwt_required_middleware)
|
||||||
|
try:
|
||||||
|
from flask_jwt_extended import JWTManager
|
||||||
|
from api_v1.routes.billing import billing_bp
|
||||||
|
|
||||||
|
# Initialize JWTManager on the Flask app when api_v1_bp is registered
|
||||||
|
@api_v1_bp.record_once
|
||||||
|
def _init_jwt(state):
|
||||||
|
app = state.app
|
||||||
|
if not app.config.get('JWT_SECRET_KEY'):
|
||||||
|
import os
|
||||||
|
app.config['JWT_SECRET_KEY'] = os.environ.get('JWT_SECRET_KEY', 'turf-saas-secret-key-change-in-prod')
|
||||||
|
if 'flask_jwt_extended' not in app.extensions:
|
||||||
|
JWTManager(app)
|
||||||
|
|
||||||
|
# Register billing blueprint with url_prefix='/billing'
|
||||||
|
# (parent api_v1_bp has '/api/v1', so result is /api/v1/billing/*)
|
||||||
|
api_v1_bp.register_blueprint(billing_bp, url_prefix='/billing')
|
||||||
|
print('[saas_api_v1] Billing blueprint (Stripe) + JWT registered ✅')
|
||||||
|
except Exception as _billing_err:
|
||||||
|
print(f'[saas_api_v1] Warning: billing blueprint not loaded: {_billing_err}')
|
||||||
|
|||||||
172
saas_auth.py
172
saas_auth.py
@@ -14,6 +14,135 @@ import time
|
|||||||
import json
|
import json
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from collections import defaultdict
|
||||||
|
from threading import Lock
|
||||||
|
|
||||||
|
# ─── Rate limiting login ───────────────────────────────────────────────────────
|
||||||
|
_login_attempts: dict = defaultdict(
|
||||||
|
lambda: {"count": 0, "window_start": 0.0, "blocked_until": 0.0}
|
||||||
|
)
|
||||||
|
_login_lock = Lock()
|
||||||
|
|
||||||
|
LOGIN_RATE_MAX = 5 # max tentatives par fenêtre
|
||||||
|
LOGIN_RATE_WINDOW = 300 # 5 minutes (en secondes)
|
||||||
|
LOGIN_BLOCK_DURATION = 900 # 15 min de blocage après dépassement
|
||||||
|
|
||||||
|
# ─── Blacklist mots de passe faibles ─────────────────────────────────────────
|
||||||
|
# HRT-63 — Validation mots de passe faibles
|
||||||
|
WEAK_PASSWORDS = {
|
||||||
|
"password",
|
||||||
|
"password1",
|
||||||
|
"password123",
|
||||||
|
"passw0rd",
|
||||||
|
"12345678",
|
||||||
|
"123456789",
|
||||||
|
"1234567890",
|
||||||
|
"123456",
|
||||||
|
"12345",
|
||||||
|
"1234",
|
||||||
|
"qwerty",
|
||||||
|
"qwerty123",
|
||||||
|
"qwertyuiop",
|
||||||
|
"azerty",
|
||||||
|
"azertyuiop",
|
||||||
|
"letmein",
|
||||||
|
"letmein1",
|
||||||
|
"iloveyou",
|
||||||
|
"iloveyou1",
|
||||||
|
"admin",
|
||||||
|
"admin123",
|
||||||
|
"admin1234",
|
||||||
|
"administrator",
|
||||||
|
"welcome",
|
||||||
|
"welcome1",
|
||||||
|
"welcome123",
|
||||||
|
"monkey",
|
||||||
|
"monkey1",
|
||||||
|
"dragon",
|
||||||
|
"dragon1",
|
||||||
|
"master",
|
||||||
|
"master1",
|
||||||
|
"football",
|
||||||
|
"soccer",
|
||||||
|
"baseball",
|
||||||
|
"basketball",
|
||||||
|
"superman",
|
||||||
|
"batman",
|
||||||
|
"starwars",
|
||||||
|
"starwars1",
|
||||||
|
"princess",
|
||||||
|
"princess1",
|
||||||
|
"sunshine",
|
||||||
|
"sunshine1",
|
||||||
|
"shadow",
|
||||||
|
"shadow1",
|
||||||
|
"michael",
|
||||||
|
"michael1",
|
||||||
|
"jessica",
|
||||||
|
"jessica1",
|
||||||
|
"abc123",
|
||||||
|
"abc1234",
|
||||||
|
"abcd1234",
|
||||||
|
"abcdefgh",
|
||||||
|
"login",
|
||||||
|
"login123",
|
||||||
|
"pass",
|
||||||
|
"pass1234",
|
||||||
|
"test",
|
||||||
|
"test1234",
|
||||||
|
"test123456",
|
||||||
|
"hello",
|
||||||
|
"hello123",
|
||||||
|
"hello1234",
|
||||||
|
"changeme",
|
||||||
|
"changeme1",
|
||||||
|
"secret",
|
||||||
|
"secret1",
|
||||||
|
"secret123",
|
||||||
|
"trustno1",
|
||||||
|
"zaq1zaq1",
|
||||||
|
"qazwsx",
|
||||||
|
"qazwsxedc",
|
||||||
|
"111111",
|
||||||
|
"1111111",
|
||||||
|
"11111111",
|
||||||
|
"000000",
|
||||||
|
"00000000",
|
||||||
|
"123123",
|
||||||
|
"1231234",
|
||||||
|
"321321",
|
||||||
|
"p@ssword",
|
||||||
|
"p@ssw0rd",
|
||||||
|
"pa$$word",
|
||||||
|
"turf",
|
||||||
|
"turf123",
|
||||||
|
"cheval",
|
||||||
|
"cheval123",
|
||||||
|
"pmu",
|
||||||
|
"pmu123",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def validate_password_strength(password: str):
|
||||||
|
"""
|
||||||
|
Valide la complexité d'un mot de passe.
|
||||||
|
Retourne None si OK, sinon un message d'erreur (str).
|
||||||
|
Règles :
|
||||||
|
- 8 caractères minimum
|
||||||
|
- absent de la blacklist WEAK_PASSWORDS
|
||||||
|
- au moins 1 chiffre
|
||||||
|
- au moins 1 lettre
|
||||||
|
"""
|
||||||
|
if len(password) < 8:
|
||||||
|
return "Mot de passe trop court (8 caractères minimum)."
|
||||||
|
if password.lower() in WEAK_PASSWORDS:
|
||||||
|
return "Mot de passe trop commun. Choisissez un mot de passe plus sécurisé."
|
||||||
|
if not any(c.isdigit() for c in password):
|
||||||
|
return "Le mot de passe doit contenir au moins 1 chiffre."
|
||||||
|
if not any(c.isalpha() for c in password):
|
||||||
|
return "Le mot de passe doit contenir au moins 1 lettre."
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# ─── Config ───────────────────────────────────────────────────────────────────
|
# ─── Config ───────────────────────────────────────────────────────────────────
|
||||||
DB_PATH = os.environ.get("TURF_SAAS_DB", "/home/h3r7/turf_saas/turf_saas.db")
|
DB_PATH = os.environ.get("TURF_SAAS_DB", "/home/h3r7/turf_saas/turf_saas.db")
|
||||||
@@ -148,10 +277,9 @@ def register():
|
|||||||
|
|
||||||
if not email or "@" not in email:
|
if not email or "@" not in email:
|
||||||
return jsonify({"error": "Adresse email invalide."}), 400
|
return jsonify({"error": "Adresse email invalide."}), 400
|
||||||
if len(password) < 8:
|
pwd_error = validate_password_strength(password)
|
||||||
return jsonify(
|
if pwd_error:
|
||||||
{"error": "Mot de passe trop court (8 caractères minimum)."}
|
return jsonify({"error": pwd_error}), 400
|
||||||
), 400
|
|
||||||
if plan not in ("free", "premium", "pro"):
|
if plan not in ("free", "premium", "pro"):
|
||||||
plan = "free"
|
plan = "free"
|
||||||
|
|
||||||
@@ -184,6 +312,37 @@ def login():
|
|||||||
if not email or not password:
|
if not email or not password:
|
||||||
return jsonify({"error": "Email et mot de passe requis."}), 400
|
return jsonify({"error": "Email et mot de passe requis."}), 400
|
||||||
|
|
||||||
|
# ── Rate limit par IP ────────────────────────────────────────
|
||||||
|
ip = request.remote_addr or "unknown"
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
|
with _login_lock:
|
||||||
|
bucket = _login_attempts[ip]
|
||||||
|
# Lever le blocage si la durée est écoulée
|
||||||
|
if now >= bucket["blocked_until"]:
|
||||||
|
if now - bucket["window_start"] >= LOGIN_RATE_WINDOW:
|
||||||
|
bucket["count"] = 0
|
||||||
|
bucket["window_start"] = now
|
||||||
|
bucket["count"] += 1
|
||||||
|
count = bucket["count"]
|
||||||
|
if count > LOGIN_RATE_MAX:
|
||||||
|
bucket["blocked_until"] = now + LOGIN_BLOCK_DURATION
|
||||||
|
retry_after = LOGIN_BLOCK_DURATION
|
||||||
|
blocked = True
|
||||||
|
else:
|
||||||
|
retry_after = int(LOGIN_RATE_WINDOW - (now - bucket["window_start"]))
|
||||||
|
blocked = False
|
||||||
|
else:
|
||||||
|
blocked = True
|
||||||
|
retry_after = int(bucket["blocked_until"] - now)
|
||||||
|
|
||||||
|
if blocked:
|
||||||
|
resp = jsonify({"error": "Trop de tentatives. Réessayez plus tard."})
|
||||||
|
resp.status_code = 429
|
||||||
|
resp.headers["Retry-After"] = str(retry_after)
|
||||||
|
return resp
|
||||||
|
# ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
pw_hash = hash_password(password)
|
pw_hash = hash_password(password)
|
||||||
conn = get_db()
|
conn = get_db()
|
||||||
user = conn.execute(
|
user = conn.execute(
|
||||||
@@ -249,8 +408,9 @@ def change_password():
|
|||||||
cur_pwd = data.get("current_password") or ""
|
cur_pwd = data.get("current_password") or ""
|
||||||
new_pwd = data.get("new_password") or ""
|
new_pwd = data.get("new_password") or ""
|
||||||
|
|
||||||
if len(new_pwd) < 8:
|
pwd_error = validate_password_strength(new_pwd)
|
||||||
return jsonify({"error": "Nouveau mot de passe trop court."}), 400
|
if pwd_error:
|
||||||
|
return jsonify({"error": pwd_error}), 400
|
||||||
|
|
||||||
conn = get_db()
|
conn = get_db()
|
||||||
user = conn.execute(
|
user = conn.execute(
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
|
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
|
||||||
HEADERS = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
HEADERS = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
||||||
|
|
||||||
def get_cote_from_db(horse_name, date_course):
|
def get_cote_from_db(horse_name, date_course):
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ class TestJWTAuthentication:
|
|||||||
"invalid_signature_here"
|
"invalid_signature_here"
|
||||||
)
|
)
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
f"{BASE_URL}/api/races",
|
f"{BASE_URL}/api/v1/predictions/today",
|
||||||
headers={"Authorization": f"Bearer {expired_token}"},
|
headers={"Authorization": f"Bearer {expired_token}"},
|
||||||
timeout=5,
|
timeout=5,
|
||||||
)
|
)
|
||||||
@@ -153,7 +153,7 @@ class TestJWTAuthentication:
|
|||||||
"""Un token JWT malformé doit être rejeté."""
|
"""Un token JWT malformé doit être rejeté."""
|
||||||
for bad_token in ["not.a.jwt", "Bearer", "null", "undefined", ""]:
|
for bad_token in ["not.a.jwt", "Bearer", "null", "undefined", ""]:
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
f"{BASE_URL}/api/races",
|
f"{BASE_URL}/api/v1/predictions/today",
|
||||||
headers={"Authorization": f"Bearer {bad_token}"},
|
headers={"Authorization": f"Bearer {bad_token}"},
|
||||||
timeout=5,
|
timeout=5,
|
||||||
)
|
)
|
||||||
@@ -163,7 +163,7 @@ class TestJWTAuthentication:
|
|||||||
|
|
||||||
def test_jwt_sans_token(self):
|
def test_jwt_sans_token(self):
|
||||||
"""Sans token, les routes protégées doivent retourner 401."""
|
"""Sans token, les routes protégées doivent retourner 401."""
|
||||||
resp = requests.get(f"{BASE_URL}/api/export/csv", timeout=5)
|
resp = requests.get(f"{BASE_URL}/api/v1/export/csv", timeout=5)
|
||||||
assert resp.status_code in (401, 403), (
|
assert resp.status_code in (401, 403), (
|
||||||
f"Route protégée accessible sans token: status={resp.status_code}"
|
f"Route protégée accessible sans token: status={resp.status_code}"
|
||||||
)
|
)
|
||||||
@@ -303,6 +303,138 @@ class TestPlanAuthorisation:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# === Tests validation mots de passe faibles (HRT-63) ===
|
||||||
|
|
||||||
|
|
||||||
|
class TestWeakPasswordRejection:
|
||||||
|
"""Tests rejet mots de passe faibles : blacklist + complexité (HRT-63)."""
|
||||||
|
|
||||||
|
REGISTER_URL = (
|
||||||
|
os.environ.get("APP_URL", "http://localhost:8792") + "/api/v1/auth/register"
|
||||||
|
)
|
||||||
|
|
||||||
|
WEAK_PASSWORDS = [
|
||||||
|
"password",
|
||||||
|
"12345678",
|
||||||
|
"qwerty123",
|
||||||
|
"letmein1",
|
||||||
|
"admin123",
|
||||||
|
"welcome1",
|
||||||
|
"iloveyou",
|
||||||
|
"abc1234",
|
||||||
|
"sunshine",
|
||||||
|
"111111111",
|
||||||
|
]
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("weak_pwd", WEAK_PASSWORDS)
|
||||||
|
def test_weak_password_rejected(self, weak_pwd):
|
||||||
|
"""Les mots de passe faibles/blacklistés doivent retourner 400."""
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
unique_email = f"test_weak_{int(_time.time() * 1000)}_{weak_pwd[:4]}@h3r7.tech"
|
||||||
|
resp = requests.post(
|
||||||
|
self.REGISTER_URL,
|
||||||
|
json={"email": unique_email, "password": weak_pwd, "plan": "free"},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
assert resp.status_code == 400, (
|
||||||
|
f"Mot de passe faible accepté: pwd={weak_pwd!r}, status={resp.status_code}"
|
||||||
|
)
|
||||||
|
body = resp.json()
|
||||||
|
assert "error" in body, f"Pas de champ 'error' dans la réponse: {body}"
|
||||||
|
|
||||||
|
def test_strong_password_accepted(self):
|
||||||
|
"""Un mot de passe fort doit permettre l'inscription (retourne 201)."""
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
unique_email = f"test_strong_{int(_time.time() * 1000)}@h3r7.tech"
|
||||||
|
resp = requests.post(
|
||||||
|
self.REGISTER_URL,
|
||||||
|
json={"email": unique_email, "password": "Tr0ub4d@ur!", "plan": "free"},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
assert resp.status_code == 201, (
|
||||||
|
f"Mot de passe fort rejeté: status={resp.status_code}, body={resp.text}"
|
||||||
|
)
|
||||||
|
data = resp.json()
|
||||||
|
assert "token" in data, f"Pas de token dans la réponse: {data}"
|
||||||
|
|
||||||
|
def test_no_digit_rejected(self):
|
||||||
|
"""Un mot de passe sans chiffre doit être rejeté."""
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
unique_email = f"test_nodigit_{int(_time.time() * 1000)}@h3r7.tech"
|
||||||
|
resp = requests.post(
|
||||||
|
self.REGISTER_URL,
|
||||||
|
json={"email": unique_email, "password": "NoDigitPassword", "plan": "free"},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
assert resp.status_code == 400, (
|
||||||
|
f"Mot de passe sans chiffre accepté: status={resp.status_code}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_no_letter_rejected(self):
|
||||||
|
"""Un mot de passe sans lettre doit être rejeté."""
|
||||||
|
import time as _time
|
||||||
|
|
||||||
|
unique_email = f"test_noletter_{int(_time.time() * 1000)}@h3r7.tech"
|
||||||
|
resp = requests.post(
|
||||||
|
self.REGISTER_URL,
|
||||||
|
json={"email": unique_email, "password": "12345678901", "plan": "free"},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
assert resp.status_code == 400, (
|
||||||
|
f"Mot de passe sans lettre accepté: status={resp.status_code}"
|
||||||
|
)
|
||||||
|
# === Tests rate limiting login ===
|
||||||
|
|
||||||
|
|
||||||
|
class TestLoginRateLimit:
|
||||||
|
"""Tests rate limiting sur /api/v1/auth/login."""
|
||||||
|
|
||||||
|
TARGET_URL = (
|
||||||
|
os.environ.get("APP_URL", "http://localhost:8792") + "/api/v1/auth/login"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_login_brute_force_blocked_after_5_attempts(self):
|
||||||
|
"""Après 5 tentatives, le 6ème appel doit retourner 429."""
|
||||||
|
# Utiliser un email unique pour isoler le test
|
||||||
|
email = f"ratelimit_test_{int(time.time())}@h3r7.tech"
|
||||||
|
for i in range(5):
|
||||||
|
resp = requests.post(
|
||||||
|
self.TARGET_URL,
|
||||||
|
json={"email": email, "password": "wrong_password"},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
assert resp.status_code in (400, 401), (
|
||||||
|
f"Tentative {i + 1}: status inattendu {resp.status_code}"
|
||||||
|
)
|
||||||
|
# La 6ème tentative doit être bloquée
|
||||||
|
resp = requests.post(
|
||||||
|
self.TARGET_URL,
|
||||||
|
json={"email": email, "password": "wrong_password"},
|
||||||
|
timeout=5,
|
||||||
|
)
|
||||||
|
assert resp.status_code == 429, (
|
||||||
|
f"Rate limit non appliqué après 5 tentatives: got {resp.status_code}"
|
||||||
|
)
|
||||||
|
assert "Retry-After" in resp.headers, "Header Retry-After manquant sur 429"
|
||||||
|
|
||||||
|
def test_login_429_has_retry_after_header(self):
|
||||||
|
"""La réponse 429 doit inclure Retry-After."""
|
||||||
|
email = f"ratelimit_test2_{int(time.time())}@h3r7.tech"
|
||||||
|
for _ in range(6):
|
||||||
|
requests.post(
|
||||||
|
self.TARGET_URL, json={"email": email, "password": "x"}, timeout=5
|
||||||
|
)
|
||||||
|
resp = requests.post(
|
||||||
|
self.TARGET_URL, json={"email": email, "password": "x"}, timeout=5
|
||||||
|
)
|
||||||
|
if resp.status_code == 429:
|
||||||
|
assert "Retry-After" in resp.headers
|
||||||
|
assert int(resp.headers["Retry-After"]) > 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user