turf_saas/leadhunter_api.py

#!/usr/bin/env python3
"""
H3R7Tech — LeadHunter API
===========================
Service Flask sur port 8775 exposant les endpoints LeadHunter.

Endpoints :
  GET  /api/leads              — Liste les leads (filtres: status, limit, offset)
  POST /api/leads/scrape       — Lance un job de scraping asynchrone
  GET  /api/leads/stats        — Statistiques globales du CRM
  GET  /api/leads/export       — Export CSV des leads
  PATCH /api/leads/<id>/status — Met à jour le statut d'un lead

Port : 8775 (8769 occupé par depenses_trello/app.py, 8770 occupé par turf_scraper/crm_api.py — corrigé HRT-66)

Auteur: H3R7Tech Backend Engineer
Issue: HRT-66
"""

import os
import threading
import logging
from logging.handlers import RotatingFileHandler
from flask import Flask, jsonify, request, Response
from flask_cors import CORS

# Import des modules LeadHunter
from leadhunter_crm import (
    init_db,
    insert_leads,
    get_leads,
    get_lead_by_id,
    update_lead_status,
    get_stats,
    export_csv,
    VALID_STATUSES,
    DB_PATH,
)
from leadhunter_scraper import run_scraping, GOOGLE_PLACES_API_KEY
from leadhunter_scorer import LeadScorer

# ─── Startup checks ──────────────────────────────────────────────────────────
# The Google Places API key must be present in the environment at startup.
# An explicit raise is used instead of `assert`: assert statements are removed
# when Python runs with -O, which would silently disable this check.
if not os.environ.get("GOOGLE_PLACES_API_KEY"):
    raise RuntimeError(
        "GOOGLE_PLACES_API_KEY manquante. "
        "Ajouter dans /home/h3r7/.env : export GOOGLE_PLACES_API_KEY=xxx"
    )

# ─── Logging ────────────────────────────────────────────────────────────────
logger = logging.getLogger("leadhunter.api")
logger.setLevel(logging.INFO)

# Handlers are attached only when the logger has none yet, so re-importing the
# module does not duplicate log lines. The rotating file handler is created
# inside the guard: building it opens the log file, and creating it without
# attaching it would leave an unused handler holding an open file descriptor.
_handler = None
if not logger.handlers:
    _handler = RotatingFileHandler(
        "/home/h3r7/leadhunter.log",
        maxBytes=5 * 1024 * 1024,  # rotate at 5 MiB
        backupCount=3,
    )
    _handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)-8s %(name)s — %(message)s")
    )
    logger.addHandler(_handler)
    # Console output uses the logging module's default StreamHandler format.
    logger.addHandler(logging.StreamHandler())

# ─── Flask app ───────────────────────────────────────────────────────────────
app = Flask(__name__)
CORS(app)  # flask_cors applied with its default settings

# Single LeadScorer instance shared by every scraping job.
scorer = LeadScorer()

# Module-level state of the scraping job. A plain dict guarded by a lock is
# used instead of a task queue (noted in the original as sufficient for the
# POC). All reads and writes go through _scrape_lock.
_scrape_lock = threading.Lock()
_scrape_job = dict(
    running=False,
    last_run=None,
    last_count=0,
    last_error=None,
)

# ─── DB init ─────────────────────────────────────────────────────────────────
init_db(DB_PATH)
logger.info("LeadHunter API démarrée — DB initialisée.")


# ─── Helpers ─────────────────────────────────────────────────────────────────


def _run_scrape_job(max_leads: int, use_google: bool, use_osm: bool) -> None:
    """Run one scrape → score → insert pass; intended as a thread target.

    The shared ``_scrape_job`` dict is updated under ``_scrape_lock``:
    ``running`` is set for the duration of the job, ``last_count`` and
    ``last_run`` are written on success, and ``last_error`` holds the string
    form of the exception on failure. Exceptions never propagate out of this
    function; they are logged and recorded instead.

    Args:
        max_leads: Forwarded to ``run_scraping`` as ``max_leads``.
        use_google: Forwarded to ``run_scraping`` as ``use_google``.
        use_osm: Forwarded to ``run_scraping`` as ``use_osm``.
    """
    # Function-scope import keeps this block self-contained. It is done before
    # any lock is taken so the import machinery never runs while holding it.
    from datetime import datetime, timezone

    with _scrape_lock:
        _scrape_job["running"] = True
        _scrape_job["last_error"] = None

    try:
        leads_raw = run_scraping(
            max_leads=max_leads,
            use_google=use_google,
            use_osm=use_osm,
        )
        leads_scored = scorer.score_leads(leads_raw)
        inserted_ids = insert_leads(leads_scored)
        inserted_count = len(inserted_ids)

        # datetime.utcnow() is deprecated; take an aware UTC time and drop the
        # tzinfo so the serialized form stays "YYYY-MM-DDTHH:MM:SS[.ffffff]Z".
        finished_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        )

        with _scrape_lock:
            _scrape_job["last_count"] = inserted_count
            _scrape_job["last_run"] = finished_at

        logger.info("Scrape job terminé : %d leads insérés.", inserted_count)

    except Exception as e:
        # Broad catch is deliberate: this is the top of a worker thread, and
        # the failure must be surfaced through the job state. exc_info keeps
        # the traceback in the log so the failure can be diagnosed.
        logger.warning("Scrape job erreur : %s", e, exc_info=True)
        with _scrape_lock:
            _scrape_job["last_error"] = str(e)

    finally:
        # Always release the "running" flag so a new job can be started.
        with _scrape_lock:
            _scrape_job["running"] = False


# ─── Routes ──────────────────────────────────────────────────────────────────


@app.route("/api/leads", methods=["GET"])
def api_get_leads():
    """
    Return a page of CRM leads as JSON.

    Query params:
      - status (str, optional): when given, must be one of VALID_STATUSES
      - limit (int, default=50): page size passed to get_leads
      - offset (int, default=0): starting offset passed to get_leads

    Responds 400 when limit/offset are not integers or when status is not an
    accepted value.
    """
    params = request.args
    status_filter = params.get("status")

    try:
        page_size = int(params.get("limit", 50))
        start = int(params.get("offset", 0))
    except ValueError:
        return jsonify({"error": "limit et offset doivent être des entiers"}), 400

    # An empty status string is treated like an absent filter.
    if status_filter and status_filter not in VALID_STATUSES:
        message = f"status invalide. Valeurs acceptées : {VALID_STATUSES}"
        return jsonify({"error": message}), 400

    rows = get_leads(status=status_filter, limit=page_size, offset=start)
    payload = {
        "leads": rows,
        "count": len(rows),
        "limit": page_size,
        "offset": start,
        "status_filter": status_filter,
    }
    return jsonify(payload)


@app.route("/api/leads/scrape", methods=["POST"])
def api_scrape():
    """
    Start an asynchronous scraping job in a background thread.

    JSON body (optional):
      - max_leads (int, default=100)
      - use_google (bool, default=true)
      - use_osm (bool, default=true)

    Responses:
      - 202: job started; echoes the parameters used
      - 400: body is not a JSON object, or max_leads is not an integer
      - 409: a scraping job is already running
    """
    # Validate the input before reserving the job slot, so a bad request can
    # never leave the "running" flag set.
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        return jsonify({"error": "Le body JSON doit être un objet"}), 400

    try:
        max_leads = int(body.get("max_leads", 100))
    except (TypeError, ValueError):
        return jsonify({"error": "max_leads doit être un entier"}), 400
    use_google = bool(body.get("use_google", True))
    use_osm = bool(body.get("use_osm", True))

    # Check and reserve atomically: the flag is set here, under the same lock
    # as the check, rather than later inside the worker thread. Otherwise two
    # concurrent requests could both see running == False and start two jobs.
    with _scrape_lock:
        if _scrape_job["running"]:
            return jsonify(
                {
                    "status": "already_running",
                    "message": "Un job de scraping est déjà en cours.",
                }
            ), 409
        _scrape_job["running"] = True

    try:
        thread = threading.Thread(
            target=_run_scrape_job,
            args=(max_leads, use_google, use_osm),
            daemon=True,
        )
        thread.start()
    except Exception:
        # The worker never started, so nothing else will clear the flag.
        with _scrape_lock:
            _scrape_job["running"] = False
        raise

    logger.info(
        "Job de scraping lancé (max_leads=%s, use_google=%s, use_osm=%s)",
        max_leads,
        use_google,
        use_osm,
    )

    return jsonify(
        {
            "status": "started",
            "message": "Job de scraping démarré en arrière-plan.",
            "params": {
                "max_leads": max_leads,
                "use_google": use_google,
                "use_osm": use_osm,
            },
        }
    ), 202


@app.route("/api/leads/scrape/status", methods=["GET"])
def api_scrape_status():
    """Return a snapshot of the scraping job state as JSON."""
    # Copy the shared dict while holding the lock; serialize the copy after.
    with _scrape_lock:
        snapshot = _scrape_job.copy()
    return jsonify(snapshot)


@app.route("/api/leads/stats", methods=["GET"])
def api_stats():
    """
    Return the global LeadHunter CRM statistics as JSON.

    The payload is whatever get_stats() returns (documented fields: total,
    by_status, by_source, avg_score, top_leads_count). A falsy result from
    get_stats() is reported as a 500 error.
    """
    stats = get_stats()
    if stats:
        return jsonify(stats)
    return jsonify({"error": "Impossible de calculer les statistiques"}), 500


@app.route("/api/leads/export", methods=["GET"])
def api_export():
    """
    Serve the leads as a downloadable CSV file.

    Query params:
      - status (str, optional): restricts the export to that status; must be
        one of VALID_STATUSES when provided (400 otherwise)
    """
    status = request.args.get("status")
    if status and status not in VALID_STATUSES:
        return jsonify({"error": f"status invalide : {VALID_STATUSES}"}), 400

    # The file name carries the status filter as a suffix when one is applied.
    suffix = "_" + status if status else ""
    filename = f"leadhunter_leads{suffix}.csv"

    response_headers = {
        "Content-Disposition": f"attachment; filename={filename}",
        "Content-Type": "text/csv; charset=utf-8",
    }
    return Response(
        export_csv(status=status),
        mimetype="text/csv",
        headers=response_headers,
    )


@app.route("/api/leads/<int:lead_id>/status", methods=["PATCH"])
def api_update_status(lead_id: int):
    """
    Update the status of a single lead.

    JSON body:
      - status (str): one of VALID_STATUSES (new | contacted | closed | rejected)

    Responses:
      - 200: status updated
      - 400: body is missing, is not a JSON object, lacks 'status', or the
        status value is not an accepted string
      - 404: no lead with this id
      - 500: update_lead_status reported a failure
    """
    body = request.get_json(silent=True)
    # Require a JSON object. A JSON string or array could pass a bare
    # membership test and then fail on body["status"] with a TypeError.
    if not isinstance(body, dict) or "status" not in body:
        return jsonify({"error": "Body JSON requis avec le champ 'status'"}), 400

    new_status = body["status"]
    # The type check comes first so that unhashable values (lists, objects)
    # are rejected cleanly whatever container type VALID_STATUSES is.
    if not isinstance(new_status, str) or new_status not in VALID_STATUSES:
        return jsonify({"error": f"status invalide. Valeurs : {VALID_STATUSES}"}), 400

    lead = get_lead_by_id(lead_id)
    if not lead:
        return jsonify({"error": f"Lead id={lead_id} introuvable"}), 404

    success = update_lead_status(lead_id, new_status)
    if not success:
        return jsonify({"error": "Mise à jour échouée"}), 500

    return jsonify(
        {
            "success": True,
            "lead_id": lead_id,
            "new_status": new_status,
        }
    )


@app.route("/health", methods=["GET"])
def health():
    """Liveness probe for systemd / monitoring; returns a static JSON payload."""
    payload = {
        "status": "ok",
        "service": "leadhunter-api",
        "port": 8775,
    }
    return jsonify(payload)


# ─── Entrypoint ──────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # When executed as a script, start Flask's built-in server listening on
    # all interfaces (0.0.0.0), port 8775, with debug mode disabled.
    app.run(host="0.0.0.0", port=8775, debug=False)