Files
turf_saas/llm_cache.py
2026-04-25 17:18:43 +02:00

261 lines
7.7 KiB
Python

#!/usr/bin/env python3
"""
Cache LLM - Turf Scraper
Réduction des appels API par mise en cache des réponses
"""
import json
import hashlib
import os
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, Any
class LLMCache:
    """File-backed cache for LLM responses with time-based expiry.

    Every entry is stored as one JSON file inside ``cache_dir``; the file
    name is the SHA-256 hex digest of the entry key. An entry is considered
    expired once its stored timestamp is older than the configured TTL.
    """

    def __init__(self, cache_dir: Optional[str] = None, ttl_hours: int = 24):
        """Create the cache directory if needed and record the TTL.

        Args:
            cache_dir: Directory holding the cache files. Defaults to
                ``~/.cache/turf_llm``.
            ttl_hours: Entry lifetime in hours (default: 24).
        """
        if cache_dir is None:
            cache_dir = os.path.expanduser("~/.cache/turf_llm")
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl = timedelta(hours=ttl_hours)

    def _hash_key(self, key: str) -> str:
        """Return the SHA-256 hex digest of ``key``."""
        return hashlib.sha256(key.encode()).hexdigest()

    def _get_cache_path(self, key: str) -> Path:
        """Return the path of the JSON file that stores ``key``."""
        return self.cache_dir / f"{self._hash_key(key)}.json"

    def _load_entry(self, path: Path) -> Optional[tuple]:
        """Read one cache file and evaluate its age.

        Returns:
            ``(data, expired)`` where ``data`` is the stored dict and
            ``expired`` tells whether it is older than the TTL, or ``None``
            when the file is missing, unreadable or malformed (invalid JSON,
            not a JSON object, missing or invalid timestamp).
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if not isinstance(data, dict):
                return None
            cached_time = datetime.fromisoformat(data.get('timestamp', ''))
            # Build "now" with the timestamp's own tzinfo so that both naive
            # and aware timestamps can be subtracted without a TypeError.
            age = datetime.now(cached_time.tzinfo) - cached_time
        except (OSError, ValueError, TypeError):
            # JSONDecodeError and UnicodeDecodeError are ValueError subclasses;
            # TypeError covers a non-string timestamp.
            return None
        return data, age > self.ttl

    def get(self, key: str) -> Optional[dict]:
        """Fetch an entry from the cache.

        An expired entry is removed from disk as a side effect.

        Args:
            key: Lookup key.

        Returns:
            The stored dict (with at least 'response' and 'timestamp'), or
            None when the entry is absent, expired or unreadable.
        """
        cache_path = self._get_cache_path(key)
        entry = self._load_entry(cache_path)
        if entry is None:
            return None
        data, expired = entry
        if not expired:
            return data
        try:
            cache_path.unlink()
        except OSError:
            # Best effort: the entry is reported as a miss either way.
            pass
        return None

    def set(self, key: str, response: Any, metadata: dict = None) -> bool:
        """Store a response in the cache.

        The payload is serialized before any file is touched and then written
        to a temporary file that is atomically moved into place, so a failed
        or interrupted write never leaves a truncated entry behind.

        Args:
            key: Lookup key.
            response: Response to store; must be JSON-serializable.
            metadata: Optional additional metadata.

        Returns:
            True on success, False if the response cannot be serialized or
            the file cannot be written.
        """
        cache_path = self._get_cache_path(key)
        data = {
            'key': key,
            'response': response,
            'timestamp': datetime.now().isoformat(),
            'metadata': metadata or {}
        }
        # The ".tmp" suffix keeps the temporary file out of the "*.json" globs.
        tmp_path = cache_path.with_name(f"{cache_path.name}.{os.getpid()}.tmp")
        try:
            payload = json.dumps(data, indent=2, ensure_ascii=False)
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(payload)
            os.replace(tmp_path, cache_path)
        except (OSError, TypeError, ValueError):
            try:
                tmp_path.unlink()
            except OSError:
                pass  # nothing was written, or it is already gone
            return False
        return True

    def delete(self, key: str) -> bool:
        """Remove an entry; return True only if a file was actually deleted."""
        try:
            self._get_cache_path(key).unlink()
        except OSError:
            # Includes FileNotFoundError: there was nothing to delete.
            return False
        return True

    def clear(self) -> int:
        """Remove every cache file and return how many were deleted."""
        count = 0
        for path in self.cache_dir.glob("*.json"):
            try:
                path.unlink()
            except OSError:
                continue
            count += 1
        return count

    def clear_expired(self) -> int:
        """Remove expired entries and return how many were deleted.

        Unreadable or malformed files are left in place.
        """
        count = 0
        for path in self.cache_dir.glob("*.json"):
            entry = self._load_entry(path)
            if entry is None or not entry[1]:
                continue
            try:
                path.unlink()
            except OSError:
                continue
            count += 1
        return count

    def get_stats(self) -> dict:
        """Return entry counts and total size of the cache.

        Returns:
            dict with 'total_entries', 'total_size_bytes', 'expired_entries'
            and 'active_entries'. Unreadable files count as active.
        """
        total_entries = 0
        total_size = 0
        expired = 0
        for path in self.cache_dir.glob("*.json"):
            try:
                size = path.stat().st_size
            except OSError:
                # The file disappeared between the glob and the stat.
                continue
            total_entries += 1
            total_size += size
            entry = self._load_entry(path)
            if entry is not None and entry[1]:
                expired += 1
        return {
            'total_entries': total_entries,
            'total_size_bytes': total_size,
            'expired_entries': expired,
            'active_entries': total_entries - expired
        }
class QuestionCache:
    """Persistent question -> SQL cache with exact and fuzzy lookup.

    The whole cache lives in a single JSON file mapping a normalized question
    (lower-cased and stripped) to a record with the keys 'sql', 'success',
    'timestamp' and 'count'.
    """

    # Minimum word-set (Jaccard) similarity for a fuzzy match; strictly greater.
    SIMILARITY_THRESHOLD = 0.7

    def __init__(self, db_path: Optional[str] = None):
        """Load the cache file.

        Args:
            db_path: Path of the JSON cache file. Defaults to
                ``~/.cache/turf_llm/sql_cache.json``.
        """
        if db_path is None:
            db_path = os.path.expanduser("~/.cache/turf_llm/sql_cache.json")
        self.cache_file = Path(db_path)
        self.cache = self._load()

    def _load(self) -> dict:
        """Read the cache file; return an empty dict if it is unusable.

        A missing, unreadable or invalid file yields ``{}``. Content that is
        not a JSON object, and records that are not objects, are discarded so
        the other methods can rely on a dict of dicts.
        """
        try:
            with open(self.cache_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # FileNotFoundError is an OSError; JSONDecodeError a ValueError.
            return {}
        if not isinstance(data, dict):
            return {}
        return {q: rec for q, rec in data.items() if isinstance(rec, dict)}

    def _save(self):
        """Persist the cache to disk (best effort, never raises on I/O errors).

        The data is written to a temporary file that is then atomically moved
        over the real one, so an interrupted write cannot wipe the cache.
        """
        tmp_path = self.cache_file.with_name(
            f"{self.cache_file.name}.{os.getpid()}.tmp"
        )
        try:
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            payload = json.dumps(self.cache, indent=2, ensure_ascii=False)
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(payload)
            os.replace(tmp_path, self.cache_file)
        except (OSError, TypeError, ValueError):
            # Persistence is optional: the in-memory cache stays usable.
            try:
                tmp_path.unlink()
            except OSError:
                pass

    def get_sql(self, question: str) -> Optional[str]:
        """Return the cached SQL for this question or the most similar one.

        An exact match on the normalized question wins. Otherwise the cached
        question with the highest similarity above ``SIMILARITY_THRESHOLD`` is
        used; on equal scores the earliest stored question is kept.

        Returns:
            The stored SQL string, or None when nothing is close enough.
        """
        normalized = question.lower().strip()
        exact = self.cache.get(normalized)
        if exact is not None:
            return exact.get('sql')
        best_score = self.SIMILARITY_THRESHOLD
        best_record = None
        for cached_question, record in self.cache.items():
            score = self._similarity(normalized, cached_question)
            if score > best_score:
                best_score, best_record = score, record
        if best_record is None:
            return None
        return best_record.get('sql')

    def set_sql(self, question: str, sql: str, success: bool = True):
        """Store the SQL for a question and persist the cache.

        Args:
            question: Natural-language question (normalized before storing).
            sql: SQL text associated with the question.
            success: Whether the SQL ran successfully (stored as-is).
        """
        normalized = question.lower().strip()
        previous_count = self.cache.get(normalized, {}).get('count', 0)
        self.cache[normalized] = {
            'sql': sql,
            'success': success,
            'timestamp': datetime.now().isoformat(),
            'count': previous_count + 1
        }
        self._save()

    def _similarity(self, s1: str, s2: str) -> float:
        """Return the Jaccard similarity of the whitespace-split word sets.

        Returns 0.0 when either string contains no words.
        """
        words1 = set(s1.split())
        words2 = set(s2.split())
        if not words1 or not words2:
            return 0.0
        # Both sets are non-empty here, so the union cannot be empty.
        return len(words1 & words2) / len(words1 | words2)

    def get_frequent_questions(self, limit: int = 10) -> list:
        """Return up to ``limit`` questions, most frequently stored first."""
        ranked = sorted(
            self.cache.items(),
            key=lambda item: item[1].get('count', 0),
            reverse=True
        )
        return [question for question, _ in ranked[:limit]]
# Module-level singleton slots; each stays None until its accessor first runs.
_global_cache, _sql_cache = None, None
def get_llm_cache() -> LLMCache:
    """Return the shared LLMCache instance, creating it on first use."""
    global _global_cache
    if _global_cache is not None:
        return _global_cache
    _global_cache = LLMCache()
    return _global_cache
def get_sql_cache() -> QuestionCache:
    """Return the shared QuestionCache instance, creating it on first use."""
    global _sql_cache
    if _sql_cache is not None:
        return _sql_cache
    _sql_cache = QuestionCache()
    return _sql_cache