#!/usr/bin/env python3
"""
LLM cache - Turf Scraper.

Reduces the number of API calls by caching responses on disk.
"""

import json
import hashlib
import os
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, Any

# Characters produced by ``hashlib.sha256(...).hexdigest()``.
_HEX_DIGITS = frozenset("0123456789abcdef")


def _atomic_write_text(path: Path, text: str) -> None:
    """Write *text* to *path* through a temporary sibling file.

    The data is written to ``<name>.<pid>.tmp`` next to the target and then
    moved into place with ``os.replace``, so the target is either the old
    content or the complete new content.

    Raises:
        OSError: If the file cannot be written or moved into place.
        ValueError: If *text* cannot be encoded as UTF-8.
    """
    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(text)
        os.replace(tmp_path, path)
    except (OSError, ValueError):
        # Do not leave a stray temporary file behind on failure.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise


class LLMCache:
    """File-based cache for LLM responses with expiration.

    Each entry is stored as ``<sha256(key)>.json`` inside ``cache_dir``.
    """

    def __init__(self, cache_dir: Optional[str] = None, ttl_hours: int = 24):
        """
        Args:
            cache_dir: Directory holding the cache files
                (default: ~/.cache/turf_llm/). Created if missing.
            ttl_hours: Time-to-live in hours (default: 24h).
        """
        if cache_dir is None:
            cache_dir = os.path.expanduser("~/.cache/turf_llm")

        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl = timedelta(hours=ttl_hours)

    def _hash_key(self, key: str) -> str:
        """Return the SHA-256 hex digest of *key*."""
        return hashlib.sha256(key.encode()).hexdigest()

    def _get_cache_path(self, key: str) -> Path:
        """Return the path of the cache file for *key*."""
        hash_key = self._hash_key(key)
        return self.cache_dir / f"{hash_key}.json"

    @staticmethod
    def _is_entry_name(stem: str) -> bool:
        """Return True if *stem* looks like a SHA-256 hex digest."""
        return len(stem) == 64 and all(c in _HEX_DIGITS for c in stem)

    def _entry_files(self) -> list:
        """List the cache files written by this class.

        Only ``<64 hex chars>.json`` files are returned so that unrelated
        JSON files sharing the directory (the default ``QuestionCache`` file
        lives in the same default directory) are never counted or deleted.
        """
        return [
            path for path in self.cache_dir.glob("*.json")
            if self._is_entry_name(path.stem)
        ]

    @staticmethod
    def _read_entry(path: Path) -> Optional[dict]:
        """Load a cache file; return None if unreadable or not a JSON object."""
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return None
        return data if isinstance(data, dict) else None

    def _is_expired(self, data: dict, now: datetime) -> Optional[bool]:
        """Tell whether *data* is older than the TTL relative to *now*.

        Returns:
            True/False, or None when the entry has no usable timestamp.
        """
        try:
            cached_time = datetime.fromisoformat(data.get('timestamp', ''))
            return now - cached_time > self.ttl
        except (TypeError, ValueError):
            # TypeError: non-string timestamp, or aware/naive datetime mix.
            return None

    def get(self, key: str) -> Optional[dict]:
        """
        Fetch a value from the cache.

        An expired entry is deleted from disk before returning None.

        Args:
            key: Lookup key.

        Returns:
            The stored dict (with 'key', 'response', 'timestamp' and
            'metadata') or None if the entry is absent, expired or unreadable.
        """
        cache_path = self._get_cache_path(key)

        data = self._read_entry(cache_path)
        if data is None:
            return None

        expired = self._is_expired(data, datetime.now())
        if expired is None:
            return None

        if expired:
            try:
                cache_path.unlink()
            except OSError:
                pass
            return None

        return data

    def set(self, key: str, response: Any, metadata: Optional[dict] = None) -> bool:
        """
        Save a response in the cache.

        The payload is serialized before any file is touched, so a response
        that is not JSON-serializable leaves no partial file behind.

        Args:
            key: Lookup key.
            response: Response to store (must be JSON-serializable).
            metadata: Additional metadata.

        Returns:
            True on success, False if serialization or writing failed.
        """
        data = {
            'key': key,
            'response': response,
            'timestamp': datetime.now().isoformat(),
            'metadata': metadata or {}
        }

        try:
            payload = json.dumps(data, indent=2, ensure_ascii=False)
        except (TypeError, ValueError):
            return False

        try:
            _atomic_write_text(self._get_cache_path(key), payload)
        except (OSError, ValueError):
            return False
        return True

    def delete(self, key: str) -> bool:
        """Delete a cache entry; return True only if a file was removed."""
        try:
            self._get_cache_path(key).unlink()
        except OSError:
            # Includes FileNotFoundError: nothing to delete.
            return False
        return True

    def clear(self) -> int:
        """Delete every cache entry and return how many were removed."""
        count = 0
        for path in self._entry_files():
            try:
                path.unlink()
            except OSError:
                continue
            count += 1
        return count

    def clear_expired(self) -> int:
        """Delete expired entries and return how many were removed.

        Unreadable entries and entries without a valid timestamp are left
        untouched.
        """
        count = 0
        now = datetime.now()

        for path in self._entry_files():
            data = self._read_entry(path)
            if data is None or not self._is_expired(data, now):
                continue
            try:
                path.unlink()
            except OSError:
                continue
            count += 1

        return count

    def get_stats(self) -> dict:
        """Return statistics about the cache.

        Returns:
            dict with 'total_entries', 'total_size_bytes', 'expired_entries'
            and 'active_entries'. Entries that cannot be parsed are counted
            as active.
        """
        now = datetime.now()
        total = 0
        total_size = 0
        expired = 0

        for path in self._entry_files():
            try:
                size = path.stat().st_size
            except OSError:
                # File disappeared between listing and stat; skip it.
                continue
            total += 1
            total_size += size

            data = self._read_entry(path)
            if data is not None and self._is_expired(data, now):
                expired += 1

        return {
            'total_entries': total,
            'total_size_bytes': total_size,
            'expired_entries': expired,
            'active_entries': total - expired
        }


class QuestionCache:
    """Cache dedicated to question -> SQL mappings, stored in one JSON file."""

    def __init__(self, db_path: Optional[str] = None):
        """
        Args:
            db_path: Path of the JSON file
                (default: ~/.cache/turf_llm/sql_cache.json).
        """
        if db_path is None:
            db_path = os.path.expanduser("~/.cache/turf_llm/sql_cache.json")

        self.cache_file = Path(db_path)
        self.cache = self._load()

    def _load(self) -> dict:
        """Load the cache from disk; return {} if missing or invalid.

        Only entries whose value is a JSON object are kept, because every
        other method calls ``.get`` on the stored values.
        """
        try:
            with open(self.cache_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            return {}

        if not isinstance(data, dict):
            return {}
        return {k: v for k, v in data.items() if isinstance(v, dict)}

    def _save(self):
        """Persist the cache to disk (best effort: failures are ignored)."""
        try:
            payload = json.dumps(self.cache, indent=2)
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            _atomic_write_text(self.cache_file, payload)
        except (OSError, TypeError, ValueError):
            # The in-memory cache stays usable even if it cannot be persisted.
            pass

    def get_sql(self, question: str, threshold: float = 0.7) -> Optional[str]:
        """Return the SQL stored for this question or for the most similar one.

        Args:
            question: Natural-language question.
            threshold: Minimum similarity (exclusive) for a non-exact match.

        Returns:
            The stored SQL, or None when nothing matches.
        """
        normalized = question.lower().strip()

        # Exact match wins.
        if normalized in self.cache:
            return self.cache[normalized].get('sql')

        # Otherwise keep the best-scoring entry strictly above the threshold.
        best_sql = None
        best_score = threshold
        for key, value in self.cache.items():
            score = self._similarity(normalized, key)
            if score > best_score:
                best_score = score
                best_sql = value.get('sql')

        return best_sql

    def set_sql(self, question: str, sql: str, success: bool = True):
        """Store the SQL for a question and persist the cache.

        The entry's 'count' is incremented each time the same normalized
        question is stored.
        """
        normalized = question.lower().strip()
        previous_count = self.cache.get(normalized, {}).get('count', 0)
        self.cache[normalized] = {
            'sql': sql,
            'success': success,
            'timestamp': datetime.now().isoformat(),
            'count': previous_count + 1
        }
        self._save()

    def _similarity(self, s1: str, s2: str) -> float:
        """Return the word-set overlap ratio (intersection / union) of two strings."""
        words1 = set(s1.split())
        words2 = set(s2.split())

        if not words1 or not words2:
            return 0.0

        # Both sets are non-empty here, so the union cannot be empty.
        return len(words1 & words2) / len(words1 | words2)

    def get_frequent_questions(self, limit: int = 10) -> list:
        """Return the most frequently stored questions, most frequent first."""
        sorted_questions = sorted(
            self.cache.items(),
            key=lambda item: item[1].get('count', 0),
            reverse=True
        )
        return [question for question, _ in sorted_questions[:limit]]


_global_cache = None
_sql_cache = None


def get_llm_cache() -> LLMCache:
    """Return the process-wide LLMCache, creating it on first use."""
    global _global_cache
    if _global_cache is None:
        _global_cache = LLMCache()
    return _global_cache


def get_sql_cache() -> QuestionCache:
    """Return the process-wide QuestionCache, creating it on first use."""
    global _sql_cache
    if _sql_cache is None:
        _sql_cache = QuestionCache()
    return _sql_cache