#!/usr/bin/env python3
"""
Cache LLM - Turf Scraper
Réduction des appels API par mise en cache des réponses
"""
import json
import hashlib
import os
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, Any


class LLMCache:
    """File-backed cache for LLM responses with time-based expiry.

    Each entry is stored as one JSON file inside ``cache_dir``; the file
    name is the SHA-256 hex digest of the entry key.
    """

    def __init__(self, cache_dir: Optional[str] = None, ttl_hours: int = 24):
        """Create the cache directory if needed and set the entry lifetime.

        Args:
            cache_dir: Directory holding the cache files
                (default: ~/.cache/turf_llm).
            ttl_hours: Time-to-live of an entry, in hours (default: 24).
        """
        if cache_dir is None:
            cache_dir = os.path.expanduser("~/.cache/turf_llm")

        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl = timedelta(hours=ttl_hours)

    def _hash_key(self, key: str) -> str:
        """Return the SHA-256 hex digest of ``key``."""
        return hashlib.sha256(key.encode()).hexdigest()

    def _get_cache_path(self, key: str) -> Path:
        """Return the path of the cache file associated with ``key``."""
        hash_key = self._hash_key(key)
        return self.cache_dir / f"{hash_key}.json"

    def _read_entry(self, path: Path) -> Optional[tuple]:
        """Load one cache file.

        Returns:
            ``(data, cached_time)`` where ``data`` is the decoded JSON object
            and ``cached_time`` a naive local datetime, or None when the file
            is missing, unreadable, not a JSON object, or has no usable
            timestamp.
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # AttributeError: top-level JSON is not an object.
            # TypeError: timestamp is null / a number instead of a string.
            cached_time = datetime.fromisoformat(data.get('timestamp', ''))
            if cached_time.tzinfo is not None:
                # Expiry is computed against naive datetime.now(); normalise
                # aware timestamps to naive local time so the subtraction
                # cannot raise.
                cached_time = cached_time.astimezone().replace(tzinfo=None)
        except (OSError, ValueError, TypeError, AttributeError, OverflowError):
            return None
        return data, cached_time

    def get(self, key: str) -> Optional[dict]:
        """Fetch an entry from the cache.

        Args:
            key: Lookup key.

        Returns:
            The stored dict (with 'key', 'response', 'timestamp' and
            'metadata' as written by ``set``), or None when the entry is
            absent, malformed or expired. An expired entry is deleted.
        """
        cache_path = self._get_cache_path(key)

        entry = self._read_entry(cache_path)
        if entry is None:
            return None

        data, cached_time = entry
        if datetime.now() - cached_time > self.ttl:
            try:
                cache_path.unlink()
            except OSError:
                pass  # best effort: the entry is reported as a miss anyway
            return None

        return data

    def set(self, key: str, response: Any, metadata: Optional[dict] = None) -> bool:
        """Store a response in the cache.

        Args:
            key: Lookup key.
            response: Response to store; must be JSON-serializable.
            metadata: Optional additional metadata.

        Returns:
            True on success, False when the payload cannot be serialized or
            the file cannot be written.
        """
        cache_path = self._get_cache_path(key)

        data = {
            'key': key,
            'response': response,
            'timestamp': datetime.now().isoformat(),
            'metadata': metadata or {}
        }

        # Serialize before opening the file: opening with 'w' truncates it, so
        # a failure half-way through json.dump would destroy a valid entry.
        try:
            payload = json.dumps(data, indent=2, ensure_ascii=False)
        except (TypeError, ValueError):
            return False

        try:
            with open(cache_path, 'w', encoding='utf-8') as f:
                f.write(payload)
            return True
        except (OSError, UnicodeEncodeError):
            return False

    def delete(self, key: str) -> bool:
        """Delete one entry; return True only if a file was removed."""
        cache_path = self._get_cache_path(key)
        try:
            if cache_path.exists():
                cache_path.unlink()
                return True
        except OSError:
            pass
        return False

    def clear(self) -> int:
        """Delete every ``*.json`` file in the cache directory.

        Returns:
            Number of files removed.
        """
        count = 0
        for f in self.cache_dir.glob("*.json"):
            try:
                f.unlink()
                count += 1
            except OSError:
                pass
        return count

    def clear_expired(self) -> int:
        """Delete expired entries.

        Malformed files are left untouched.

        Returns:
            Number of files removed.
        """
        count = 0
        now = datetime.now()

        for f in self.cache_dir.glob("*.json"):
            entry = self._read_entry(f)
            if entry is None:
                continue
            if now - entry[1] > self.ttl:
                try:
                    f.unlink()
                    count += 1
                except OSError:
                    pass

        return count

    def get_stats(self) -> dict:
        """Return statistics about the cache directory.

        Returns:
            dict with 'total_entries', 'total_size_bytes', 'expired_entries'
            and 'active_entries'. Malformed files count as entries but never
            as expired.
        """
        files = list(self.cache_dir.glob("*.json"))
        now = datetime.now()
        total_size = 0
        expired = 0

        for f in files:
            try:
                total_size += f.stat().st_size
            except OSError:
                pass  # file vanished between glob() and stat()

            entry = self._read_entry(f)
            if entry is not None and now - entry[1] > self.ttl:
                expired += 1

        return {
            'total_entries': len(files),
            'total_size_bytes': total_size,
            'expired_entries': expired,
            'active_entries': len(files) - expired
        }


class QuestionCache:
    """Cache mapping natural-language questions to generated SQL.

    The whole cache is one JSON object kept in memory and rewritten to
    ``cache_file`` on every ``set_sql`` call. Keys are the lower-cased,
    stripped question text.
    """

    def __init__(self, db_path: Optional[str] = None):
        """Load the cache file.

        Args:
            db_path: Path of the JSON cache file
                (default: ~/.cache/turf_llm/sql_cache.json).
        """
        if db_path is None:
            db_path = os.path.expanduser("~/.cache/turf_llm/sql_cache.json")

        self.cache_file = Path(db_path)
        self.cache = self._load()

    def _load(self) -> dict:
        """Read the cache file; return {} if it is missing or unusable."""
        try:
            with open(self.cache_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid JSON: start empty.
            return {}
        # Every other method assumes a dict; reject e.g. a top-level list.
        return data if isinstance(data, dict) else {}

    def _save(self) -> None:
        """Write the cache to disk (best effort; failures are ignored)."""
        # Serialize before opening: 'w' truncates the file, so an encoding
        # failure during json.dump would wipe the existing on-disk cache.
        try:
            payload = json.dumps(self.cache, indent=2)
        except (TypeError, ValueError):
            return

        try:
            self.cache_file.parent.mkdir(parents=True, exist_ok=True)
            with open(self.cache_file, 'w', encoding='utf-8') as f:
                f.write(payload)
        except OSError:
            pass

    def get_sql(self, question: str, threshold: float = 0.7) -> Optional[str]:
        """Return cached SQL for the same or the most similar question.

        An exact match on the normalized question wins. Otherwise the entry
        with the highest word-set similarity strictly above ``threshold`` is
        used (the first one encountered on ties). Entries are returned
        regardless of their 'success' flag.

        Args:
            question: Question text.
            threshold: Minimum similarity (exclusive) for a fuzzy match.

        Returns:
            The cached SQL string, or None when nothing matches.
        """
        normalized = question.lower().strip()

        if normalized in self.cache:
            return self.cache[normalized].get('sql')

        best_sql = None
        best_score = threshold
        for key, value in self.cache.items():
            score = self._similarity(normalized, key)
            if score > best_score:
                best_score = score
                best_sql = value.get('sql')

        return best_sql

    def set_sql(self, question: str, sql: str, success: bool = True) -> None:
        """Store SQL for a question and persist the cache.

        The entry's 'count' is incremented each time the same normalized
        question is stored.
        """
        normalized = question.lower().strip()

        self.cache[normalized] = {
            'sql': sql,
            'success': success,
            'timestamp': datetime.now().isoformat(),
            'count': self.cache.get(normalized, {}).get('count', 0) + 1
        }
        self._save()

    def _similarity(self, s1: str, s2: str) -> float:
        """Return |A & B| / |A | B| over the whitespace-split word sets.

        Returns 0.0 when either string has no words.
        """
        words1 = set(s1.split())
        words2 = set(s2.split())

        if not words1 or not words2:
            return 0.0

        return len(words1 & words2) / len(words1 | words2)

    def get_frequent_questions(self, limit: int = 10) -> list:
        """Return up to ``limit`` question keys, highest 'count' first."""
        sorted_questions = sorted(
            self.cache.items(),
            key=lambda x: x[1].get('count', 0),
            reverse=True
        )
        return [q[0] for q in sorted_questions[:limit]]


# Module-level singleton slots; they stay None until get_llm_cache() /
# get_sql_cache() create the instances on first call.
_global_cache: Optional[LLMCache] = None
_sql_cache: Optional[QuestionCache] = None


def get_llm_cache() -> LLMCache:
    """Return the shared LLMCache instance, building it on the first call."""
    global _global_cache
    if _global_cache is not None:
        return _global_cache
    # First call: create the instance with default settings and remember it.
    _global_cache = LLMCache()
    return _global_cache


def get_sql_cache() -> QuestionCache:
    """Return the shared QuestionCache instance, building it on the first call."""
    global _sql_cache
    if _sql_cache is not None:
        return _sql_cache
    # First call: create the instance with the default path and remember it.
    _sql_cache = QuestionCache()
    return _sql_cache