Initial commit: existing turf_saas codebase

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
ML Engineer
2026-04-25 17:18:43 +02:00
commit ed07c8a3d1
137 changed files with 36398 additions and 0 deletions

309
turf_scheduler.py Executable file
View File

@@ -0,0 +1,309 @@
#!/usr/bin/env python3
"""
Turf Scheduler - Scraping automatique sans dépendance OpenClaw
"""
import sys
import os
import sqlite3
import schedule
import time
import logging
from datetime import datetime
# Put the project directory first on the module search path; the job modules
# imported lazily by the run_* functions presumably live there (TODO confirm).
sys.path.insert(0, "/home/h3r7/turf_saas")
# Root logging configuration: INFO level, timestamped lines, written both to a
# log file inside the project directory and to the default console stream.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
handlers=[
logging.FileHandler("/home/h3r7/turf_saas/scheduler.log"),
logging.StreamHandler(),
],
)
# Module-level logger shared by every job function in this file.
logger = logging.getLogger(__name__)
# SQLite database file opened by get_todays_race_time().
DB_PATH = "/home/h3r7/turf_saas/turf_saas.db"
def run_scraper():
    """Run the main scraper job.

    Switches the working directory to the project root, lazily imports
    ``multi_scraper_v5`` and logs whatever its ``main()`` returns.
    Any exception is logged and its traceback printed; nothing is re-raised
    to the caller.
    """
    logger.info("🕐 [SCHEDULER] Exécution scraper...")
    project_dir = "/home/h3r7/turf_saas"
    try:
        os.chdir(project_dir)
        # Imported at call time, after the chdir, as in the rest of this file.
        import multi_scraper_v5 as scraper

        outcome = scraper.main()
        logger.info(f"✅ [SCHEDULER] Scraper terminé: {outcome}")
    except Exception as exc:
        logger.error(f"❌ [SCHEDULER] Erreur scraper: {exc}")
        from traceback import print_exc

        print_exc()
def run_scoring():
    """Run the scoring job (score computation and recommendations).

    Switches the working directory to the project root, lazily imports
    ``scoring_v2`` and calls its ``main()``. Any exception is logged and its
    traceback printed; nothing is re-raised to the caller.
    """
    logger.info("🧠 [SCHEDULER] Exécution scoring...")
    project_dir = "/home/h3r7/turf_saas"
    try:
        os.chdir(project_dir)
        # Imported at call time, after the chdir, as in the rest of this file.
        import scoring_v2 as scoring_module

        scoring_module.main()
        logger.info("✅ [SCHEDULER] Scoring terminé")
    except Exception as exc:
        logger.error(f"❌ [SCHEDULER] Erreur scoring: {exc}")
        from traceback import print_exc

        print_exc()
def run_results():
    """Fetch the race results for the current day.

    Switches the working directory to the project root, lazily imports
    ``pmu_results`` and calls ``pmu_results.run`` with the current local date
    formatted as ``DDMMYYYY``. Any exception is logged and its traceback
    printed; nothing is re-raised to the caller.
    """
    logger.info("🕐 [SCHEDULER] Récupération résultats...")
    project_dir = "/home/h3r7/turf_saas"
    try:
        os.chdir(project_dir)
        import pmu_results as results_module

        # Day, month and year concatenated without separators, e.g. 25042026.
        date_key = datetime.now().strftime("%d%m%Y")
        results_module.run(date_key)
        logger.info("✅ [SCHEDULER] Résultats récupérés")
    except Exception as exc:
        logger.error(f"❌ [SCHEDULER] Erreur résultats: {exc}")
        from traceback import print_exc

        print_exc()
def run_ml():
    """Train the ML models.

    Switches the working directory to the project root, lazily imports
    ``train_xgboost`` and calls its ``main()``. Any exception is logged and
    its traceback printed, matching the other ``run_*`` jobs in this file;
    nothing is re-raised to the caller.
    """
    logger.info("🕐 [SCHEDULER] Entraînement ML...")
    try:
        os.chdir("/home/h3r7/turf_saas")
        # Imported at call time, after the chdir, as in the rest of this file.
        import train_xgboost

        train_xgboost.main()
        logger.info("✅ [SCHEDULER] ML terminé")
    except Exception as e:
        logger.error(f"❌ [SCHEDULER] Erreur ML: {e}")
        # Previously only the one-line message was emitted, which made
        # training failures hard to diagnose; print the traceback like the
        # sibling job runners do.
        import traceback

        traceback.print_exc()
def run_analytics():
    """Refresh the analytics tables.

    Switches the working directory to the project root, lazily imports
    ``populate_analytics`` and calls, in order, ``populate_bet_results``,
    ``populate_daily_stats`` and ``populate_stats_by_type``. The first
    exception aborts the remaining steps; it is logged and its traceback
    printed, and nothing is re-raised to the caller.
    """
    logger.info("🕐 [SCHEDULER] Analytics...")
    project_dir = "/home/h3r7/turf_saas"
    step_names = (
        "populate_bet_results",
        "populate_daily_stats",
        "populate_stats_by_type",
    )
    try:
        os.chdir(project_dir)
        import populate_analytics as analytics

        # Resolve each step only when it is about to run, so earlier steps
        # still execute even if a later attribute is missing.
        for step_name in step_names:
            getattr(analytics, step_name)()
        logger.info("✅ [SCHEDULER] Analytics mis à jour")
    except Exception as exc:
        logger.error(f"❌ [SCHEDULER] Erreur analytics: {exc}")
        from traceback import print_exc

        print_exc()
def get_todays_race_time(db_path=None):
    """Return the start time of today's race as stored in the database.

    Looks first in ``pmu_courses`` for the earliest non-null ``heure_depart``
    whose ``date_programme`` equals today's local date (``YYYY-MM-DD``), then
    falls back to a ``course_time`` from ``pmu_rapports`` for the same date.
    Callers in this file treat the value as a timestamp in milliseconds.

    Args:
        db_path: Optional path of the SQLite database to query. Defaults to
            the module-level ``DB_PATH``.

    Returns:
        The first truthy race time found, or ``None`` when there is none or
        when any error occurs (the error is logged as a warning).
    """
    from contextlib import closing

    path = DB_PATH if db_path is None else db_path
    # Queries are tried in order; the first truthy ``race_time`` wins.
    queries = (
        # Primary source: pmu_courses (timestamp in ms).
        """
        SELECT heure_depart as race_time
        FROM pmu_courses
        WHERE date_programme = ?
        AND heure_depart IS NOT NULL
        ORDER BY heure_depart ASC
        LIMIT 1
        """,
        # Fallback source: pmu_rapports.
        """
        SELECT DISTINCT course_time as race_time
        FROM pmu_rapports
        WHERE date = ?
        LIMIT 1
        """,
    )
    try:
        today = datetime.now().strftime("%Y-%m-%d")
        # closing() guarantees the connection is released even when a query
        # raises; the previous code only closed it on the success paths.
        with closing(sqlite3.connect(path)) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            for sql in queries:
                cursor.execute(sql, (today,))
                row = cursor.fetchone()
                if row and row["race_time"]:
                    return row["race_time"]
        return None
    except Exception as e:
        # Best effort: callers simply skip dynamic scheduling on failure.
        logger.warning(f"⚠️ Impossible de récupérer l'heure de course: {e}")
        return None
def schedule_dynamic_scoring():
    """Schedule a scoring run 15 minutes before today's race.

    Reads the race time from ``get_todays_race_time()``, interprets it as a
    timestamp in milliseconds (local time), and registers a daily
    ``run_scoring`` job tagged ``("scoring", "dynamic")`` at that time minus
    15 minutes. When no race time is available only an informational message
    is logged. Any error while computing or registering the job is logged as
    a warning and not re-raised.
    """
    from datetime import timedelta

    race_time = get_todays_race_time()
    if not race_time:
        logger.info(" [SCHEDULER] Pas de course aujourd'hui, pas de scoring dynamique")
        return

    try:
        # Stored value is in milliseconds; fromtimestamp expects seconds.
        race_dt = datetime.fromtimestamp(race_time / 1000)
        logger.info(
            f"📅 [SCHEDULER] Course détectée à {race_dt.hour:02d}:{race_dt.minute:02d}"
        )
        # Use timedelta arithmetic instead of manual hour/minute juggling:
        # the manual version produced hour -1 (an invalid "HH:MM" string) for
        # races starting between 00:00 and 00:14, so no job was scheduled.
        scoring_time = (race_dt - timedelta(minutes=15)).strftime("%H:%M")
        schedule.every().day.at(scoring_time).do(run_scoring).tag(
            "scoring", "dynamic"
        )
        logger.info(
            f"📅 [SCHEDULER] Scoring dynamique planifié à {scoring_time} (15min avant la course)"
        )
    except Exception as e:
        logger.warning(f"⚠️ Impossible de planifier le scoring dynamique: {e}")
def schedule_dynamic_results():
"""Schedule the results scrape one hour after today's race (H+1).

Reads the race time from get_todays_race_time(), converts it from a
millisecond timestamp to a local datetime, and registers a daily
run_results job tagged ("results", "dynamic") at the same minute of the
following hour. When no race time is available only an informational
message is logged. Errors are logged as warnings and not re-raised.
"""
race_time = get_todays_race_time()
if race_time:
try:
# The value is divided by 1000 because fromtimestamp expects seconds.
dt = datetime.fromtimestamp(race_time / 1000)
race_hour = dt.hour
race_min = dt.minute
# Modulo keeps the hour within 0-23 when the race starts in the 23:00 hour.
result_hour = (race_hour + 1) % 24
result_time = f"{result_hour:02d}:{race_min:02d}"
schedule.every().day.at(result_time).do(run_results).tag(
"results", "dynamic"
)
logger.info(
f"📅 [SCHEDULER] Résultats planifiés à {result_time} (H+1 de {race_hour:02d}:{race_min:02d})"
)
except Exception as e:
logger.warning(f"⚠️ Impossible de planifier les résultats: {e}")
# NOTE(review): indentation was lost in this copy of the file, so it is unclear
# whether this 15:00 "default" job belongs to the except branch (fallback when
# dynamic scheduling fails) or always runs after the try block — confirm
# against the original file before re-indenting.
schedule.every().day.at("15:00").do(run_results).tag("results", "default")
else:
logger.info(" [SCHEDULER] Aucune course aujourd'hui, pas de scrapingResults")
def main():
    """Register every scheduled job, log the plan, then run the loop forever.

    Jobs are registered in a fixed order: the daytime scraper runs, the fixed
    scoring runs, the dynamic scoring/results jobs derived from today's race
    time, the evening scraper/results runs, the twice-weekly ML training, the
    daily analytics and e-mail alert checks, and an hourly heartbeat log line.
    The function then polls ``schedule.run_pending()`` every 30 seconds and
    never returns.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("🚀 TURF SCHEDULER INDÉPENDANT DÉMARRÉ")
    logger.info(banner)

    def daily(at, job, *tags):
        # Register ``job`` to run every day at ``at`` ("HH:MM") with ``tags``.
        schedule.every().day.at(at).do(job).tag(*tags)

    # Fixed scraping jobs.
    scraper_slots = (
        ("08:00", "early_morning"),
        ("09:00", "morning"),
        ("10:00", "late_morning"),
        ("11:00", "mid_morning"),
        ("12:00", "noon"),
        ("13:00", "early_afternoon"),
        ("13:30", "afternoon"),
        ("13:45", "pre_race"),
        ("14:00", "post_race"),
    )
    for at, label in scraper_slots:
        daily(at, run_scraper, "scraper", label)

    # Fixed scoring jobs - they follow the evolution of the odds.
    scoring_slots = (
        ("09:30", "morning"),
        ("11:30", "late_morning"),
        ("12:30", "noon"),
        ("13:30", "pre_race"),
    )
    for at, label in scoring_slots:
        daily(at, run_scoring, "scoring", label)

    # Dynamic scoring (15 min before the race) and dynamic results (H+1).
    schedule_dynamic_scoring()
    schedule_dynamic_results()

    daily("18:00", run_scraper, "scraper", "evening")
    # Automatic results (fixed 20:00 fallback).
    daily("20:00", run_results, "results", "daily_fallback")
    daily("19:00", run_scraper, "scraper", "late_evening")

    schedule.every().sunday.at("02:00").do(run_ml).tag("ml", "weekly")
    schedule.every().wednesday.at("02:00").do(run_ml).tag("ml", "midweek")

    daily("15:00", run_analytics, "analytics", "daily")
    # Automatic e-mail alerts: exceptional-ROI check every day at 21:30.
    daily("21:30", run_metrics_alerts, "alerts", "email_roi")

    schedule.every().hour.do(lambda: logger.info("💓 Scheduler alive"))

    logger.info("📅 Jobs planifiés:")
    for planned in schedule.jobs:
        logger.info(f" - {planned}")
    logger.info(banner)

    while True:
        schedule.run_pending()
        time.sleep(30)
def run_metrics_alerts():
    """Check the previous day's metrics and e-mail an alert when flagged.

    Switches the working directory to the project root, lazily imports
    ``metrics_alerts`` and calls ``check_daily_alerts`` with yesterday's local
    date (``YYYY-MM-DD``). A truthy result is unpacked as ``(msg, has_roi)``;
    when ``has_roi`` is truthy, ``send_email_alert`` is called with a subject
    carrying the date as ``DD/MM/YYYY`` and the outcome is logged. Any
    exception is logged and its traceback printed; nothing is re-raised.
    """
    logger.info("📧 [SCHEDULER] Vérification alertes métriques...")
    try:
        os.chdir("/home/h3r7/turf_saas")
        import metrics_alerts
        from datetime import datetime, timedelta

        # The check targets the day before the current local date.
        yesterday = datetime.now() - timedelta(days=1)
        result = metrics_alerts.check_daily_alerts(yesterday.strftime("%Y-%m-%d"))
        if not result:
            logger.info(" [SCHEDULER] Aucune métrique disponible pour alertes")
            return

        msg, has_roi = result
        if not has_roi:
            logger.info(" [SCHEDULER] Pas d'alerte ROI aujourd'hui")
            return

        logger.info("💰 [SCHEDULER] ROI exceptionnel détecté — envoi email...")
        date_label = yesterday.strftime("%d/%m/%Y")
        subject = f"Alerte Turf — ROI exceptionnel {date_label}"
        if metrics_alerts.send_email_alert(subject, msg):
            logger.info("✅ [SCHEDULER] Email alerte envoyé")
        else:
            logger.warning("⚠️ [SCHEDULER] Echec envoi email alerte")
    except Exception as exc:
        logger.error(f"❌ [SCHEDULER] Erreur alertes métriques: {exc}")
        from traceback import print_exc

        print_exc()
if __name__ == "__main__":
main()