Initial commit: existing turf_saas codebase

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
ML Engineer
2026-04-25 17:18:43 +02:00
commit ed07c8a3d1
137 changed files with 36398 additions and 0 deletions

423
analytics_reports.py Normal file
View File

@@ -0,0 +1,423 @@
#!/usr/bin/env python3
"""
Analytics et Rapports Automatisés - Turf Scraper
Génère des rapports quotidiens, hebdomadaires et mensuels
"""
import sqlite3
import json
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from pathlib import Path
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
def get_db():
"""Connexion à la base"""
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
return conn
def get_daily_report(date: str = None) -> dict:
"""
Génère un rapport quotidien
"""
if date is None:
date = datetime.now().strftime('%Y-%m-%d')
conn = get_db()
c = conn.cursor()
report = {
'date': date,
'generated_at': datetime.now().isoformat(),
'races': {},
'predictions': {},
'results': {}
}
try:
c.execute("""
SELECT id, num_reunion, num_course, libelle, libelle_court,
discipline, distance, heure_depart_str
FROM pmu_courses
WHERE date_programme = ?
ORDER BY num_reunion, num_course
""", (date,))
races = c.fetchall()
report['races']['count'] = len(races)
report['races']['details'] = [dict(r) for r in races[:10]]
c.execute("""
SELECT race_name, race_hippodrome, COUNT(*) as pred_count
FROM predictions
WHERE date = ?
GROUP BY race_name
""", (date,))
preds = c.fetchall()
report['predictions']['count'] = sum(p['pred_count'] for p in preds)
report['predictions']['by_race'] = [dict(p) for p in preds]
c.execute("""
SELECT p.nom as cheval, p.num_pmu as numero, p.ordre_arrivee as position,
p.cote_direct as cote, p.driver, c.libelle as course
FROM pmu_partants p
JOIN pmu_courses c ON p.date_programme = c.date_programme
AND p.num_reunion = c.num_reunion AND p.num_course = c.num_course
WHERE p.date_programme = ? AND p.ordre_arrivee > 0
ORDER BY c.num_reunion, c.num_course, p.ordre_arrivee
""", (date,))
results = c.fetchall()
report['results']['count'] = len(results)
report['results']['details'] = [dict(r) for r in results[:20]]
c.execute("""
SELECT COUNT(*) as total,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins,
SUM(CASE WHEN ordre_arrivee IN (1,2,3) THEN 1 ELSE 0 END) as podiums,
AVG(CASE WHEN ordre_arrivee > 0 THEN cote_direct ELSE NULL END) as avg_cote
FROM pmu_partants
WHERE date_programme = ? AND ordre_arrivee > 0
""", (date,))
stats = c.fetchone()
total = stats['total'] or 0
wins = stats['wins'] or 0
podiums = stats['podiums'] or 0
report['stats'] = {
'total_partants': total,
'wins': wins,
'podiums': podiums,
'win_rate': round(wins / total * 100, 1) if total > 0 else 0,
'podium_rate': round(podiums / total * 100, 1) if total > 0 else 0,
'avg_cote': round(stats['avg_cote'], 2) if stats['avg_cote'] else 0
}
except Exception as e:
report['error'] = str(e)
finally:
conn.close()
return report
def get_weekly_report(start_date: str = None, end_date: str = None) -> dict:
"""
Génère un rapport hebdomadaire
"""
if end_date is None:
end_date = datetime.now()
else:
end_date = datetime.strptime(end_date, '%Y-%m-%d')
if start_date is None:
start_date = end_date - timedelta(days=7)
else:
start_date = datetime.strptime(start_date, '%Y-%m-%d')
start_str = start_date.strftime('%Y-%m-%d')
end_str = end_date.strftime('%Y-%m-%d')
conn = get_db()
c = conn.cursor()
report = {
'start_date': start_str,
'end_date': end_str,
'generated_at': datetime.now().isoformat()
}
try:
c.execute("""
SELECT COUNT(DISTINCT date_programme) as race_days,
COUNT(DISTINCT id) as total_races
FROM pmu_courses
WHERE date_programme BETWEEN ? AND ?
""", (start_str, end_str))
overview = c.fetchone()
report['overview'] = {
'race_days': overview['race_days'] or 0,
'total_races': overview['total_races'] or 0
}
c.execute("""
SELECT COUNT(*) as total,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins,
SUM(CASE WHEN ordre_arrivee IN (1,2,3) THEN 1 ELSE 0 END) as podiums,
AVG(CASE WHEN ordre_arrivee > 0 THEN cote_direct ELSE NULL END) as avg_cote
FROM pmu_partants
WHERE date_programme BETWEEN ? AND ? AND ordre_arrivee > 0
""", (start_str, end_str))
stats = c.fetchone()
total = stats['total'] or 0
wins = stats['wins'] or 0
podiums = stats['podiums'] or 0
report['stats'] = {
'total_partants': total,
'wins': wins,
'podiums': podiums,
'win_rate': round(wins / total * 100, 1) if total > 0 else 0,
'podium_rate': round(podiums / total * 100, 1) if total > 0 else 0,
'avg_cote': round(stats['avg_cote'], 2) if stats['avg_cote'] else 0
}
c.execute("""
SELECT driver, COUNT(*) as wins,
AVG(CASE WHEN ordre_arrivee > 0 THEN ordre_arrivee ELSE NULL END) as avg_pos
FROM pmu_partants
WHERE date_programme BETWEEN ? AND ? AND ordre_arrivee = 1
GROUP BY driver
ORDER BY wins DESC
LIMIT 10
""", (start_str, end_str))
report['top_jockeys'] = [dict(r) for r in c.fetchall()]
c.execute("""
SELECT discipline, COUNT(DISTINCT c.id) as races,
COUNT(*) as partants,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins
FROM pmu_courses c
LEFT JOIN pmu_partants p ON c.date_programme = p.date_programme
AND c.num_reunion = p.num_reunion AND c.num_course = p.num_course
WHERE c.date_programme BETWEEN ? AND ?
GROUP BY discipline
ORDER BY races DESC
""", (start_str, end_str))
report['by_discipline'] = [dict(r) for r in c.fetchall()]
c.execute("""
SELECT date_programme, COUNT(*) as partants,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins
FROM pmu_partants
WHERE date_programme BETWEEN ? AND ?
GROUP BY date_programme
ORDER BY date_programme
""", (start_str, end_str))
report['by_day'] = [dict(r) for r in c.fetchall()]
except Exception as e:
report['error'] = str(e)
finally:
conn.close()
return report
def get_monthly_report(year: int = None, month: int = None) -> dict:
"""
Génère un rapport mensuel
"""
if year is None:
year = datetime.now().year
if month is None:
month = datetime.now().month
start_date = datetime(year, month, 1)
if month == 12:
end_date = datetime(year + 1, 1, 1) - timedelta(days=1)
else:
end_date = datetime(year, month + 1, 1) - timedelta(days=1)
start_str = start_date.strftime('%Y-%m-%d')
end_str = end_date.strftime('%Y-%m-%d')
conn = get_db()
c = conn.cursor()
report = {
'year': year,
'month': month,
'start_date': start_str,
'end_date': end_str,
'generated_at': datetime.now().isoformat()
}
try:
c.execute("""
SELECT COUNT(DISTINCT date_programme) as race_days,
COUNT(DISTINCT id) as total_races,
COUNT(*) as total_partants
FROM pmu_courses c
LEFT JOIN pmu_partants p ON c.date_programme = p.date_programme
AND c.num_reunion = p.num_reunion AND c.num_course = p.num_course
WHERE c.date_programme BETWEEN ? AND ?
""", (start_str, end_str))
overview = c.fetchone()
report['overview'] = {
'race_days': overview['race_days'] or 0,
'total_races': overview['total_races'] or 0,
'total_partants': overview['total_partants'] or 0
}
c.execute("""
SELECT COUNT(*) as total,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins,
SUM(CASE WHEN ordre_arrivee IN (1,2,3) THEN 1 ELSE 0 END) as podiums,
AVG(CASE WHEN ordre_arrivee > 0 THEN cote_direct ELSE NULL END) as avg_cote
FROM pmu_partants
WHERE date_programme BETWEEN ? AND ? AND ordre_arrivee > 0
""", (start_str, end_str))
stats = c.fetchone()
total = stats['total'] or 0
wins = stats['wins'] or 0
podiums = stats['podiums'] or 0
report['stats'] = {
'total_partants': total,
'wins': wins,
'podiums': podiums,
'win_rate': round(wins / total * 100, 1) if total > 0 else 0,
'podium_rate': round(podiums / total * 100, 1) if total > 0 else 0,
'avg_cote': round(stats['avg_cote'], 2) if stats['avg_cote'] else 0
}
c.execute("""
SELECT discipline, COUNT(DISTINCT c.id) as races,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins,
AVG(CASE WHEN ordre_arrivee > 0 THEN ordre_arrivee ELSE NULL END) as avg_pos
FROM pmu_courses c
LEFT JOIN pmu_partants p ON c.date_programme = p.date_programme
AND c.num_reunion = p.num_reunion AND c.num_course = p.num_course
WHERE c.date_programme BETWEEN ? AND ?
GROUP BY discipline
ORDER BY races DESC
LIMIT 20
""", (start_str, end_str))
report['by_discipline'] = [dict(r) for r in c.fetchall()]
c.execute("""
SELECT date_programme, COUNT(*) as partants,
SUM(CASE WHEN ordre_arrivee = 1 THEN 1 ELSE 0 END) as wins
FROM pmu_partants
WHERE date_programme BETWEEN ? AND ?
GROUP BY date_programme
ORDER BY date_programme
""", (start_str, end_str))
report['daily_breakdown'] = [dict(r) for r in c.fetchall()]
except Exception as e:
report['error'] = str(e)
finally:
conn.close()
return report
def format_report_markdown(report: dict, report_type: str = 'daily') -> str:
"""
Formate un rapport en Markdown
"""
lines = []
if report_type == 'daily':
lines.append(f"# 📊 Rapport Quotidien - {report.get('date', 'N/A')}")
lines.append("")
lines.append(f"*Généré le {report.get('generated_at', '')}*")
lines.append("")
if 'error' in report:
lines.append(f"❌ Erreur: {report['error']}")
return "\n".join(lines)
races = report.get('races', {})
stats = report.get('stats', {})
lines.append("## Résumé")
lines.append(f"- 🏇 Courses: {races.get('count', 0)}")
lines.append(f"- 🎯 Partants: {stats.get('total_partants', 0)}")
lines.append(f"- 🏆 Victoires: {stats.get('wins', 0)} ({stats.get('win_rate', 0)}%)")
lines.append(f"- 🥉 Podiums: {stats.get('podiums', 0)} ({stats.get('podium_rate', 0)}%)")
lines.append("")
elif report_type == 'weekly':
lines.append(f"# 📈 Rapport Hebdomadaire")
lines.append(f"**{report.get('start_date', '')}** au **{report.get('end_date', '')}**")
lines.append("")
if 'error' in report:
lines.append(f"❌ Erreur: {report['error']}")
return "\n".join(lines)
overview = report.get('overview', {})
stats = report.get('stats', {})
lines.append("## Résumé")
lines.append(f"- Jours de course: {overview.get('race_days', 0)}")
lines.append(f"- Courses totales: {overview.get('total_races', 0)}")
lines.append(f"- Victoires: {stats.get('wins', 0)} ({stats.get('win_rate', 0)}%)")
lines.append(f"- Podiums: {stats.get('podiums', 0)} ({stats.get('podium_rate', 0)}%)")
lines.append(f"- Cote moyenne: {stats.get('avg_cote', 0)}")
lines.append("")
if report.get('top_jockeys'):
lines.append("## Top Jockeys")
for j in report['top_jockeys'][:5]:
lines.append(f"- {j['driver']}: {j['wins']} victoires")
lines.append("")
elif report_type == 'monthly':
month_name = ['Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin',
'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre']
m = report.get('month', 1)
lines.append(f"# 📅 Rapport Mensuel - {month_name[m-1]} {report.get('year', '')}")
lines.append("")
if 'error' in report:
lines.append(f"❌ Erreur: {report['error']}")
return "\n".join(lines)
overview = report.get('overview', {})
stats = report.get('stats', {})
lines.append("## Résumé du Mois")
lines.append(f"- Jours de course: {overview.get('race_days', 0)}")
lines.append(f"- Courses: {overview.get('total_races', 0)}")
lines.append(f"- Partants: {overview.get('total_partants', 0)}")
lines.append(f"- Victoires: {stats.get('wins', 0)} ({stats.get('win_rate', 0)}%)")
lines.append(f"- Podiums: {stats.get('podiums', 0)} ({stats.get('podium_rate', 0)}%)")
lines.append(f"- Cote moyenne: {stats.get('avg_cote', 0)}")
lines.append("")
return "\n".join(lines)
if __name__ == '__main__':
import sys
if len(sys.argv) < 2:
print("Usage: python analytics_reports.py [daily|weekly|monthly] [date]")
sys.exit(1)
report_type = sys.argv[1]
date_arg = sys.argv[2] if len(sys.argv) > 2 else None
if report_type == 'daily':
report = get_daily_report(date_arg)
print(format_report_markdown(report, 'daily'))
elif report_type == 'weekly':
report = get_weekly_report(date_arg)
print(format_report_markdown(report, 'weekly'))
elif report_type == 'monthly':
if date_arg:
year, month = map(int, date_arg.split('-'))
else:
year, month = None, None
report = get_monthly_report(year, month)
print(format_report_markdown(report, 'monthly'))
else:
print(f"Type de rapport inconnu: {report_type}")