feat(devops): CI/CD + Docker + Monitoring infrastructure

- Multi-stage Dockerfile (builder+runner, <500MB target)
- docker-compose.yml: app(x4) + postgres + redis + prometheus + grafana + nginx
- .env.example listing all required secret variables (values are never hardcoded)
- requirements.txt with all dependencies including prometheus-client, alembic
- GitHub Actions CI: lint (flake8+bandit+safety) + tests + Docker build/push
- GitHub Actions CD: staging deploy -> smoke tests -> production deploy + rollback
- Alembic migration setup + initial PostgreSQL schema (001_initial_schema)
- SQLite→PostgreSQL data migration script
- Prometheus metrics module (HTTP, ML, DB, business metrics)
- Prometheus alert rules (5xx >1%, latency >2s, disk >80%, ML accuracy)
- Grafana dashboard (overview: req/s, p95, ML accuracy, error rate)
- Nginx reverse proxy config (HTTPS/TLS, rate limiting, security headers)
- Structured JSON logging module
- Automated daily DB backup script (pg_dump + 30-day retention)

Branch: feature/devops-cicd

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
DevOps Engineer
2026-04-25 17:32:02 +02:00
parent ed07c8a3d1
commit dce1e9b744
25 changed files with 2659 additions and 0 deletions

1
migrations/README Normal file
View File

@@ -0,0 +1 @@
Generic single-database configuration file

68
migrations/env.py Normal file
View File

@@ -0,0 +1,68 @@
"""Alembic env.py — Turf SaaS database migrations."""
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
# Alembic Config object for the current run — gives access to the .ini values.
config = context.config
# Configure Python logging from the .ini file, but only when Alembic was
# started with a config file (config_file_name is None otherwise).
if config.config_file_name is not None:
    fileConfig(config.config_file_name)
# Override sqlalchemy.url from environment variables
def get_db_url():
    """Return the database URL that Alembic should use.

    ``DATABASE_URL`` is returned verbatim when it is set. Otherwise the URL is
    assembled from ``POSTGRES_USER``, ``POSTGRES_PASSWORD``, ``POSTGRES_HOST``,
    ``POSTGRES_PORT`` and ``POSTGRES_DB`` (defaults: ``turf``, empty password,
    ``localhost``, ``5432``, ``turf_saas``).

    The user name and password are percent-encoded so that characters such as
    ``@``, ``/`` or ``:`` cannot be mistaken for URL delimiters.

    Returns:
        The connection URL as a string.
    """
    from urllib.parse import quote

    explicit_url = os.environ.get("DATABASE_URL")
    if explicit_url is not None:
        return explicit_url

    # safe="" also encodes "/", which quote() would otherwise leave untouched.
    user = quote(os.environ.get("POSTGRES_USER", "turf"), safe="")
    password = quote(os.environ.get("POSTGRES_PASSWORD", ""), safe="")
    host = os.environ.get("POSTGRES_HOST", "localhost")
    port = os.environ.get("POSTGRES_PORT", "5432")
    db = os.environ.get("POSTGRES_DB", "turf_saas")
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"


# The value is stored through ConfigParser, which treats "%" as interpolation
# syntax; a percent-encoded URL must therefore have every "%" doubled or
# set_main_option() raises. get_main_option()/get_section() undo the doubling.
config.set_main_option("sqlalchemy.url", get_db_url().replace("%", "%%"))

# No declarative model — we use raw DDL migrations
target_metadata = None
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The Alembic context is configured with the URL from the ``sqlalchemy.url``
    option rather than with a connection, and with ``literal_binds=True`` so
    that bound values are rendered inline.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An engine is built from the ``sqlalchemy.``-prefixed options of the main
    .ini section, using ``NullPool``, and the migrations run inside a
    transaction on a connection from that engine.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Entry point executed when Alembic loads this file: pick the offline or the
# online runner depending on how Alembic was invoked.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()

View File

@@ -0,0 +1,180 @@
#!/usr/bin/env python3
"""
SQLite → PostgreSQL Data Migration Script
Migrates existing turf_saas.db data to PostgreSQL.
Usage:
python migrations/migrate_sqlite_to_postgres.py \
--sqlite /path/to/turf_saas.db \
--pg-url postgresql://turf:password@localhost:5432/turf_saas
Run AFTER alembic upgrade head.
"""
import argparse
import sqlite3
import sys
import os
import logging
from datetime import datetime
# Logger used by every function in this script; basicConfig sets the root
# logger to INFO with a "time level message" line format.
logger = logging.getLogger("migrate")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
# Tables to migrate (in order to respect FK constraints).
# "users" is listed before "subscriptions" and "refresh_tokens"; run_migration
# processes the list front to back.
TABLES = [
    "predictions",
    "results",
    "performance",
    "scraping_logs",
    "pmu_reunions",
    "pmu_meteo",
    "pmu_courses",
    "pmu_partants",
    "ml_predictions_cache",
    "users",
    "subscriptions",
    "refresh_tokens",
]
def get_sqlite_conn(sqlite_path: str):
    """Open the SQLite database at ``sqlite_path``.

    Returns:
        A ``sqlite3`` connection whose rows are ``sqlite3.Row`` objects, so
        columns can be read by name as well as by position.
    """
    connection = sqlite3.connect(sqlite_path)
    connection.row_factory = sqlite3.Row
    return connection
def get_pg_conn(pg_url: str):
    """Open a psycopg2 connection to ``pg_url``.

    psycopg2 is imported here rather than at module level so that a missing
    driver produces an install hint and exit status 1 instead of a traceback.

    Args:
        pg_url: PostgreSQL connection URL / DSN passed to ``psycopg2.connect``.

    Returns:
        The connection returned by ``psycopg2.connect``.

    Raises:
        SystemExit: if psycopg2 cannot be imported.
        Whatever ``psycopg2.connect`` raises when the connection fails.
    """
    # Guard only the import, so that an error raised while connecting is never
    # misreported as "psycopg2 not installed".
    try:
        import psycopg2
    except ImportError:
        logger.error("psycopg2 not installed. Run: pip install psycopg2-binary")
        sys.exit(1)
    return psycopg2.connect(pg_url)
def migrate_table(sqlite_conn, pg_conn, table: str, batch_size: int = 500) -> int:
    """Migrate a single table from SQLite to PostgreSQL.

    Rows are read from SQLite in chunks of ``batch_size`` and every chunk is
    committed as its own PostgreSQL transaction. A chunk that fails is rolled
    back and logged, and the migration continues with the next chunk.

    The ``id`` column is copied unchanged. Re-numbering rows on the PostgreSQL
    side would break references such as ``subscriptions.user_id`` whenever the
    SQLite ids are not a gap-free 1..N sequence, and would duplicate rows on a
    second run for tables that have no other unique constraint. With ids
    preserved, ``ON CONFLICT DO NOTHING`` makes a re-run skip existing rows.
    After the copy the ``<table>_id_seq`` sequence is moved to ``MAX(id)`` so
    that later inserts do not collide with migrated ids.

    NOTE(review): values are handed to psycopg2 unchanged. If SQLite stores
    0/1 integers in columns that are BOOLEAN in PostgreSQL, those inserts may
    be rejected — confirm against the target schema.

    Args:
        sqlite_conn: open SQLite connection to read from.
        pg_conn: open PostgreSQL (psycopg2) connection to write to.
        table: table name; must exist under the same name on both sides.
        batch_size: rows per chunk/transaction; values below 1 are treated as 1.

    Returns:
        Number of rows sent in successfully committed chunks. Rows skipped by
        ``ON CONFLICT DO NOTHING`` are included in this count.
    """
    # fetchmany() with a non-positive size would silently yield nothing.
    batch_size = max(1, batch_size)

    sqlite_cur = sqlite_conn.cursor()
    try:
        sqlite_cur.execute(f"SELECT * FROM {table}")
    except sqlite3.Error as e:
        # Typically: the table does not exist in this SQLite file.
        logger.warning(f" Skipping {table}: {e}")
        return 0

    batch = sqlite_cur.fetchmany(batch_size)
    if not batch:
        logger.info(f" {table}: empty — skipping")
        return 0

    # Column names come from the SELECT itself, so the INSERT column order
    # always matches the order of the values in each fetched row.
    columns = [desc[0] for desc in sqlite_cur.description]
    col_list = ", ".join(columns)
    placeholders = ", ".join(["%s"] * len(columns))
    insert_sql = (
        f"INSERT INTO {table} ({col_list}) VALUES ({placeholders}) "
        "ON CONFLICT DO NOTHING"
    )

    pg_cur = pg_conn.cursor()
    inserted = 0
    while batch:
        try:
            pg_cur.executemany(insert_sql, [tuple(row) for row in batch])
            pg_conn.commit()
            inserted += len(batch)
        except Exception as e:
            # Roll back so the connection is usable for the next chunk.
            pg_conn.rollback()
            logger.error(f" {table} batch error: {e}")
        batch = sqlite_cur.fetchmany(batch_size)

    # Sync PostgreSQL sequence to max id
    try:
        pg_cur.execute(f"SELECT MAX(id) FROM {table}")
        max_id = pg_cur.fetchone()[0]
        if max_id:
            pg_cur.execute("SELECT setval(%s, %s)", (f"{table}_id_seq", max_id))
        pg_conn.commit()
    except Exception as e:
        # Best effort (the table may not have an id sequence), but a failed
        # statement aborts the PostgreSQL transaction: without this rollback
        # the first statement of the next table would fail as well.
        pg_conn.rollback()
        logger.warning(f" {table}: could not sync id sequence: {e}")

    return inserted
def run_migration(sqlite_path: str, pg_url: str, batch_size: int = 500):
    """Migrate every table in ``TABLES`` from SQLite to PostgreSQL.

    Tables are processed in ``TABLES`` order. Both connections are closed even
    when a table raises.

    Args:
        sqlite_path: path to the SQLite database file.
        pg_url: PostgreSQL connection URL.
        batch_size: rows per insert batch, forwarded to ``migrate_table``.

    Raises:
        SystemExit: if ``sqlite_path`` does not exist.
    """
    logger.info("=== SQLite → PostgreSQL Migration ===")
    logger.info(f"SQLite: {sqlite_path}")
    logger.info(f"PostgreSQL: {pg_url.split('@')[-1]}")  # Hide credentials in log
    logger.info(f"Started: {datetime.now().isoformat()}")

    if not os.path.exists(sqlite_path):
        logger.error(f"SQLite file not found: {sqlite_path}")
        sys.exit(1)

    total = 0
    sqlite_conn = get_sqlite_conn(sqlite_path)
    try:
        pg_conn = get_pg_conn(pg_url)
        try:
            for table in TABLES:
                logger.info(f" Migrating: {table}...")
                count = migrate_table(sqlite_conn, pg_conn, table, batch_size)
                logger.info(f"{table}: {count} rows migrated")
                total += count
        finally:
            pg_conn.close()
    finally:
        sqlite_conn.close()

    logger.info(f"=== Migration complete: {total} total rows ===")
    logger.info(f"Finished: {datetime.now().isoformat()}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Migrate SQLite → PostgreSQL")
    parser.add_argument(
        "--sqlite",
        default=os.environ.get("DB_PATH", "/home/h3r7/turf_saas/turf_saas.db"),
        help="Path to SQLite database file",
    )
    parser.add_argument(
        "--pg-url",
        default=os.environ.get("DATABASE_URL", ""),
        help="PostgreSQL connection URL",
    )
    parser.add_argument("--batch-size", type=int, default=500)
    args = parser.parse_args()

    if not args.pg_url:
        logger.error("--pg-url or DATABASE_URL env var required")
        sys.exit(1)

    # Forward --batch-size; it was previously parsed but never used.
    run_migration(args.sqlite, args.pg_url, args.batch_size)

26
migrations/script.py.mako Normal file
View File

@@ -0,0 +1,26 @@
## Mako template used to render each new revision file.
## Every ${...} expression below is substituted when the file is generated;
## empty upgrade/downgrade bodies fall back to "pass".
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
    ${upgrades if upgrades else "pass"}
def downgrade() -> None:
    ${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,345 @@
"""Initial schema — PostgreSQL migration from SQLite
Revision ID: 001_initial_schema
Revises: None
Create Date: 2026-04-25
Full migration of turf_saas SQLite schema to PostgreSQL.
Tables: predictions, results, performance, scraping_logs,
pmu_reunions, pmu_meteo, pmu_courses, pmu_partants,
ml_predictions_cache, users, subscriptions, refresh_tokens
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers. down_revision is None, so this revision has no parent
# and is the starting point of the migration history.
revision: str = "001_initial_schema"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the full PostgreSQL schema: twelve tables plus their indexes.

    ``users`` is created before ``subscriptions`` and ``refresh_tokens``
    because both carry a foreign key to ``users.id``.

    String ``server_default`` values are given WITHOUT embedded quotes.
    SQLAlchemy wraps a plain-string ``server_default`` in single quotes itself,
    so ``server_default="'free'"`` would store the five-character value
    ``'free'`` (quotes included) and violate ``ck_users_plan`` on every insert
    that relies on the default.
    """
    # ----------------------------------------------------------
    # predictions
    # ----------------------------------------------------------
    op.create_table(
        "predictions",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date", sa.Text, nullable=False),
        sa.Column("race_name", sa.Text),
        sa.Column("race_hippodrome", sa.Text),
        sa.Column("race_time", sa.Text),
        sa.Column("horse_number", sa.Integer),
        sa.Column("horse_name", sa.Text),
        sa.Column("odds", sa.Numeric(10, 2)),
        sa.Column("prediction_rank", sa.Integer),
        sa.Column("source", sa.Text),
        sa.Column("jockey", sa.Text),
        sa.Column("odds_time", sa.Text),
        sa.Column("odds_prev", sa.Numeric(10, 2)),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
    )
    op.create_index("idx_predictions_date", "predictions", ["date"])
    op.create_index("idx_predictions_horse", "predictions", ["horse_name"])

    # ----------------------------------------------------------
    # results
    # ----------------------------------------------------------
    op.create_table(
        "results",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date", sa.Text, nullable=False),
        sa.Column("race_name", sa.Text),
        sa.Column("race_hippodrome", sa.Text),
        sa.Column("position", sa.Integer),
        sa.Column("horse_name", sa.Text),
        sa.Column("odds", sa.Numeric(10, 2)),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
    )
    op.create_index("idx_results_date", "results", ["date"])

    # ----------------------------------------------------------
    # performance
    # ----------------------------------------------------------
    op.create_table(
        "performance",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("prediction_date", sa.Text),
        sa.Column("race_date", sa.Text),
        sa.Column("horse_name", sa.Text),
        sa.Column("predicted_rank", sa.Integer),
        sa.Column("actual_position", sa.Integer),
        sa.Column("hit", sa.Boolean),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
    )

    # ----------------------------------------------------------
    # scraping_logs
    # ----------------------------------------------------------
    op.create_table(
        "scraping_logs",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("timestamp", sa.Text),
        sa.Column("runtime_sec", sa.Numeric(10, 3)),
        sa.Column("total_pages", sa.Integer),
        sa.Column("url", sa.Text),
        sa.Column("site", sa.Text),
        sa.Column("status", sa.Text),
    )

    # ----------------------------------------------------------
    # pmu_reunions
    # ----------------------------------------------------------
    op.create_table(
        "pmu_reunions",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date_programme", sa.Text, nullable=False),
        sa.Column("num_reunion", sa.Integer, nullable=False),
        sa.Column("num_externe", sa.Integer),
        sa.Column("nature", sa.Text),
        sa.Column("statut", sa.Text),
        sa.Column("audience", sa.Text),
        sa.Column("hippodrome_code", sa.Text),
        sa.Column("hippodrome_court", sa.Text),
        sa.Column("hippodrome_long", sa.Text),
        sa.Column("pays_code", sa.Text),
        sa.Column("pays_libelle", sa.Text),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
        sa.UniqueConstraint("date_programme", "num_reunion", name="uq_pmu_reunions"),
    )
    op.create_index("idx_reunions_date", "pmu_reunions", ["date_programme"])

    # ----------------------------------------------------------
    # pmu_meteo
    # ----------------------------------------------------------
    op.create_table(
        "pmu_meteo",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date_programme", sa.Text, nullable=False),
        sa.Column("num_reunion", sa.Integer, nullable=False),
        sa.Column("nebulositecode", sa.Text),
        sa.Column("nebulosite_court", sa.Text),
        sa.Column("nebulosite_long", sa.Text),
        sa.Column("temperature", sa.Integer),
        sa.Column("force_vent", sa.Integer),
        sa.Column("direction_vent", sa.Text),
        sa.Column("date_prevision", sa.BigInteger),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
        sa.UniqueConstraint("date_programme", "num_reunion", name="uq_pmu_meteo"),
    )

    # ----------------------------------------------------------
    # pmu_courses
    # ----------------------------------------------------------
    op.create_table(
        "pmu_courses",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date_programme", sa.Text, nullable=False),
        sa.Column("num_reunion", sa.Integer, nullable=False),
        sa.Column("num_course", sa.Integer, nullable=False),
        sa.Column("num_externe", sa.Integer),
        sa.Column("libelle", sa.Text),
        sa.Column("libelle_court", sa.Text),
        sa.Column("heure_depart", sa.BigInteger),
        sa.Column("heure_depart_str", sa.Text),
        sa.Column("distance", sa.Integer),
        sa.Column("distance_unit", sa.Text),
        sa.Column("parcours", sa.Text),
        sa.Column("discipline", sa.Text),
        sa.Column("specialite", sa.Text),
        sa.Column("type_piste", sa.Text),
        sa.Column("corde", sa.Text),
        sa.Column("condition_age", sa.Text),
        sa.Column("condition_sexe", sa.Text),
        sa.Column("categorie_particularite", sa.Text),
        sa.Column("nb_declares_partants", sa.Integer),
        sa.Column("montant_prix", sa.Integer),
        sa.Column("montant_1er", sa.Integer),
        sa.Column("montant_2eme", sa.Integer),
        sa.Column("montant_3eme", sa.Integer),
        sa.Column("montant_4eme", sa.Integer),
        sa.Column("montant_5eme", sa.Integer),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
        sa.UniqueConstraint(
            "date_programme", "num_reunion", "num_course", name="uq_pmu_courses"
        ),
    )
    op.create_index("idx_courses_date", "pmu_courses", ["date_programme"])
    op.create_index("idx_courses_discipline", "pmu_courses", ["discipline"])

    # ----------------------------------------------------------
    # pmu_partants
    # ----------------------------------------------------------
    op.create_table(
        "pmu_partants",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date_programme", sa.Text, nullable=False),
        sa.Column("num_reunion", sa.Integer, nullable=False),
        sa.Column("num_course", sa.Integer, nullable=False),
        sa.Column("num_pmu", sa.Integer),
        sa.Column("id_cheval", sa.BigInteger),
        sa.Column("nom", sa.Text),
        sa.Column("age", sa.Integer),
        sa.Column("sexe", sa.Text),
        sa.Column("race", sa.Text),
        sa.Column("robe", sa.Text),
        sa.Column("pays", sa.Text),
        sa.Column("place_corde", sa.Integer),
        sa.Column("nom_pere", sa.Text),
        sa.Column("nom_mere", sa.Text),
        sa.Column("nom_pere_mere", sa.Text),
        sa.Column("driver", sa.Text),
        sa.Column("driver_change", sa.Boolean),
        sa.Column("entraineur", sa.Text),
        sa.Column("proprietaire", sa.Text),
        sa.Column("eleveur", sa.Text),
        sa.Column("oeilleres", sa.Text),
        sa.Column("supplement", sa.Boolean),
        sa.Column("handicap_valeur", sa.Numeric(8, 2)),
        sa.Column("handicap_poids", sa.Numeric(8, 2)),
        sa.Column("musique", sa.Text),
        sa.Column("nombre_courses", sa.Integer),
        sa.Column("nombre_victoires", sa.Integer),
        sa.Column("nombre_places", sa.Integer),
        sa.Column("cote_direct", sa.Numeric(10, 2)),
        sa.Column("cote_reference", sa.Numeric(10, 2)),
        sa.Column("tendance_cote", sa.Text),
        sa.Column("favoris", sa.Boolean),
        sa.Column("ordre_arrivee", sa.Integer),
        sa.Column("tx_victoire", sa.Numeric(6, 3)),
        sa.Column("tx_place", sa.Numeric(6, 3)),
        sa.Column("forme_recente", sa.Text),
        sa.Column("gains_carriere", sa.BigInteger),
        sa.Column("gains_annee_en_cours", sa.BigInteger),
        sa.Column("tendance_forme", sa.Text),
        sa.Column("distance_cheval_prec", sa.Integer),
        sa.Column("commentaire_apres_course", sa.Text),
        sa.Column("pays_entrainement", sa.Text),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
        sa.UniqueConstraint(
            "date_programme",
            "num_reunion",
            "num_course",
            "num_pmu",
            name="uq_pmu_partants",
        ),
    )
    op.create_index("idx_partants_date", "pmu_partants", ["date_programme"])
    op.create_index("idx_partants_nom", "pmu_partants", ["nom"])
    op.create_index("idx_partants_entraineur", "pmu_partants", ["entraineur"])

    # ----------------------------------------------------------
    # ml_predictions_cache
    # ----------------------------------------------------------
    op.create_table(
        "ml_predictions_cache",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("date", sa.Text, nullable=False),
        sa.Column("num_reunion", sa.Integer),
        sa.Column("num_course", sa.Integer),
        sa.Column("horse_name", sa.Text),
        sa.Column("horse_number", sa.Integer),
        sa.Column("odds", sa.Numeric(10, 2)),
        sa.Column("prob_top1", sa.Numeric(6, 4)),
        sa.Column("prob_top3", sa.Numeric(6, 4)),
        sa.Column("ml_score", sa.Numeric(6, 4)),
        sa.Column("recommendation", sa.Text),
        sa.Column("is_value_bet", sa.Integer, server_default="0"),
        sa.Column("is_outlier", sa.Integer, server_default="0"),
        sa.Column("race_label", sa.Text),
        sa.Column("race_name", sa.Text),
        sa.Column("hippodrome", sa.Text),
        sa.Column("discipline", sa.Text),
        sa.Column("distance", sa.Numeric(8, 1)),
        sa.Column("heure", sa.Text),
        # Plain strings: SQLAlchemy adds the SQL quotes (DEFAULT 'xgboost_v1').
        sa.Column("model_version", sa.Text, server_default="xgboost_v1"),
        sa.Column("risque_label", sa.Text, server_default="neutral"),
        sa.Column("risque_score", sa.Integer, server_default="50"),
        sa.Column("created_at", sa.TIMESTAMP, server_default=sa.text("NOW()")),
        sa.UniqueConstraint(
            "date", "num_reunion", "num_course", "horse_name", name="uq_ml_cache"
        ),
    )
    op.create_index("idx_ml_cache_date", "ml_predictions_cache", ["date"])

    # ----------------------------------------------------------
    # users
    # ----------------------------------------------------------
    # NOTE(review): email is declared unique=True and also gets the unique
    # index idx_users_email below — this looks redundant; confirm whether both
    # are wanted before dropping either.
    op.create_table(
        "users",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("email", sa.Text, nullable=False, unique=True),
        sa.Column("password_hash", sa.Text, nullable=False),
        sa.Column(
            "plan",
            sa.Text,
            nullable=False,
            # Must be one of the values allowed by ck_users_plan; no embedded
            # quotes, SQLAlchemy renders this as DEFAULT 'free'.
            server_default="free",
        ),
        sa.Column(
            "created_at", sa.TIMESTAMP, nullable=False, server_default=sa.text("NOW()")
        ),
        sa.Column("is_active", sa.Integer, nullable=False, server_default="1"),
        sa.Column("daily_usage", sa.Integer, nullable=False, server_default="0"),
        sa.Column("last_usage_date", sa.Text),
        sa.CheckConstraint("plan IN ('free','premium','pro')", name="ck_users_plan"),
    )
    op.create_index("idx_users_email", "users", ["email"], unique=True)

    # ----------------------------------------------------------
    # subscriptions
    # ----------------------------------------------------------
    op.create_table(
        "subscriptions",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("user_id", sa.BigInteger, sa.ForeignKey("users.id"), nullable=False),
        sa.Column("plan", sa.Text, nullable=False),
        sa.Column(
            "start_date", sa.TIMESTAMP, nullable=False, server_default=sa.text("NOW()")
        ),
        sa.Column("end_date", sa.TIMESTAMP),
        sa.Column("stripe_customer_id", sa.Text),
        sa.CheckConstraint(
            "plan IN ('free','premium','pro')", name="ck_subscriptions_plan"
        ),
    )
    op.create_index("idx_subscriptions_user", "subscriptions", ["user_id"])
    op.create_index("idx_subscriptions_stripe", "subscriptions", ["stripe_customer_id"])

    # ----------------------------------------------------------
    # refresh_tokens
    # ----------------------------------------------------------
    op.create_table(
        "refresh_tokens",
        sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=True),
        sa.Column("user_id", sa.BigInteger, sa.ForeignKey("users.id"), nullable=False),
        sa.Column("token_hash", sa.Text, nullable=False, unique=True),
        sa.Column(
            "created_at", sa.TIMESTAMP, nullable=False, server_default=sa.text("NOW()")
        ),
        sa.Column("expires_at", sa.TIMESTAMP, nullable=False),
        sa.Column("revoked", sa.Integer, nullable=False, server_default="0"),
    )
    op.create_index("idx_refresh_tokens_user", "refresh_tokens", ["user_id"])
    op.create_index(
        "idx_refresh_tokens_hash", "refresh_tokens", ["token_hash"], unique=True
    )
def downgrade() -> None:
    """Drop every table of the initial schema.

    Tables are dropped in the reverse of their creation order, so the two
    tables holding a foreign key to ``users`` are removed before ``users``.
    """
    tables_in_drop_order = (
        "refresh_tokens",
        "subscriptions",
        "users",
        "ml_predictions_cache",
        "pmu_partants",
        "pmu_courses",
        "pmu_meteo",
        "pmu_reunions",
        "scraping_logs",
        "performance",
        "results",
        "predictions",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)