Files
turf_saas/docker-compose.yml
DevOps Engineer dce1e9b744 feat(devops): CI/CD + Docker + Monitoring infrastructure
- Multi-stage Dockerfile (builder+runner, <500MB target)
- docker-compose.yml: app(x4) + postgres + redis + prometheus + grafana + nginx
- .env.example with all required secrets (never hardcoded)
- requirements.txt with all dependencies including prometheus-client, alembic
- GitHub Actions CI: lint (flake8+bandit+safety) + tests + Docker build/push
- GitHub Actions CD: staging deploy -> smoke tests -> production deploy + rollback
- Alembic migration setup + initial PostgreSQL schema (001_initial_schema)
- SQLite→PostgreSQL data migration script
- Prometheus metrics module (HTTP, ML, DB, business metrics)
- Prometheus alert rules (5xx >1%, latency >2s, disk >80%, ML accuracy)
- Grafana dashboard (overview: req/s, p95, ML accuracy, error rate)
- Nginx reverse proxy config (HTTPS/TLS, rate limiting, security headers)
- Structured JSON logging module
- Automated daily DB backup script (pg_dump + 30-day retention)

Branch: feature/devops-cicd

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-25 17:32:02 +02:00

251 lines
7.5 KiB
YAML

# Compose file format version declared for this stack.
# NOTE(review): recent Docker Compose releases reportedly treat this top-level
# key as obsolete and ignore it — confirm against the deployed Compose version
# before removing.
version: "3.9"
# ============================================================
# H3R7Tech Turf SaaS — Docker Compose
# Services: app (x4) + postgres + redis + prometheus + grafana + nginx + certbot
# ============================================================
# Shared settings for the application containers. Each app service pulls this
# mapping in with `<<: *app-common` and then adds its own command, ports,
# healthcheck and environment on top.
x-app-common: &app-common
  # Image: built from the local Dockerfile, stopping at the `runner` stage.
  build:
    context: .
    dockerfile: Dockerfile
    target: runner

  # Runtime configuration is read from the project-level .env file.
  env_file:
    - .env

  # Named volumes mounted into every app container.
  volumes:
    - ml-models:/app/data/models
    - app-logs:/app/logs

  networks:
    - turf-net

  # Do not start an app container until PostgreSQL reports healthy.
  depends_on:
    postgres:
      condition: service_healthy

  restart: unless-stopped
# Service definitions for the stack. The four application services
# (combined-api, dashboard-api, portal, scheduler) merge the shared
# `app-common` anchor; the remaining services are infrastructure.
# Every published port except nginx's 80/443 is bound to 127.0.0.1 only.
services:
  # ----------------------------------------------------------
  # PostgreSQL — primary database
  # ----------------------------------------------------------
  postgres:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: ${POSTGRES_DB:-turf_saas}
      POSTGRES_USER: ${POSTGRES_USER:-turf}
      # Required secret: interpolation aborts with this message when the
      # variable is unset or empty instead of silently passing "".
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env}"
    volumes:
      - postgres-data:/var/lib/postgresql/data
      - ./infra/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-turf} -d ${POSTGRES_DB:-turf_saas}"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
    networks:
      - turf-net
    ports:
      - "127.0.0.1:5432:5432"

  # ----------------------------------------------------------
  # Redis — caching & session store
  # ----------------------------------------------------------
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    # List form: each item is passed as exactly one argument, so a password
    # containing whitespace or quote characters cannot split the command.
    # The password is required; an empty value would leave --requirepass
    # without an argument.
    command:
      - "redis-server"
      - "--appendonly"
      - "yes"
      - "--requirepass"
      - "${REDIS_PASSWORD:?REDIS_PASSWORD must be set in .env}"
    volumes:
      - redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "--pass", "${REDIS_PASSWORD}", "ping"]
      interval: 10s
      timeout: 5s
      retries: 3
    networks:
      - turf-net
    ports:
      - "127.0.0.1:6379:6379"

  # ----------------------------------------------------------
  # Combined API — main predictions + ideas API (port 8790)
  # ----------------------------------------------------------
  combined-api:
    <<: *app-common
    container_name: turf-combined-api
    # Folded scalar: the two lines are joined with a single space.
    command: >-
      gunicorn --bind 0.0.0.0:8790 --workers 2 --timeout 120
      --access-logfile - --error-logfile - combined_api:app
    ports:
      - "127.0.0.1:8790:8790"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8790/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    environment:
      # Environment values are strings; quoted so no YAML typing applies.
      PORT: "8790"
      SERVICE_NAME: combined-api

  # ----------------------------------------------------------
  # Dashboard API — analytics & ML scoring (port 8791)
  # ----------------------------------------------------------
  dashboard-api:
    <<: *app-common
    container_name: turf-dashboard-api
    command: >-
      gunicorn --bind 0.0.0.0:8791 --workers 2 --timeout 120
      --access-logfile - --error-logfile - dashboard_api:app
    ports:
      - "127.0.0.1:8791:8791"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8791/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    environment:
      PORT: "8791"
      SERVICE_NAME: dashboard-api

  # ----------------------------------------------------------
  # Portal Server — frontend portal (port 8792)
  # ----------------------------------------------------------
  portal:
    <<: *app-common
    container_name: turf-portal
    command: >-
      gunicorn --bind 0.0.0.0:8792 --workers 2 --timeout 60
      --access-logfile - --error-logfile - portal_server:app
    ports:
      - "127.0.0.1:8792:8792"
    healthcheck:
      # The portal is probed at "/" rather than a dedicated /health path.
      test: ["CMD", "curl", "-f", "http://localhost:8792/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    environment:
      PORT: "8792"
      SERVICE_NAME: portal

  # ----------------------------------------------------------
  # Scheduler — background jobs (no external port)
  # ----------------------------------------------------------
  scheduler:
    <<: *app-common
    container_name: turf-scheduler
    command: python turf_scheduler.py
    environment:
      SERVICE_NAME: scheduler

  # ----------------------------------------------------------
  # Prometheus — metrics scraping
  # ----------------------------------------------------------
  prometheus:
    image: prom/prometheus:v2.53.4
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=30d"
      - "--web.enable-lifecycle"
    volumes:
      - ./infra/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./infra/prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
      - prometheus-data:/prometheus
    ports:
      - "127.0.0.1:9090:9090"
    networks:
      - turf-net
    healthcheck:
      test: ["CMD", "wget", "-q", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------
  # Grafana — dashboards
  # ----------------------------------------------------------
  grafana:
    image: grafana/grafana:11.5.2
    restart: unless-stopped
    environment:
      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
      # Required secret: refuse to start the stack with an empty admin
      # password rather than fall back to whatever Grafana defaults to.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set in .env}"
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_DOMAIN: ${DOMAIN:-localhost}
      # Grafana is configured to be served under the /grafana/ sub-path.
      GF_SERVER_ROOT_URL: "https://${DOMAIN:-localhost}/grafana/"
      GF_SERVER_SERVE_FROM_SUB_PATH: "true"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./infra/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./infra/grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "127.0.0.1:3000:3000"
    networks:
      - turf-net
    depends_on:
      - prometheus

  # ----------------------------------------------------------
  # Nginx — reverse proxy + TLS termination
  # ----------------------------------------------------------
  nginx:
    image: nginx:1.27-alpine
    restart: unless-stopped
    # The only ports published on all host interfaces.
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./infra/nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./infra/nginx/conf.d:/etc/nginx/conf.d:ro
      # Shared with the certbot service; mounted read-only here.
      - certbot-www:/var/www/certbot:ro
      - certbot-certs:/etc/letsencrypt:ro
    networks:
      - turf-net
    # Short-form depends_on: orders start-up only, it does not wait for the
    # upstream services' healthchecks to pass.
    depends_on:
      - combined-api
      - dashboard-api
      - portal
    healthcheck:
      # NOTE(review): `nginx -t` checks the configuration; it does not probe
      # whether the server is actually answering requests.
      test: ["CMD", "nginx", "-t"]
      interval: 60s
      timeout: 10s
      retries: 3

  # ----------------------------------------------------------
  # Certbot — Let's Encrypt TLS certificates (one-shot `certonly` run)
  # ----------------------------------------------------------
  certbot:
    # NOTE(review): unpinned tag, unlike every other image in this file —
    # consider pinning to a specific release.
    image: certbot/certbot:latest
    # Runs once and exits; never restarted automatically.
    restart: "no"
    volumes:
      - certbot-www:/var/www/certbot
      - certbot-certs:/etc/letsencrypt
    # Folded scalar: the two lines are joined with a single space.
    # ADMIN_EMAIL and DOMAIN come from the environment / .env file.
    command: >-
      certonly --webroot --webroot-path=/var/www/certbot
      --email ${ADMIN_EMAIL} --agree-tos --no-eff-email -d ${DOMAIN}
    networks:
      - turf-net
# ============================================================
# Named volumes — persistent storage
# ============================================================
# All volumes use the `local` driver; the comments name the services that
# mount each one in this file.
volumes:
  # postgres: /var/lib/postgresql/data
  postgres-data:
    driver: local

  # redis: /data
  redis-data:
    driver: local

  # app services (via app-common): /app/data/models
  ml-models:
    driver: local

  # app services (via app-common): /app/logs
  app-logs:
    driver: local

  # prometheus: /prometheus
  prometheus-data:
    driver: local

  # grafana: /var/lib/grafana
  grafana-data:
    driver: local

  # certbot (read-write) and nginx (read-only): /var/www/certbot
  certbot-www:
    driver: local

  # certbot (read-write) and nginx (read-only): /etc/letsencrypt
  certbot-certs:
    driver: local
# ============================================================
# Network
# ============================================================
# Single bridge network that every service in this file attaches to.
networks:
  turf-net:
    driver: bridge