- Multi-stage Dockerfile (builder+runner, <500MB target) - docker-compose.yml: app(x4) + postgres + redis + prometheus + grafana + nginx - .env.example with all required secrets (never hardcoded) - requirements.txt with all dependencies including prometheus-client, alembic - GitHub Actions CI: lint (flake8+bandit+safety) + tests + Docker build/push - GitHub Actions CD: staging deploy -> smoke tests -> production deploy + rollback - Alembic migration setup + initial PostgreSQL schema (001_initial_schema) - SQLite→PostgreSQL data migration script - Prometheus metrics module (HTTP, ML, DB, business metrics) - Prometheus alert rules (5xx >1%, latency >2s, disk >80%, ML accuracy) - Grafana dashboard (overview: req/s, p95, ML accuracy, error rate) - Nginx reverse proxy config (HTTPS/TLS, rate limiting, security headers) - Structured JSON logging module - Automated daily DB backup script (pg_dump + 30-day retention) Branch: feature/devops-cicd Co-Authored-By: Paperclip <noreply@paperclip.ing>
206 lines
6.9 KiB
YAML
206 lines
6.9 KiB
YAML
# ============================================================
|
|
# CD Pipeline — deploy to staging then production
|
|
# Triggers on push to main/master
|
|
# ============================================================
|
|
|
|
name: CD
|
|
|
|
on:
|
|
push:
|
|
branches: [main, master]
|
|
workflow_dispatch:
|
|
inputs:
|
|
environment:
|
|
description: "Target environment"
|
|
required: true
|
|
default: staging
|
|
type: choice
|
|
options: [staging, production]
|
|
|
|
concurrency:
|
|
group: cd-${{ github.ref }}
|
|
cancel-in-progress: false # Never cancel an active deploy
|
|
|
|
env:
|
|
REGISTRY: ghcr.io
|
|
IMAGE_NAME: ${{ github.repository }}
|
|
|
|
jobs:
|
|
# ----------------------------------------------------------
|
|
# Job 1: Deploy to Staging
|
|
# ----------------------------------------------------------
|
|
deploy-staging:
|
|
name: Deploy → Staging
|
|
runs-on: ubuntu-latest
|
|
environment:
|
|
name: staging
|
|
url: https://staging.turf.h3r7.tech
|
|
permissions:
|
|
contents: read
|
|
packages: read
|
|
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
|
|
- name: Deploy to staging server via SSH
|
|
uses: appleboy/ssh-action@v1.0.3
|
|
with:
|
|
host: ${{ secrets.STAGING_HOST }}
|
|
username: ${{ secrets.STAGING_USER }}
|
|
key: ${{ secrets.STAGING_SSH_KEY }}
|
|
port: ${{ secrets.STAGING_PORT || 22 }}
|
|
script: |
|
|
set -e
|
|
echo "=== Deploying to STAGING ==="
|
|
cd /opt/turf-saas
|
|
|
|
# Pull latest code
|
|
git fetch origin
|
|
git checkout ${{ github.sha }}
|
|
|
|
# Pull latest Docker images
|
|
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
|
|
docker compose pull
|
|
|
|
# Run DB migrations
|
|
docker compose run --rm combined-api alembic upgrade head
|
|
|
|
# Rolling restart — zero downtime
|
|
docker compose up -d --no-deps --scale combined-api=2 combined-api
|
|
sleep 15
|
|
docker compose up -d --no-deps --scale combined-api=1 combined-api
|
|
|
|
# Restart other services
|
|
docker compose up -d --no-deps dashboard-api portal scheduler
|
|
|
|
# Health check
|
|
sleep 20
|
|
curl -f https://staging.turf.h3r7.tech/health || exit 1
|
|
|
|
echo "=== Staging deploy OK ==="
|
|
|
|
- name: Notify Staging Deploy
|
|
run: |
|
|
MSG="✅ Staging deployed: \`${{ github.repository }}\` commit=\`${{ github.sha }}\`"
|
|
curl -s -X POST "${{ secrets.SLACK_WEBHOOK_URL }}" \
|
|
-H 'Content-type: application/json' \
|
|
--data "{\"text\":\"${MSG}\"}" || true
|
|
curl -s -X POST \
|
|
"https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
|
-d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \
|
|
-d text="${MSG}" || true
|
|
|
|
# ----------------------------------------------------------
|
|
# Job 2: Smoke Tests on Staging
|
|
# ----------------------------------------------------------
|
|
smoke-test-staging:
|
|
name: Smoke Tests on Staging
|
|
runs-on: ubuntu-latest
|
|
needs: deploy-staging
|
|
steps:
|
|
- name: Health endpoints check
|
|
run: |
|
|
BASE="https://staging.turf.h3r7.tech"
|
|
echo "Checking ${BASE}/health ..."
|
|
curl -f "${BASE}/health" -o /dev/null -s -w "%{http_code}\n"
|
|
echo "Checking ${BASE}/api/predictions ..."
|
|
curl -f "${BASE}/api/predictions" -o /dev/null -s -w "%{http_code}\n" || true
|
|
echo "Smoke tests passed"
|
|
|
|
# ----------------------------------------------------------
|
|
# Job 3: Deploy to Production (manual approval gate)
|
|
# ----------------------------------------------------------
|
|
deploy-production:
|
|
name: Deploy → Production
|
|
runs-on: ubuntu-latest
|
|
needs: smoke-test-staging
|
|
environment:
|
|
name: production
|
|
url: https://turf.h3r7.tech
|
|
permissions:
|
|
contents: read
|
|
packages: read
|
|
|
|
steps:
|
|
- uses: actions/checkout@v4
|
|
|
|
- name: Deploy to production server via SSH
|
|
uses: appleboy/ssh-action@v1.0.3
|
|
with:
|
|
host: ${{ secrets.PROD_HOST }}
|
|
username: ${{ secrets.PROD_USER }}
|
|
key: ${{ secrets.PROD_SSH_KEY }}
|
|
port: ${{ secrets.PROD_PORT || 22 }}
|
|
script: |
|
|
set -e
|
|
echo "=== Deploying to PRODUCTION ==="
|
|
cd /opt/turf-saas
|
|
|
|
# Backup current state
|
|
docker compose exec -T postgres pg_dumpall -U turf > /opt/backups/turf_saas_pre_deploy_$(date +%Y%m%d_%H%M%S).sql
|
|
|
|
# Pull latest code
|
|
git fetch origin
|
|
git checkout ${{ github.sha }}
|
|
|
|
# Pull latest Docker images
|
|
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
|
|
docker compose pull
|
|
|
|
# Run DB migrations
|
|
docker compose run --rm combined-api alembic upgrade head
|
|
|
|
# Rolling restart
|
|
docker compose up -d --no-deps --scale combined-api=2 combined-api
|
|
sleep 20
|
|
docker compose up -d --no-deps --scale combined-api=1 combined-api
|
|
docker compose up -d --no-deps dashboard-api portal scheduler
|
|
|
|
# Health check
|
|
sleep 30
|
|
curl -f https://turf.h3r7.tech/health || exit 1
|
|
|
|
# Clean old images
|
|
docker image prune -f
|
|
|
|
echo "=== Production deploy OK ==="
|
|
|
|
- name: Notify Production Deploy
|
|
run: |
|
|
MSG="🚀 Production deployed: \`${{ github.repository }}\` commit=\`${{ github.sha }}\`"
|
|
curl -s -X POST "${{ secrets.SLACK_WEBHOOK_URL }}" \
|
|
-H 'Content-type: application/json' \
|
|
--data "{\"text\":\"${MSG}\"}" || true
|
|
curl -s -X POST \
|
|
"https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
|
-d chat_id="${{ secrets.TELEGRAM_CHAT_ID }}" \
|
|
-d text="${MSG}" || true
|
|
|
|
# ----------------------------------------------------------
|
|
# Rollback job (triggered manually on failure)
|
|
# ----------------------------------------------------------
|
|
rollback:
|
|
name: Rollback Production
|
|
runs-on: ubuntu-latest
|
|
if: failure() && needs.deploy-production.result == 'failure'
|
|
needs: deploy-production
|
|
environment: production
|
|
steps:
|
|
- name: Rollback via SSH
|
|
uses: appleboy/ssh-action@v1.0.3
|
|
with:
|
|
host: ${{ secrets.PROD_HOST }}
|
|
username: ${{ secrets.PROD_USER }}
|
|
key: ${{ secrets.PROD_SSH_KEY }}
|
|
script: |
|
|
cd /opt/turf-saas
|
|
git checkout HEAD~1
|
|
docker compose up -d --force-recreate
|
|
echo "Rollback complete"
|
|
|
|
- name: Notify Rollback
|
|
run: |
|
|
curl -s -X POST "${{ secrets.SLACK_WEBHOOK_URL }}" \
|
|
-H 'Content-type: application/json' \
|
|
--data '{"text":"⚠️ Production ROLLED BACK due to deploy failure!"}' || true
|