From 61236e864c6f85e7e86802d343dbc48525e0b9b7 Mon Sep 17 00:00:00 2001
From: Bendt
Date: Mon, 9 Feb 2026 10:58:30 -0500
Subject: [PATCH] Add infrastructure management: backup scripts and documentation

---
 infrastructure/README.md                      | 190 ++++++++++++++++++
 .../scripts/backup-compose-files.sh           |  78 +++++++
 .../scripts/backup-critical-data.sh           | 139 +++++++++++++
 3 files changed, 407 insertions(+)
 create mode 100644 infrastructure/README.md
 create mode 100755 infrastructure/scripts/backup-compose-files.sh
 create mode 100755 infrastructure/scripts/backup-critical-data.sh

diff --git a/infrastructure/README.md b/infrastructure/README.md
new file mode 100644
index 0000000..383d653
--- /dev/null
+++ b/infrastructure/README.md
@@ -0,0 +1,190 @@
+# Infrastructure Management
+
+This directory contains backups, scripts, and documentation for managing the homelab infrastructure.
+
+## Directory Structure
+
+```
+infrastructure/
+├── compose/              # Docker Compose files backed up from cluster
+├── stacks/               # Docker Stack definitions
+├── traefik/              # Traefik configuration backups
+├── scripts/              # Management and backup scripts
+├── backups/              # Critical data backups (created by scripts)
+└── BACKUP_MANIFEST.md    # Auto-generated backup manifest
+```
+
+## Quick Start
+
+### 1. Backup Compose Files
+
+Run this to back up all compose configurations from the cluster:
+
+```bash
+./scripts/backup-compose-files.sh
+```
+
+This will:
+- Copy all Dokploy compose files from `/etc/dokploy/compose/`
+- Copy Traefik configuration
+- Copy stack files
+- Generate a backup manifest
+
+### 2. Backup Critical Data
+
+Run this to back up databases and application data:
+
+```bash
+./scripts/backup-critical-data.sh
+```
+
+This will:
+- Backup PostgreSQL databases (Dokploy, Immich, BewCloud)
+- Backup MariaDB databases (Pancake)
+- Backup application volumes (Memos, Gitea)
+- Clean up old backups (30+ days)
+- Generate a backup report
+
+## Automated Backups
+
+### Set up cron jobs on your local machine:
+
+```bash
+# Edit crontab
+crontab -e
+
+# Add these lines:
+# Backup compose files daily at 2 AM
+0 2 * * * cd /Users/timothy.bendt/developer/cloud-compose/infrastructure/scripts && ./backup-compose-files.sh >> /var/log/homelab-backup.log 2>&1
+
+# Backup critical data daily at 3 AM
+0 3 * * * cd /Users/timothy.bendt/developer/cloud-compose/infrastructure/scripts && ./backup-critical-data.sh >> /var/log/homelab-backup.log 2>&1
+```
+
+### Or run manually whenever you make changes:
+
+```bash
+# After modifying any service
+cd /Users/timothy.bendt/developer/cloud-compose
+./infrastructure/scripts/backup-compose-files.sh
+git add infrastructure/
+git commit -m "Backup infrastructure configs"
+git push
+```
+
+## Restore Procedures
+
+### Restore a Compose File
+
+1. Copy the compose file from `infrastructure/compose/<project>/docker-compose.yml`
+2. Upload via Dokploy UI
+3. Deploy
+
+### Restore a Database
+
+```bash
+# Example: Restore Dokploy database
+scp infrastructure/backups/dokploy-postgres-dokploy-2026-02-09.sql ubuntu@192.168.2.130:/tmp/
+ssh ubuntu@192.168.2.130 "docker exec -i dokploy-postgres.1.<task-id> psql -U postgres dokploy < /tmp/dokploy-postgres-dokploy-2026-02-09.sql"
+```
+
+### Restore Volume Data
+
+```bash
+# Example: Restore Memos data
+scp infrastructure/backups/bewcloud-memos-ssogxn-memos-data-2026-02-09.tar.gz ubuntu@192.168.2.130:/tmp/
+ssh ubuntu@192.168.2.130 "docker run --rm -v bewcloud-memos-ssogxn_memos_data:/data -v /tmp:/backup alpine sh -c 'cd /data && tar xzf /backup/bewcloud-memos-ssogxn-memos-data-2026-02-09.tar.gz'"
+```
+
+## SSH Access
+
+### Controller (tpi-n1)
+```bash
+ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130
+```
+
+### Worker (tpi-n2)
+```bash
+ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.19
+```
+
+### NAS (node-nas)
+```bash
+ssh tim@192.168.2.18
+```
+
+## Useful Commands
+
+### Check Service Status
+```bash
+ssh ubuntu@192.168.2.130 "docker service ls"
+```
+
+### View Service Logs
+```bash
+ssh ubuntu@192.168.2.130 "docker service logs <service-name> --tail 100 -f"
+```
+
+### Scale a Service
+```bash
+ssh ubuntu@192.168.2.130 "docker service scale <service-name>=<replicas>"
+```
+
+### Check Node Status
+```bash
+ssh ubuntu@192.168.2.130 "docker node ls"
+```
+
+## Web Interfaces
+
+| Service | URL | Purpose |
+|---------|-----|---------|
+| Dokploy | http://192.168.2.130:3000 | Container management |
+| Swarmpit | http://192.168.2.130:888 | Swarm monitoring |
+| Traefik | http://192.168.2.130:8080 | Reverse proxy dashboard |
+| MinIO | http://192.168.2.18:9001 | Object storage console |
+
+## Backup Storage
+
+### Local
+Backups are stored in `infrastructure/backups/` with date stamps.
+
+### Offsite (Recommended)
+Consider copying backups to:
+- MinIO bucket (`backups/`)
+- External hard drive
+- Cloud storage (AWS S3, etc.)
+- Another server
+
+Example:
+```bash
+# Copy to MinIO
+mc cp infrastructure/backups/* minio/backups/
+```
+
+## Maintenance Checklist
+
+### Daily
+- [ ] Check backup logs for errors
+- [ ] Verify critical services are running
+
+### Weekly
+- [ ] Review Swarmpit dashboard
+- [ ] Check disk usage on all nodes
+- [ ] Review backup integrity
+
+### Monthly
+- [ ] Test restore procedures
+- [ ] Update documentation
+- [ ] Review and update services
+- [ ] Clean up unused images/volumes
+
+### Quarterly
+- [ ] Full disaster recovery drill
+- [ ] Security audit
+- [ ] Update base images
+- [ ] Review access controls
+
+---
+
+*Infrastructure Management Guide - February 2026*
diff --git a/infrastructure/scripts/backup-compose-files.sh b/infrastructure/scripts/backup-compose-files.sh
new file mode 100755
index 0000000..41b88d3
--- /dev/null
+++ b/infrastructure/scripts/backup-compose-files.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# backup-compose-files.sh
+# Backs up all compose files from the cluster to local repo
+#
+# Copies every Dokploy docker-compose.yml, the Traefik configuration and the
+# standalone MinIO stack file from the controller into BACKUP_DIR, then writes
+# BACKUP_MANIFEST.md describing what was captured.
+#
+# Environment overrides (defaults are the previously hard-coded values):
+#   BACKUP_DIR  local destination directory
+#   CONTROLLER  user@host of the swarm controller
+
+# pipefail makes a failed ssh in the file-listing pipeline abort the run
+# instead of producing an empty backup that looks successful.
+set -euo pipefail
+
+BACKUP_DIR="${BACKUP_DIR:-/Users/timothy.bendt/developer/cloud-compose/infrastructure}"
+CONTROLLER="${CONTROLLER:-ubuntu@192.168.2.130}"
+REMOTE_COMPOSE_DIR="/etc/dokploy/compose"
+SSH_OPTS=(-o ConnectTimeout=10 -i "$HOME/.ssh/id_ed25519")
+DATE=$(date +%Y-%m-%d)
+
+echo "📦 Backing up compose files from cluster..."
+echo "Date: $DATE"
+echo ""
+
+# Create every destination up front: stacks/ was never created before, so the
+# stack copy always failed silently, and the manifest lists these directories.
+mkdir -p "$BACKUP_DIR/compose" "$BACKUP_DIR/stacks" "$BACKUP_DIR/traefik"
+
+# Backup Dokploy compose files
+echo "📁 Backing up Dokploy compose files..."
+ssh -n "${SSH_OPTS[@]}" "$CONTROLLER" \
+    "find $REMOTE_COMPOSE_DIR -name 'docker-compose.yml' -type f" |
+    while IFS= read -r file; do
+        # The project name is the first path component below the compose root.
+        # Parameter expansion replaces `grep -oP`, which the BSD grep on macOS
+        # does not support; there $project came back empty and every file was
+        # skipped without any error.
+        rel="${file#"$REMOTE_COMPOSE_DIR"/}"
+        project="${rel%%/*}"
+        if [ "$rel" != "$file" ] && [ -n "$project" ]; then
+            echo " - $project"
+            mkdir -p "$BACKUP_DIR/compose/$project"
+            # stdin is detached so scp can never swallow the remaining file list.
+            scp "${SSH_OPTS[@]}" "$CONTROLLER:$file" "$BACKUP_DIR/compose/$project/" < /dev/null
+        fi
+    done
+
+# Backup Traefik configuration (best effort: it may be absent or unreadable)
+echo "📁 Backing up Traefik configuration..."
+scp -r "${SSH_OPTS[@]}" "$CONTROLLER:/etc/dokploy/traefik/*" "$BACKUP_DIR/traefik/" 2>/dev/null ||
+    echo " (No Traefik config or permission denied)"
+
+# Backup local compose files from home directory (best effort: optional file)
+echo "📁 Backing up local compose files..."
+scp "${SSH_OPTS[@]}" "$CONTROLLER:~/minio-stack.yml" "$BACKUP_DIR/stacks/" 2>/dev/null || true
+
+# Create manifest
+echo "📄 Creating backup manifest..."
+cat > "$BACKUP_DIR/BACKUP_MANIFEST.md" << EOF
+# Infrastructure Backup Manifest
+
+**Backup Date:** $DATE
+**Source:** $CONTROLLER
+
+## Contents
+
+### Compose Files
+All Docker Compose files from Dokploy-managed projects:
+\`\`\`
+$(ls -1 "$BACKUP_DIR/compose/")
+\`\`\`
+
+### Stack Files
+Standalone stack definitions:
+\`\`\`
+$(ls -1 "$BACKUP_DIR/stacks/" 2>/dev/null | grep . || echo "None")
+\`\`\`
+
+### Traefik Configuration
+Reverse proxy configuration backed up to ./traefik/
+
+## Restoration
+To restore these configurations:
+1. Copy compose files to /etc/dokploy/compose/<project>/code/
+2. Copy stack files to ~/ on the controller
+3. Copy Traefik config to /etc/dokploy/traefik/
+4. Redeploy via Dokploy UI or docker stack deploy
+
+## Services Backed Up
+$(ssh -n "${SSH_OPTS[@]}" "$CONTROLLER" "docker service ls --format '- {{.Name}}'" 2>/dev/null || echo "Unable to fetch service list")
+
+---
+Generated: $(date)
+EOF
+
+echo ""
+echo "✅ Backup complete!"
+echo "📁 Location: $BACKUP_DIR/"
+echo "📄 Manifest: $BACKUP_DIR/BACKUP_MANIFEST.md"
diff --git a/infrastructure/scripts/backup-critical-data.sh b/infrastructure/scripts/backup-critical-data.sh
new file mode 100755
index 0000000..ffc08af
--- /dev/null
+++ b/infrastructure/scripts/backup-critical-data.sh
@@ -0,0 +1,235 @@
+#!/bin/bash
+# backup-critical-data.sh
+# Backs up critical databases and data from the homelab
+#
+# Dumps the databases and archives the volumes listed near the bottom of this
+# file from the swarm controller into BACKUP_DIR, prunes old backups and
+# writes a dated report.
+#
+# Environment:
+#   BACKUP_DIR             local destination directory
+#   CONTROLLER             user@host of the swarm controller
+#   RETENTION_DAYS         backups older than this many days are pruned
+#   MARIADB_ROOT_PASSWORD  root password for the Pancake MariaDB dump. It is
+#                          deliberately not stored in this file; the value
+#                          that used to be hard-coded here should be treated
+#                          as exposed and rotated.
+#
+# Exit status: 0 when every backup succeeded, 1 otherwise.
+
+set -euo pipefail
+
+# NOTE(review): the README documents infrastructure/backups/ as the backup
+# location; this default is kept as committed. Set BACKUP_DIR to reconcile.
+BACKUP_DIR="${BACKUP_DIR:-/Users/timothy.bendt/developer/cloud-compose/backups}"
+CONTROLLER="${CONTROLLER:-ubuntu@192.168.2.130}"
+RETENTION_DAYS="${RETENTION_DAYS:-30}"
+SSH_OPTS=(-o ConnectTimeout=10 -i "$HOME/.ssh/id_ed25519")
+DATE=$(date +%Y-%m-%d)
+FAILURES=0
+
+echo "💾 Starting critical data backup..."
+echo "Date: $DATE"
+echo ""
+
+mkdir -p "$BACKUP_DIR"
+
+# Print message $1 and count one failed backup.
+record_failure() {
+    echo "$1"
+    FAILURES=$((FAILURES + 1))
+}
+
+# Print the ID of the first running container on the controller whose name
+# matches $1; prints nothing when no such container runs there.
+find_container() {
+    ssh -n "${SSH_OPTS[@]}" "$CONTROLLER" "docker ps -q -f name=$1" | head -1
+}
+
+# Run remote command $2 on the controller and save its stdout as local file $1.
+# Stdin is forwarded to the remote command. Output goes to "$1.partial" first
+# and is only renamed when the command succeeded and produced data, so a
+# failed or empty dump never looks like a valid backup.
+remote_to_file() {
+    local output_file=$1
+    local remote_cmd=$2
+    local partial="${output_file}.partial"
+
+    if ssh "${SSH_OPTS[@]}" "$CONTROLLER" "$remote_cmd" > "$partial" && [ -s "$partial" ]; then
+        mv "$partial" "$output_file"
+    else
+        rm -f "$partial"
+        return 1
+    fi
+}
+
+# Function to backup a PostgreSQL database
+# $1 - container name filter, $2 - database name
+backup_postgres() {
+    local service=$1
+    local db_name=$2
+    local output_file="$BACKUP_DIR/${service}-${db_name}-${DATE}.sql"
+    local container
+
+    echo " 📊 Backing up PostgreSQL: $service/$db_name..."
+
+    # Find the container
+    container=$(find_container "$service") || container=""
+    if [ -z "$container" ]; then
+        record_failure " ❌ Container not found: $service"
+        return 0
+    fi
+
+    if remote_to_file "$output_file" "docker exec $container pg_dump -U postgres $db_name" < /dev/null; then
+        echo " ✅ Saved to: $output_file"
+    else
+        record_failure " ❌ Dump failed: $service/$db_name"
+    fi
+}
+
+# Function to backup a MariaDB database
+# $1 - container name filter, $2 - database name
+backup_mariadb() {
+    local service=$1
+    local db_name=$2
+    local output_file="$BACKUP_DIR/${service}-${db_name}-${DATE}.sql"
+    local container
+
+    echo " 📊 Backing up MariaDB: $service/$db_name..."
+
+    if [ -z "${MARIADB_ROOT_PASSWORD:-}" ]; then
+        record_failure " ⚠️ Could not backup (MARIADB_ROOT_PASSWORD is not set)"
+        return 0
+    fi
+
+    container=$(find_container "$service") || container=""
+    if [ -z "$container" ]; then
+        record_failure " ❌ Container not found: $service"
+        return 0
+    fi
+
+    # The password travels over stdin and reaches mariadb-dump through the
+    # MYSQL_PWD environment variable, so it never appears in this file or in
+    # a process argument list on either machine.
+    if printf '%s\n' "$MARIADB_ROOT_PASSWORD" |
+        remote_to_file "$output_file" \
+            "docker exec -i $container sh -c 'IFS= read -r MYSQL_PWD && export MYSQL_PWD && exec mariadb-dump -u root $db_name'"; then
+        echo " ✅ Saved to: $output_file"
+    else
+        record_failure " ⚠️ Could not backup (check credentials)"
+    fi
+}
+
+# Function to backup volume data
+# $1 - name used for the archive file, $2 - docker volume name
+backup_volume() {
+    local service=$1
+    local volume=$2
+    local output_file="$BACKUP_DIR/${service}-data-${DATE}.tar.gz"
+
+    echo " 💿 Backing up volume: $volume..."
+
+    # The archive is streamed over ssh instead of being staged in the
+    # controller's /tmp: the staged file was created by root inside the
+    # container, under a predictable name, and the cleanup `rm` ran as the
+    # unprivileged ssh user.
+    if remote_to_file "$output_file" "docker run --rm -v $volume:/data:ro alpine tar czf - -C /data ." < /dev/null; then
+        echo " ✅ Saved to: $output_file"
+    else
+        record_failure " ❌ Volume backup failed: $volume"
+    fi
+}
+
+echo "📦 Backing up databases..."
+
+# Dokploy database
+backup_postgres "dokploy-postgres" "dokploy"
+
+# Immich database
+backup_postgres "immich3-compose-ubyhe9-immich-database" "immich"
+
+# BewCloud database
+backup_postgres "bewcloud-postgres-in40hh" "bewcloud"
+
+# Pancake database
+backup_mariadb "bendtstudio-pancake-bzgfpc" "pancake"
+
+echo ""
+echo "📦 Backing up application data..."
+
+# Memos data
+backup_volume "bewcloud-memos-ssogxn-memos" "bewcloud-memos-ssogxn_memos_data"
+
+# Gitea data
+echo " 💿 Backing up Gitea repositories..."
+gitea_container=$(find_container "gitea") || gitea_container=""
+if [ -n "$gitea_container" ]; then
+    gitea_archive="$BACKUP_DIR/gitea-data-${DATE}.tar.gz"
+    # tar writes to stdout: an archive written to /tmp inside the container is
+    # not visible in the controller's own /tmp, where scp used to look for it.
+    if remote_to_file "$gitea_archive" "docker exec $gitea_container tar czf - -C /data ." < /dev/null; then
+        echo " ✅ Saved to: $gitea_archive"
+    else
+        record_failure " ❌ Archive failed: gitea"
+    fi
+else
+    record_failure " ❌ Container not found: gitea"
+fi
+
+echo ""
+if [ "$FAILURES" -eq 0 ]; then
+    echo "🧹 Cleaning up old backups (older than $RETENTION_DAYS days)..."
+    find "$BACKUP_DIR" -type f -mtime +"$RETENTION_DAYS" -delete
+    echo " ✅ Cleanup complete"
+else
+    # Never prune after a failed run, otherwise a long outage would end up
+    # deleting the last good backups.
+    echo "🧹 Skipping cleanup: $FAILURES backup(s) failed, older backups are kept"
+fi
+
+echo ""
+echo "📄 Creating backup report..."
+cat > "$BACKUP_DIR/BACKUP_REPORT-${DATE}.md" << EOF
+# Backup Report - $DATE
+
+## Backup Location
+$BACKUP_DIR
+
+## Result
+Failed backups: $FAILURES
+
+## Files Backed Up
+\`\`\`
+$(ls -lh "$BACKUP_DIR"/*-"${DATE}"* 2>/dev/null || echo "No files found")
+\`\`\`
+
+## Services Status
+$(ssh -n "${SSH_OPTS[@]}" "$CONTROLLER" "docker service ls --format 'table {{.Name}}\t{{.Replicas}}\t{{.Image}}'" 2>/dev/null || echo "Unable to fetch status")
+
+## Next Steps
+1. Verify backup files are valid
+2. Copy backups to offsite storage (MinIO, external drive, etc.)
+3. Test restore procedure periodically
+
+## Retention Policy
+Backups older than $RETENTION_DAYS days are automatically deleted.
+
+---
+Generated: $(date)
+EOF
+
+echo ""
+if [ "$FAILURES" -eq 0 ]; then
+    echo "✅ Backup complete!"
+else
+    echo "⚠️ Backup finished with $FAILURES failure(s)"
+fi
+echo "📁 Location: $BACKUP_DIR/"
+echo "📄 Report: $BACKUP_DIR/BACKUP_REPORT-${DATE}.md"
+echo ""
+echo "⚠️ Remember to copy backups to offsite storage!"
+
+# Let cron and other callers see a failed run.
+if [ "$FAILURES" -ne 0 ]; then
+    exit 1
+fi