Compare commits
20 Commits
8d23a6f576
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b206bb0049 | ||
|
|
4cd7b26c58 | ||
|
|
7d59ead221 | ||
|
|
0d710e82ee | ||
|
|
6c1eebd5e5 | ||
|
|
7eff53c37b | ||
|
|
52d7378ec5 | ||
|
|
966bc22a71 | ||
|
|
5ebec1987c | ||
|
|
722c83be73 | ||
|
|
72d5b22e24 | ||
|
|
e7bd3fde6c | ||
|
|
3572e36ab8 | ||
|
|
61236e864c | ||
|
|
fab217c259 | ||
|
|
923e1f2389 | ||
|
|
9c697104d6 | ||
|
|
a1a41ba696 | ||
|
|
fd2d2dde13 | ||
|
|
15c9dee474 |
403
ACTION_PLAN.md
Normal file
403
ACTION_PLAN.md
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
# Home Lab Action Plan
|
||||||
|
|
||||||
|
## Phase 1: Critical Fixes (Do This Week)
|
||||||
|
|
||||||
|
### 1.1 Fix Failing Services
|
||||||
|
|
||||||
|
**bewcloud-memos (Restarting Loop)**
|
||||||
|
```bash
|
||||||
|
# SSH to controller
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130
|
||||||
|
|
||||||
|
# Check what's wrong
|
||||||
|
docker service logs bewcloud-memos-ssogxn-memos --tail 100
|
||||||
|
|
||||||
|
# Common fixes:
|
||||||
|
# If database connection issue:
|
||||||
|
docker service update --env-add "MEMOS_DB_HOST=correct-hostname" bewcloud-memos-ssogxn-memos
|
||||||
|
|
||||||
|
# If it keeps failing, try recreating:
|
||||||
|
docker service rm bewcloud-memos-ssogxn-memos
|
||||||
|
# Then redeploy via Dokploy UI
|
||||||
|
```
|
||||||
|
|
||||||
|
**bendtstudio-webstatic (Rollback Paused)**
|
||||||
|
```bash
|
||||||
|
# Check the error
|
||||||
|
docker service ps bendtstudio-webstatic-iq9evl --no-trunc
|
||||||
|
|
||||||
|
# Force update to retry
|
||||||
|
docker service update --force bendtstudio-webstatic-iq9evl
|
||||||
|
|
||||||
|
# If that fails, inspect the image
|
||||||
|
docker service inspect bendtstudio-webstatic-iq9evl --format '{{.Spec.TaskTemplate.ContainerSpec.Image}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**syncthing (Stopped)**
|
||||||
|
```bash
|
||||||
|
# Option A: Start it if you need it
|
||||||
|
docker service scale syncthing=1
|
||||||
|
|
||||||
|
# Option B: Remove it if not needed
|
||||||
|
docker service rm syncthing
|
||||||
|
# Also remove the volume if no longer needed
|
||||||
|
docker volume rm cloud-syncthing-i2rpwr_syncthing_config
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.2 Clean Up Unused Resources
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Remove unused volumes (reclaim ~595MB)
|
||||||
|
docker volume prune
|
||||||
|
|
||||||
|
# Remove unused images
|
||||||
|
docker image prune -a
|
||||||
|
|
||||||
|
# System-wide cleanup
|
||||||
|
docker system prune -a --volumes
|
||||||
|
```
|
||||||
|
|
||||||
|
### 1.3 Document Current State
|
||||||
|
|
||||||
|
Take screenshots of:
|
||||||
|
- Dokploy UI (all projects)
|
||||||
|
- Swarmpit dashboard
|
||||||
|
- Traefik dashboard (http://192.168.2.130:8080)
|
||||||
|
- MinIO console (http://192.168.2.18:9001)
|
||||||
|
- Gitea repositories
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2: Configuration Backup (Do This Week)
|
||||||
|
|
||||||
|
### 2.1 Create Git Repository for Infrastructure
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On the controller node:
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130
|
||||||
|
|
||||||
|
# Create a backup directory
|
||||||
|
mkdir -p ~/infrastructure-backup/$(date +%Y-%m-%d)
|
||||||
|
cd ~/infrastructure-backup/$(date +%Y-%m-%d)
|
||||||
|
|
||||||
|
# Copy all compose files
|
||||||
|
cp -r /etc/dokploy/compose ./dokploy-compose
|
||||||
|
cp -r /etc/dokploy/traefik ./traefik-config
|
||||||
|
cp ~/minio-stack.yml ./
|
||||||
|
|
||||||
|
# Export service configs
|
||||||
|
mkdir -p ./service-configs
|
||||||
|
docker service ls -q | while read -r service; do
|
||||||
|
docker service inspect "$service" > "./service-configs/${service}.json"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Export stack configs
|
||||||
|
docker stack ls --format '{{.Name}}' | while read -r stack; do
|
||||||
|
docker stack ps "$stack" > "./service-configs/${stack}-tasks.txt"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Create a summary
|
||||||
|
cat > README.txt << EOF
|
||||||
|
Infrastructure Backup - $(date)
|
||||||
|
Cluster: Docker Swarm with Dokploy
|
||||||
|
Nodes: 3 (tpi-n1, tpi-n2, node-nas)
|
||||||
|
Services: $(docker service ls -q | wc -l) services
|
||||||
|
Stacks: $(docker stack ls --format '{{.Name}}' | wc -l) stacks
|
||||||
|
|
||||||
|
See HOMELAB_AUDIT.md for full documentation.
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Create tar archive
|
||||||
|
cd ..
|
||||||
|
tar -czf infrastructure-$(date +%Y-%m-%d).tar.gz $(date +%Y-%m-%d)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 Commit to Gitea
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone your infrastructure repo (create if needed)
|
||||||
|
# Replace with your actual Gitea URL
|
||||||
|
git clone http://gitea.bendtstudio.com:3000/sirtimbly/homelab-configs.git
|
||||||
|
cd homelab-configs
|
||||||
|
|
||||||
|
# Copy backed up configs
|
||||||
|
cp -r ~/infrastructure-backup/$(date +%Y-%m-%d)/* .
|
||||||
|
|
||||||
|
# Organize by service
|
||||||
|
mkdir -p {stacks,compose,dokploy,traefik,docs}
|
||||||
|
mv dokploy-compose/* compose/ 2>/dev/null || true
|
||||||
|
mv traefik-config/* traefik/ 2>/dev/null || true
|
||||||
|
mv minio-stack.yml stacks/
|
||||||
|
mv service-configs/* docs/ 2>/dev/null || true
|
||||||
|
|
||||||
|
# Commit
|
||||||
|
git add .
|
||||||
|
git commit -m "Initial infrastructure backup - $(date +%Y-%m-%d)
|
||||||
|
|
||||||
|
- All Dokploy compose files
|
||||||
|
- Traefik configuration
|
||||||
|
- MinIO stack definition
|
||||||
|
- Service inspection exports
|
||||||
|
- Task history exports
|
||||||
|
|
||||||
|
Services backed up:
|
||||||
|
$(docker service ls --format '- {{.Name}}' | sort)"
|
||||||
|
|
||||||
|
git push origin main
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3: Security Hardening (Do Next Week)
|
||||||
|
|
||||||
|
### 3.1 Remove Exposed Credentials
|
||||||
|
|
||||||
|
**Problem:** Services have passwords in environment variables visible in Docker configs
|
||||||
|
|
||||||
|
**Solution:** Use Docker secrets or Dokploy environment variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: Securing MinIO
|
||||||
|
# Instead of having password in compose file, use Docker secret:
|
||||||
|
|
||||||
|
echo "your-minio-password" | docker secret create minio_root_password -
|
||||||
|
|
||||||
|
# Then in compose:
|
||||||
|
# environment:
|
||||||
|
# MINIO_ROOT_PASSWORD_FILE: /run/secrets/minio_root_password
|
||||||
|
# secrets:
|
||||||
|
# - minio_root_password
|
||||||
|
```
|
||||||
|
|
||||||
|
**Action items:**
|
||||||
|
1. List all services with exposed passwords:
|
||||||
|
```bash
|
||||||
|
docker service ls -q | xargs -I {} docker service inspect {} --format '{{.Spec.Name}}: {{range .Spec.TaskTemplate.ContainerSpec.Env}}{{.}} {{end}}' | grep -i password
|
||||||
|
```
|
||||||
|
|
||||||
|
2. For each service, create a plan to move credentials to:
|
||||||
|
- Docker secrets (best for swarm)
|
||||||
|
- Environment files (easier to manage)
|
||||||
|
- Dokploy UI environment variables
|
||||||
|
|
||||||
|
3. Update compose files and redeploy
|
||||||
|
|
||||||
|
### 3.2 Update Default Passwords
|
||||||
|
|
||||||
|
Check for default/weak passwords:
|
||||||
|
- Dokploy (if still default)
|
||||||
|
- MinIO
|
||||||
|
- Gitea admin
|
||||||
|
- Technitium DNS
|
||||||
|
- Any databases
|
||||||
|
|
||||||
|
### 3.3 Review Exposed Ports
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check all published ports
|
||||||
|
docker service ls --format '{{.Name}}: {{.Ports}}'
|
||||||
|
|
||||||
|
# Check if any services are exposed without Traefik
|
||||||
|
# (Should only be: 53, 2222, 3000, 8384, 9000-9001)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4: Monitoring Setup (Do Next Week)
|
||||||
|
|
||||||
|
### 4.1 Set Up Prometheus + Grafana
|
||||||
|
|
||||||
|
You mentioned these in PLAN.md but they're not running. Let's add them:
|
||||||
|
|
||||||
|
Create `monitoring-stack.yml`:
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
volumes:
|
||||||
|
- prometheus-data:/prometheus
|
||||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||||
|
- '--storage.tsdb.path=/prometheus'
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == manager
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
volumes:
|
||||||
|
- grafana-data:/var/lib/grafana
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
|
||||||
|
secrets:
|
||||||
|
- grafana_admin_password
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
|
deploy:
|
||||||
|
labels:
|
||||||
|
- traefik.http.routers.grafana.rule=Host(`grafana.bendtstudio.com`)
|
||||||
|
- traefik.http.routers.grafana.entrypoints=websecure
|
||||||
|
- traefik.http.routers.grafana.tls.certresolver=letsencrypt
|
||||||
|
- traefik.enable=true
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
prometheus-data:
|
||||||
|
grafana-data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
dokploy-network:
|
||||||
|
external: true
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
grafana_admin_password:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 Add Node Exporter
|
||||||
|
|
||||||
|
Deploy node-exporter on all nodes to collect system metrics.
|
||||||
|
|
||||||
|
### 4.3 Configure Alerts
|
||||||
|
|
||||||
|
Set up alerts for:
|
||||||
|
- Service down
|
||||||
|
- High CPU/memory usage
|
||||||
|
- Disk space low
|
||||||
|
- Certificate expiration
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5: Backup Strategy (Do Within 2 Weeks)
|
||||||
|
|
||||||
|
### 5.1 Define What to Back Up
|
||||||
|
|
||||||
|
**Critical Data:**
|
||||||
|
1. Gitea repositories (/data/git)
|
||||||
|
2. Dokploy database
|
||||||
|
3. MinIO buckets
|
||||||
|
4. Immich photos (/mnt/synology-data/immich)
|
||||||
|
5. PostgreSQL databases
|
||||||
|
6. Configuration files
|
||||||
|
|
||||||
|
### 5.2 Create Backup Scripts
|
||||||
|
|
||||||
|
Example backup script for Gitea:
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# /opt/backup/backup-gitea.sh
|
||||||
|
|
||||||
|
BACKUP_DIR="/backup/gitea/$(date +%Y%m%d)"
|
||||||
|
mkdir -p "$BACKUP_DIR"
|
||||||
|
|
||||||
|
# Backup Gitea data
|
||||||
|
docker exec gitea-giteasqlite-bhymqw-gitea-1 tar czf /tmp/gitea-backup.tar.gz /data
|
||||||
|
docker cp gitea-giteasqlite-bhymqw-gitea-1:/tmp/gitea-backup.tar.gz "$BACKUP_DIR/"
|
||||||
|
|
||||||
|
# Backup to MinIO (offsite)
|
||||||
|
mc cp "$BACKUP_DIR/gitea-backup.tar.gz" minio/backups/gitea/
|
||||||
|
|
||||||
|
# Clean up old backups (keep 30 days)
|
||||||
|
find /backup/gitea -type d -mtime +30 -exec rm -rf {} +
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 Automate Backups
|
||||||
|
|
||||||
|
Add to crontab:
|
||||||
|
```bash
|
||||||
|
# Daily backups at 2 AM
|
||||||
|
0 2 * * * /opt/backup/backup-gitea.sh
|
||||||
|
0 3 * * * /opt/backup/backup-dokploy.sh
|
||||||
|
0 4 * * * /opt/backup/backup-databases.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 6: Documentation (Ongoing)
|
||||||
|
|
||||||
|
### 6.1 Create Service Catalog
|
||||||
|
|
||||||
|
For each service, document:
|
||||||
|
- **Purpose:** What does it do?
|
||||||
|
- **Access URL:** How do I reach it?
|
||||||
|
- **Dependencies:** What does it need?
|
||||||
|
- **Data location:** Where is data stored?
|
||||||
|
- **Backup procedure:** How to back it up?
|
||||||
|
- **Restore procedure:** How to restore it?
|
||||||
|
|
||||||
|
### 6.2 Create Runbooks
|
||||||
|
|
||||||
|
Common operations:
|
||||||
|
- Adding a new service
|
||||||
|
- Scaling a service
|
||||||
|
- Updating a service
|
||||||
|
- Removing a service
|
||||||
|
- Recovering from node failure
|
||||||
|
- Restoring from backup
|
||||||
|
|
||||||
|
### 6.3 Network Diagram
|
||||||
|
|
||||||
|
Create a visual diagram showing:
|
||||||
|
- Nodes and their roles
|
||||||
|
- Services and their locations
|
||||||
|
- Network connections
|
||||||
|
- Data flows
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Cluster status
|
||||||
|
docker node ls
|
||||||
|
docker service ls
|
||||||
|
docker stack ls
|
||||||
|
|
||||||
|
# Service management
|
||||||
|
docker service logs <service> --tail 100 -f
|
||||||
|
docker service ps <service>
|
||||||
|
docker service scale <service>=<count>
|
||||||
|
docker service update --force <service>
|
||||||
|
|
||||||
|
# Resource usage
|
||||||
|
docker system df
|
||||||
|
docker stats
|
||||||
|
|
||||||
|
# SSH access
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130 # Manager
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.19 # Worker
|
||||||
|
|
||||||
|
# Web UIs
|
||||||
|
curl http://192.168.2.130:3000 # Dokploy
|
||||||
|
curl http://192.168.2.130:888 # Swarmpit
|
||||||
|
curl http://192.168.2.130:8080 # Traefik
|
||||||
|
curl http://192.168.2.18:5380 # Technitium DNS
|
||||||
|
curl http://192.168.2.18:9001 # MinIO Console
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Questions for You
|
||||||
|
|
||||||
|
Before we proceed, I need to clarify a few things:
|
||||||
|
|
||||||
|
1. **NAS Node Access:** What are the SSH credentials for node-nas (192.168.2.18)?
|
||||||
|
|
||||||
|
2. **bendtstudio-app:** Is this service needed? It has 0 replicas.
|
||||||
|
|
||||||
|
3. **syncthing:** Do you want to keep this? It's currently stopped.
|
||||||
|
|
||||||
|
4. **Monitoring:** Do you want me to set up Prometheus/Grafana now, or later?
|
||||||
|
|
||||||
|
5. **Gitea:** Can you provide access credentials so I can check what's already version controlled?
|
||||||
|
|
||||||
|
6. **Priority:** Which phase should we tackle first? I recommend Phase 1 (critical fixes).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Action Plan Version 1.0 - February 9, 2026*
|
||||||
685
GITHUB_RUNNERS_PLAN.md
Normal file
685
GITHUB_RUNNERS_PLAN.md
Normal file
@@ -0,0 +1,685 @@
|
|||||||
|
# GitHub Actions Self-Hosted Runners Deployment Plan
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
**Goal:** Deploy GitHub Actions self-hosted runners on your Docker Swarm cluster to run CI/CD workflows with unlimited minutes, custom environments, and access to your homelab resources.
|
||||||
|
|
||||||
|
**Architecture:** Docker-based runners deployed as a Swarm service with auto-scaling capabilities.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture Decision
|
||||||
|
|
||||||
|
### Option 1: Docker Container Runners (Recommended for your setup)
|
||||||
|
- ✅ Runs in Docker containers on your existing cluster
|
||||||
|
- ✅ Scales horizontally by adding/removing containers
|
||||||
|
- ✅ Uses your existing infrastructure (tpi-n1, tpi-n2, node-nas)
|
||||||
|
- ✅ Easy to manage through Docker Swarm
|
||||||
|
- ✅ ARM64 and x86_64 support for multi-arch builds
|
||||||
|
|
||||||
|
### Option 2: VM/Physical Runners (Alternative)
|
||||||
|
- Runners installed directly on VMs or bare metal
|
||||||
|
- More isolated but harder to manage
|
||||||
|
- Not recommended for your containerized setup
|
||||||
|
|
||||||
|
**Decision:** Use Docker Container Runners (Option 1) with multi-arch support.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Deployment Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
GitHub Repository
|
||||||
|
│
|
||||||
|
│ Webhook/REST API
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────┐
|
||||||
|
│ GitHub Actions Service │
|
||||||
|
└─────────────────────────────┘
|
||||||
|
│
|
||||||
|
│ Job Request
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────┐
|
||||||
|
│ Your Docker Swarm Cluster │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────┐ │
|
||||||
|
│ │ Runner Service │ │
|
||||||
|
│ │ (Multiple Replicas)│ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ ┌─────┐ ┌─────┐ │ │
|
||||||
|
│ │ │ ARM │ │x86_64│ │ │
|
||||||
|
│ │ │64 │ │ │ │ │
|
||||||
|
│ │ └─────┘ └─────┘ │ │
|
||||||
|
│ └─────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────────────┐ │
|
||||||
|
│ │ Docker-in-Docker │ │
|
||||||
|
│ │ (for Docker builds)│ │
|
||||||
|
│ └─────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
└─────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: Planning & Preparation
|
||||||
|
|
||||||
|
### Step 1: Determine Requirements
|
||||||
|
|
||||||
|
**Use Cases:**
|
||||||
|
- [ ] Build and test applications
|
||||||
|
- [ ] Deploy to your homelab (Kubernetes/Docker Swarm)
|
||||||
|
- [ ] Run ARM64 builds (for Raspberry Pi/ARM apps)
|
||||||
|
- [ ] Run x86_64 builds (standard applications)
|
||||||
|
- [ ] Access private network resources (databases, internal APIs)
|
||||||
|
- [ ] Build Docker images and push to your Gitea registry
|
||||||
|
|
||||||
|
**Resource Requirements per Runner:**
|
||||||
|
- CPU: 2+ cores recommended
|
||||||
|
- Memory: 4GB+ RAM per runner
|
||||||
|
- Disk: 20GB+ for workspace and Docker layers
|
||||||
|
- Network: Outbound HTTPS to GitHub
|
||||||
|
|
||||||
|
**Current Cluster Capacity:**
|
||||||
|
- tpi-n1: 8 cores ARM64, 8GB RAM (Manager)
|
||||||
|
- tpi-n2: 8 cores ARM64, 8GB RAM (Worker)
|
||||||
|
- node-nas: 2 cores x86_64, 8GB RAM (Storage)
|
||||||
|
|
||||||
|
**Recommended Allocation:**
|
||||||
|
- 2 runners on tpi-n1 (ARM64)
|
||||||
|
- 2 runners on tpi-n2 (ARM64)
|
||||||
|
- 1 runner on node-nas (x86_64)
|
||||||
|
|
||||||
|
### Step 2: GitHub Configuration
|
||||||
|
|
||||||
|
**Choose Runner Level:**
|
||||||
|
- [ ] **Repository-level** - Dedicated to specific repo (recommended to start)
|
||||||
|
- [ ] **Organization-level** - Shared across org repos
|
||||||
|
- [ ] **Enterprise-level** - Shared across enterprise
|
||||||
|
|
||||||
|
**For your use case:** Start with **repository-level** runners, then expand to organization-level if needed.
|
||||||
|
|
||||||
|
**Required GitHub Settings:**
|
||||||
|
1. Go to: `Settings > Actions > Runners > New self-hosted runner`
|
||||||
|
2. Note the **Registration Token** (expires after 1 hour)
|
||||||
|
3. Note the **Runner Group** (default: "Default")
|
||||||
|
4. Configure labels (e.g., `homelab`, `arm64`, `x86_64`, `self-hosted`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2: Infrastructure Setup
|
||||||
|
|
||||||
|
### Step 3: Create Docker Network
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On controller (tpi-n1)
|
||||||
|
ssh ubuntu@192.168.2.130
|
||||||
|
|
||||||
|
# Create overlay network for runners
|
||||||
|
docker network create --driver overlay --attachable github-runners-network
|
||||||
|
|
||||||
|
# Verify
|
||||||
|
docker network ls | grep github
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 4: Create Persistent Storage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create volume for runner cache (shared across runners)
|
||||||
|
docker volume create github-runner-cache
|
||||||
|
|
||||||
|
# Create volume for Docker build cache
|
||||||
|
docker volume create github-runner-docker-cache
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 5: Prepare Node Labels
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Verify node labels
|
||||||
|
ssh ubuntu@192.168.2.130
|
||||||
|
docker node ls -q | xargs docker node inspect --format '{{.Description.Hostname}} {{.Spec.Labels}}'
|
||||||
|
|
||||||
|
# Expected output:
|
||||||
|
# tpi-n1 map[infra:true role:storage storage:high]
|
||||||
|
# tpi-n2 map[role:compute]
|
||||||
|
# node-nas map[type:nas]
|
||||||
|
|
||||||
|
# Add architecture labels if missing:
|
||||||
|
docker node update --label-add arch=arm64 tpi-n1
|
||||||
|
docker node update --label-add arch=arm64 tpi-n2
|
||||||
|
docker node update --label-add arch=x86_64 node-nas
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3: Runner Deployment
|
||||||
|
|
||||||
|
### Step 6: Create Environment File
|
||||||
|
|
||||||
|
Create `.env` file:
|
||||||
|
```bash
|
||||||
|
# GitHub Configuration
|
||||||
|
GITHUB_TOKEN=your_github_personal_access_token
|
||||||
|
GITHUB_OWNER=your-github-username-or-org
|
||||||
|
GITHUB_REPO=your-repository-name # Leave empty for org-level
|
||||||
|
|
||||||
|
# Runner Configuration
|
||||||
|
RUNNER_NAME_PREFIX=homelab
|
||||||
|
RUNNER_LABELS=self-hosted,homelab,linux
|
||||||
|
RUNNER_GROUP=Default
|
||||||
|
|
||||||
|
# Docker Configuration
|
||||||
|
DOCKER_TLS_CERTDIR=/certs
|
||||||
|
|
||||||
|
# Optional: Pre-installed tools
|
||||||
|
PRE_INSTALL_TOOLS="docker-compose,nodejs,npm,yarn,python3,pip,git"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 7: Create Docker Compose Stack
|
||||||
|
|
||||||
|
Create `github-runners-stack.yml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
services:
|
||||||
|
# ARM64 Runners
|
||||||
|
runner-arm64:
|
||||||
|
image: myoung34/github-runner:latest
|
||||||
|
environment:
|
||||||
|
- ACCESS_TOKEN=${GITHUB_TOKEN}
|
||||||
|
- REPO_URL=https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}
|
||||||
|
- RUNNER_NAME=${RUNNER_NAME_PREFIX}-arm64-{{.Task.Slot}}
|
||||||
|
- RUNNER_WORKDIR=/tmp/runner-work
|
||||||
|
- RUNNER_GROUP=${RUNNER_GROUP:-Default}
|
||||||
|
- RUNNER_SCOPE=repo
|
||||||
|
- LABELS=${RUNNER_LABELS},arm64
|
||||||
|
- DISABLE_AUTO_UPDATE=true
|
||||||
|
- EPHEMERAL=true # One job per container
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
- github-runner-cache:/home/runner/cache
|
||||||
|
- github-runner-docker-cache:/var/lib/docker
|
||||||
|
networks:
|
||||||
|
- github-runners-network
|
||||||
|
- dokploy-network
|
||||||
|
deploy:
|
||||||
|
mode: replicated
|
||||||
|
replicas: 2
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.arch == arm64
|
||||||
|
restart_policy:
|
||||||
|
condition: any
|
||||||
|
delay: 5s
|
||||||
|
max_attempts: 3
|
||||||
|
privileged: true # Required for Docker-in-Docker
|
||||||
|
|
||||||
|
# x86_64 Runners
|
||||||
|
runner-x86_64:
|
||||||
|
image: myoung34/github-runner:latest
|
||||||
|
environment:
|
||||||
|
- ACCESS_TOKEN=${GITHUB_TOKEN}
|
||||||
|
- REPO_URL=https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}
|
||||||
|
- RUNNER_NAME=${RUNNER_NAME_PREFIX}-x86_64-{{.Task.Slot}}
|
||||||
|
- RUNNER_WORKDIR=/tmp/runner-work
|
||||||
|
- RUNNER_GROUP=${RUNNER_GROUP:-Default}
|
||||||
|
- RUNNER_SCOPE=repo
|
||||||
|
- LABELS=${RUNNER_LABELS},x86_64
|
||||||
|
- DISABLE_AUTO_UPDATE=true
|
||||||
|
- EPHEMERAL=true
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
- github-runner-cache:/home/runner/cache
|
||||||
|
- github-runner-docker-cache:/var/lib/docker
|
||||||
|
networks:
|
||||||
|
- github-runners-network
|
||||||
|
- dokploy-network
|
||||||
|
deploy:
|
||||||
|
mode: replicated
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.arch == x86_64
|
||||||
|
restart_policy:
|
||||||
|
condition: any
|
||||||
|
delay: 5s
|
||||||
|
max_attempts: 3
|
||||||
|
privileged: true
|
||||||
|
|
||||||
|
# Optional: Runner Autoscaler
|
||||||
|
autoscaler:
|
||||||
|
image: ghcr.io/actions-runner-controller/actions-runner-controller:latest
|
||||||
|
environment:
|
||||||
|
- GITHUB_TOKEN=${GITHUB_TOKEN}
|
||||||
|
- RUNNER_SCOPE=repo
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
networks:
|
||||||
|
- github-runners-network
|
||||||
|
deploy:
|
||||||
|
mode: replicated
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == manager
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
github-runner-cache:
|
||||||
|
github-runner-docker-cache:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
github-runners-network:
|
||||||
|
driver: overlay
|
||||||
|
dokploy-network:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 8: Deploy Runners
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy files to controller
|
||||||
|
scp github-runners-stack.yml ubuntu@192.168.2.130:~/
|
||||||
|
scp .env ubuntu@192.168.2.130:~/
|
||||||
|
|
||||||
|
# SSH to controller
|
||||||
|
ssh ubuntu@192.168.2.130
|
||||||
|
|
||||||
|
# Load environment
|
||||||
|
set -a && source .env && set +a
|
||||||
|
|
||||||
|
# Deploy stack
|
||||||
|
docker stack deploy -c github-runners-stack.yml github-runners
|
||||||
|
|
||||||
|
# Verify deployment
|
||||||
|
docker stack ps github-runners
|
||||||
|
docker service ls | grep github
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4: GitHub Integration
|
||||||
|
|
||||||
|
### Step 9: Verify Runners in GitHub
|
||||||
|
|
||||||
|
1. Go to: `https://github.com/[OWNER]/[REPO]/settings/actions/runners`
|
||||||
|
2. You should see your runners listed as "Idle"
|
||||||
|
3. Labels should show: `self-hosted`, `homelab`, `linux`, `arm64` or `x86_64`
|
||||||
|
|
||||||
|
### Step 10: Test with Sample Workflow
|
||||||
|
|
||||||
|
Create `.github/workflows/test-self-hosted.yml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
name: Test Self-Hosted Runners
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ main ]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test-arm64:
|
||||||
|
runs-on: [self-hosted, homelab, arm64]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Show runner info
|
||||||
|
run: |
|
||||||
|
echo "Architecture: $(uname -m)"
|
||||||
|
echo "OS: $(uname -s)"
|
||||||
|
echo "Node: $(hostname)"
|
||||||
|
echo "CPU: $(nproc)"
|
||||||
|
echo "Memory: $(free -h | grep Mem)"
|
||||||
|
|
||||||
|
- name: Test Docker
|
||||||
|
run: |
|
||||||
|
docker --version
|
||||||
|
docker info
|
||||||
|
docker run --rm hello-world
|
||||||
|
|
||||||
|
test-x86_64:
|
||||||
|
runs-on: [self-hosted, homelab, x86_64]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Show runner info
|
||||||
|
run: |
|
||||||
|
echo "Architecture: $(uname -m)"
|
||||||
|
echo "OS: $(uname -s)"
|
||||||
|
echo "Node: $(hostname)"
|
||||||
|
|
||||||
|
- name: Test access to homelab
|
||||||
|
run: |
|
||||||
|
# Test connectivity to your services
|
||||||
|
curl -s http://gitea.bendtstudio.com:3000 || echo "Gitea not accessible"
|
||||||
|
curl -s http://192.168.2.130:3000 || echo "Dokploy not accessible"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5: Security Hardening
|
||||||
|
|
||||||
|
### Step 11: Implement Security Best Practices
|
||||||
|
|
||||||
|
**1. Use Short-Lived Tokens:**
|
||||||
|
```bash
|
||||||
|
# Generate a GitHub App instead of PAT for better security
|
||||||
|
# Or use OpenID Connect (OIDC) for authentication
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Restrict Runner Permissions:**
|
||||||
|
```yaml
|
||||||
|
# Add to workflow
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: [self-hosted, homelab]
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write # Only if pushing to registry
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Network Isolation:**
|
||||||
|
```yaml
|
||||||
|
# Modify stack to use isolated network
|
||||||
|
networks:
|
||||||
|
github-runners-network:
|
||||||
|
driver: overlay
|
||||||
|
internal: true # No external access except through proxy
|
||||||
|
```
|
||||||
|
|
||||||
|
**4. Resource Limits:**
|
||||||
|
```yaml
|
||||||
|
# Add to service definition in stack
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '2'
|
||||||
|
memory: 4G
|
||||||
|
reservations:
|
||||||
|
cpus: '1'
|
||||||
|
memory: 2G
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 12: Enable Ephemeral Mode
|
||||||
|
|
||||||
|
Ephemeral runners (already configured with `EPHEMERAL=true`) provide better security:
|
||||||
|
- Each runner handles only one job
|
||||||
|
- Container is destroyed after job completion
|
||||||
|
- Fresh environment for every build
|
||||||
|
- Prevents credential leakage between jobs
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 6: Monitoring & Maintenance
|
||||||
|
|
||||||
|
### Step 13: Set Up Monitoring
|
||||||
|
|
||||||
|
**Create monitoring script** (`monitor-runners.sh`):
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Check runner status
|
||||||
|
echo "=== Docker Service Status ==="
|
||||||
|
docker service ls | grep github-runner
|
||||||
|
|
||||||
|
echo -e "\n=== Runner Containers ==="
|
||||||
|
docker ps --filter name=github-runner --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
|
||||||
|
|
||||||
|
echo -e "\n=== Recent Logs ==="
|
||||||
|
docker service logs github-runners_runner-arm64 --tail 50
|
||||||
|
docker service logs github-runners_runner-x86_64 --tail 50
|
||||||
|
|
||||||
|
echo -e "\n=== Resource Usage ==="
|
||||||
|
docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" | grep github-runner
|
||||||
|
```
|
||||||
|
|
||||||
|
**Create cron job for monitoring:**
|
||||||
|
```bash
|
||||||
|
# Add to crontab
|
||||||
|
crontab -e
|
||||||
|
|
||||||
|
# Check runner health every 5 minutes
|
||||||
|
*/5 * * * * /home/ubuntu/github-runners/monitor-runners.sh >> /var/log/github-runners.log 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 14: Set Up Log Rotation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create logrotate config
|
||||||
|
sudo tee /etc/logrotate.d/github-runners << EOF
|
||||||
|
/var/log/github-runners.log {
|
||||||
|
daily
|
||||||
|
rotate 7
|
||||||
|
compress
|
||||||
|
delaycompress
|
||||||
|
missingok
|
||||||
|
notifempty
|
||||||
|
create 644 ubuntu ubuntu
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 15: Backup Strategy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create backup script
|
||||||
|
#!/bin/bash
|
||||||
|
BACKUP_DIR="/backup/github-runners/$(date +%Y%m%d)"
|
||||||
|
mkdir -p "$BACKUP_DIR"
|
||||||
|
|
||||||
|
# Backup configuration
|
||||||
|
cp ~/github-runners-stack.yml "$BACKUP_DIR/"
|
||||||
|
cp ~/.env "$BACKUP_DIR/"
|
||||||
|
|
||||||
|
# Backup volumes
|
||||||
|
docker run --rm -v github-runner-cache:/data -v "$BACKUP_DIR":/backup alpine tar czf /backup/runner-cache.tar.gz -C /data .
|
||||||
|
docker run --rm -v github-runner-docker-cache:/data -v "$BACKUP_DIR":/backup alpine tar czf /backup/docker-cache.tar.gz -C /data .
|
||||||
|
|
||||||
|
echo "Backup completed: $BACKUP_DIR"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 7: Advanced Configuration
|
||||||
|
|
||||||
|
### Step 16: Cache Optimization
|
||||||
|
|
||||||
|
**Mount host cache directories:**
|
||||||
|
```yaml
|
||||||
|
volumes:
|
||||||
|
- /home/ubuntu/.cache/npm:/root/.npm
|
||||||
|
- /home/ubuntu/.cache/pip:/root/.cache/pip
|
||||||
|
- /home/ubuntu/.cache/go-build:/root/.cache/go-build
|
||||||
|
- /home/ubuntu/.cargo:/root/.cargo
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pre-install common tools in custom image** (`Dockerfile.runner`):
|
||||||
|
```dockerfile
|
||||||
|
FROM myoung34/github-runner:latest
|
||||||
|
|
||||||
|
# Install common build tools
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
build-essential \
|
||||||
|
nodejs \
|
||||||
|
npm \
|
||||||
|
python3 \
|
||||||
|
python3-pip \
|
||||||
|
golang-go \
|
||||||
|
openjdk-17-jdk \
|
||||||
|
maven \
|
||||||
|
gradle \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install Docker Compose
|
||||||
|
RUN curl -L "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" \
|
||||||
|
-o /usr/local/bin/docker-compose && \
|
||||||
|
chmod +x /usr/local/bin/docker-compose
|
||||||
|
|
||||||
|
# NOTE: `docker pull` cannot be run in a RUN instruction — no Docker daemon
# is available during `docker build`. Pre-pull common images at runtime on
# each node instead, e.g.:
#   docker pull node:lts-alpine
#   docker pull python:3.11-slim
|
||||||
|
```
|
||||||
|
|
||||||
|
Build and use custom image:
|
||||||
|
```bash
|
||||||
|
docker build -t your-registry/github-runner:custom -f Dockerfile.runner .
|
||||||
|
docker push your-registry/github-runner:custom
|
||||||
|
|
||||||
|
# Update stack to use custom image
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 17: Autoscaling Configuration
|
||||||
|
|
||||||
|
**Use Actions Runner Controller (ARC) for Kubernetes-style autoscaling:**
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Add to stack
|
||||||
|
autoscaler:
|
||||||
|
image: ghcr.io/actions-runner-controller/actions-runner-controller:latest
|
||||||
|
environment:
|
||||||
|
- GITHUB_TOKEN=${GITHUB_TOKEN}
|
||||||
|
- GITHUB_APP_ID=${GITHUB_APP_ID}
|
||||||
|
- GITHUB_APP_INSTALLATION_ID=${GITHUB_APP_INSTALLATION_ID}
|
||||||
|
- GITHUB_APP_PRIVATE_KEY=/etc/gh-app-key/private-key.pem
|
||||||
|
volumes:
|
||||||
|
- /path/to/private-key.pem:/etc/gh-app-key/private-key.pem:ro
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
deploy:
|
||||||
|
mode: replicated
|
||||||
|
replicas: 1
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.role == manager
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 18: Multi-Repository Setup
|
||||||
|
|
||||||
|
For organization-level runners, update environment:
|
||||||
|
```bash
|
||||||
|
# For org-level
|
||||||
|
RUNNER_SCOPE=org
|
||||||
|
ORG_NAME=your-organization
|
||||||
|
|
||||||
|
# Remove REPO_URL, use:
|
||||||
|
ORG_URL=https://github.com/${ORG_NAME}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 8: Troubleshooting Guide
|
||||||
|
|
||||||
|
### Common Issues & Solutions
|
||||||
|
|
||||||
|
**1. Runner shows "Offline" in GitHub:**
|
||||||
|
```bash
|
||||||
|
# Check logs
|
||||||
|
docker service logs github-runners_runner-arm64
|
||||||
|
|
||||||
|
# Common causes:
|
||||||
|
# - Expired token (regenerate in GitHub settings)
|
||||||
|
# - Network connectivity issue
|
||||||
|
docker exec <container> curl -I https://github.com
|
||||||
|
|
||||||
|
# Restart service
|
||||||
|
docker service update --force github-runners_runner-arm64
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Docker-in-Docker not working:**
|
||||||
|
```bash
|
||||||
|
# Ensure privileged mode is enabled
|
||||||
|
# Check Docker socket is mounted
|
||||||
|
docker exec <container> docker ps
|
||||||
|
|
||||||
|
# If failing, check AppArmor/SELinux
|
||||||
|
sudo aa-status | grep docker
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Jobs stuck in "Queued":**
|
||||||
|
```bash
|
||||||
|
# Check if runners are picking up jobs
|
||||||
|
docker service ps github-runners_runner-arm64
|
||||||
|
|
||||||
|
# Verify labels match
|
||||||
|
docker exec <container> cat /home/runner/.runner | jq '.labels'
|
||||||
|
```
|
||||||
|
|
||||||
|
**4. Out of disk space:**
|
||||||
|
```bash
|
||||||
|
# Clean up Docker system
|
||||||
|
docker system prune -a --volumes
|
||||||
|
|
||||||
|
# Clean runner cache
|
||||||
|
docker volume rm github-runner-docker-cache
|
||||||
|
docker volume create github-runner-docker-cache
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Checklist
|
||||||
|
|
||||||
|
### Phase 1: Planning
|
||||||
|
- [ ] Determine which repositories need self-hosted runners
|
||||||
|
- [ ] Decide on runner count per architecture
|
||||||
|
- [ ] Generate GitHub Personal Access Token
|
||||||
|
|
||||||
|
### Phase 2: Infrastructure
|
||||||
|
- [ ] Create Docker network
|
||||||
|
- [ ] Create persistent volumes
|
||||||
|
- [ ] Verify node labels
|
||||||
|
|
||||||
|
### Phase 3: Deployment
|
||||||
|
- [ ] Create `.env` file with GitHub token
|
||||||
|
- [ ] Create `github-runners-stack.yml`
|
||||||
|
- [ ] Deploy stack to Docker Swarm
|
||||||
|
- [ ] Verify runners appear in GitHub UI
|
||||||
|
|
||||||
|
### Phase 4: Testing
|
||||||
|
- [ ] Create test workflow
|
||||||
|
- [ ] Run test on ARM64 runner
|
||||||
|
- [ ] Run test on x86_64 runner
|
||||||
|
- [ ] Verify Docker builds work
|
||||||
|
- [ ] Test access to homelab services
|
||||||
|
|
||||||
|
### Phase 5: Security
|
||||||
|
- [ ] Enable ephemeral mode
|
||||||
|
- [ ] Set resource limits
|
||||||
|
- [ ] Review and restrict permissions
|
||||||
|
- [ ] Set up network isolation
|
||||||
|
|
||||||
|
### Phase 6: Operations
|
||||||
|
- [ ] Create monitoring script
|
||||||
|
- [ ] Set up log rotation
|
||||||
|
- [ ] Create backup script
|
||||||
|
- [ ] Document maintenance procedures
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Cost & Resource Analysis
|
||||||
|
|
||||||
|
**Compared to GitHub-hosted runners:**
|
||||||
|
|
||||||
|
| Feature | GitHub Hosted | Your Self-Hosted |
|
||||||
|
|---------|---------------|------------------|
|
||||||
|
| Cost | $0.008/minute Linux | Free (electricity) |
|
||||||
|
| Minutes | 2,000/month free | Unlimited |
|
||||||
|
| ARM64 | Limited | Full control |
|
||||||
|
| Concurrency | 20 jobs | Unlimited |
|
||||||
|
| Network | Internet only | Your homelab access |
|
||||||
|
|
||||||
|
**Your Infrastructure Cost:**
|
||||||
|
- Existing hardware: $0 (already running)
|
||||||
|
- Electricity: ~$10-20/month additional load
|
||||||
|
- Time: Initial setup ~2-4 hours
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
1. **Review this plan** and decide on your specific use cases
|
||||||
|
2. **Generate GitHub PAT** with `repo` and `admin:org` scopes
|
||||||
|
3. **Start with Phase 1** - Planning
|
||||||
|
4. **Deploy a single runner first** to test before scaling
|
||||||
|
5. **Iterate** based on your workflow needs
|
||||||
|
|
||||||
|
Would you like me to help you start with any specific phase, or do you have questions about the architecture? 🚀
|
||||||
425
HOMELAB_AUDIT.md
Normal file
425
HOMELAB_AUDIT.md
Normal file
@@ -0,0 +1,425 @@
|
|||||||
|
# Home Lab Cluster Audit Report
|
||||||
|
|
||||||
|
**Date:** February 9, 2026
|
||||||
|
**Auditor:** opencode
|
||||||
|
**Cluster:** Docker Swarm with Dokploy
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Cluster Overview
|
||||||
|
|
||||||
|
- **Cluster Type:** Docker Swarm (3 nodes)
|
||||||
|
- **Orchestration:** Dokploy v3.x
|
||||||
|
- **Reverse Proxy:** Traefik v3.6.1
|
||||||
|
- **DNS:** Technitium DNS Server
|
||||||
|
- **Monitoring:** Swarmpit
|
||||||
|
- **Git Server:** Gitea v1.24.4
|
||||||
|
- **Object Storage:** MinIO
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Node Inventory
|
||||||
|
|
||||||
|
### Node 1: tpi-n1 (Controller/Manager)
|
||||||
|
- **IP:** 192.168.2.130
|
||||||
|
- **Role:** Manager (Leader)
|
||||||
|
- **Architecture:** aarch64 (ARM64)
|
||||||
|
- **OS:** Linux
|
||||||
|
- **CPU:** 8 cores
|
||||||
|
- **RAM:** ~8 GB
|
||||||
|
- **Docker:** v27.5.1
|
||||||
|
- **Labels:**
|
||||||
|
- `infra=true`
|
||||||
|
- `role=storage`
|
||||||
|
- `storage=high`
|
||||||
|
- **Status:** Ready, Active
|
||||||
|
|
||||||
|
### Node 2: tpi-n2 (Worker)
|
||||||
|
- **IP:** 192.168.2.19
|
||||||
|
- **Role:** Worker
|
||||||
|
- **Architecture:** aarch64 (ARM64)
|
||||||
|
- **OS:** Linux
|
||||||
|
- **CPU:** 8 cores
|
||||||
|
- **RAM:** ~8 GB
|
||||||
|
- **Docker:** v27.5.1
|
||||||
|
- **Labels:**
|
||||||
|
- `role=compute`
|
||||||
|
- **Status:** Ready, Active
|
||||||
|
|
||||||
|
### Node 3: node-nas (Storage Worker)
|
||||||
|
- **IP:** 192.168.2.18
|
||||||
|
- **Role:** Worker (NAS/Storage)
|
||||||
|
- **Architecture:** x86_64
|
||||||
|
- **OS:** Linux
|
||||||
|
- **CPU:** 2 cores
|
||||||
|
- **RAM:** ~8 GB
|
||||||
|
- **Docker:** v29.1.2
|
||||||
|
- **Labels:**
|
||||||
|
- `type=nas`
|
||||||
|
- **Status:** Ready, Active
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Docker Stacks (Swarm Mode)
|
||||||
|
|
||||||
|
### Active Stacks:
|
||||||
|
|
||||||
|
#### 1. minio
|
||||||
|
- **Services:** 1 (minio_minio)
|
||||||
|
- **Status:** Running
|
||||||
|
- **Node:** node-nas (constrained to NAS)
|
||||||
|
- **Ports:** 9000 (API), 9001 (Console)
|
||||||
|
- **Storage:** /mnt/synology-data/minio (bind mount)
|
||||||
|
- **Credentials:** [REDACTED - see service config]
|
||||||
|
|
||||||
|
#### 2. swarmpit
|
||||||
|
- **Services:** 4
|
||||||
|
- swarmpit_app (UI) - Running on tpi-n1, Port 888
|
||||||
|
- swarmpit_agent (global) - Running on all 3 nodes
|
||||||
|
- swarmpit_db (CouchDB) - Running on tpi-n2
|
||||||
|
- swarmpit_influxdb - Running on node-nas
|
||||||
|
- **Status:** Active with historical failures
|
||||||
|
- **Issues:** Multiple container failures in history (mostly resolved)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Dokploy-Managed Services
|
||||||
|
|
||||||
|
### Running Services (via Dokploy Compose):
|
||||||
|
|
||||||
|
1. **ai-lobechat-yqvecg** - AI chat interface
|
||||||
|
2. **bewcloud-memos-ssogxn** - Note-taking app (⚠️ Restarting loop)
|
||||||
|
3. **bewcloud-silverbullet-42sjev** - SilverBullet markdown editor + Watchtower
|
||||||
|
4. **cloud-bewcloud-u2pls5** - BewCloud instance with Radicale (CalDAV/CardDAV)
|
||||||
|
5. **cloud-fizzy-ezuhfq** - Fizzy web app
|
||||||
|
6. **cloud-ironcalc-0id5k8** - IronCalc spreadsheet
|
||||||
|
7. **cloud-radicale-wqldcv** - Standalone Radicale server
|
||||||
|
8. **cloud-uptimekuma-jdeivt** - Uptime monitoring
|
||||||
|
9. **dns-technitum-6ojgo2** - Technitium DNS server
|
||||||
|
10. **gitea-giteasqlite-bhymqw** - Git server (Port 3000, SSH on 2222)
|
||||||
|
11. **gitea-registry-vdftrt** - Docker registry (Port 5000)
|
||||||
|
|
||||||
|
### Dokploy Infrastructure Services:
|
||||||
|
- **dokploy** - Main Dokploy UI (Port 3000, host mode)
|
||||||
|
- **dokploy-postgres** - Dokploy database
|
||||||
|
- **dokploy-redis** - Dokploy cache
|
||||||
|
- **dokploy-traefik** - Reverse proxy (Ports 80, 443, 8080)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Standalone Services (docker-compose)
|
||||||
|
|
||||||
|
### Running:
|
||||||
|
- **technitium-dns** - DNS server (Port 53, 5380)
|
||||||
|
- **immich3-compose** - Photo management (Immich v2.3.0)
|
||||||
|
- immich-server
|
||||||
|
- immich-machine-learning
|
||||||
|
- immich-database (pgvecto-rs)
|
||||||
|
- immich-redis
|
||||||
|
|
||||||
|
### Stack Services:
|
||||||
|
- **bendtstudio-pancake-bzgfpc** - MariaDB database (Port 3306)
|
||||||
|
- **bendtstudio-webstatic-iq9evl** - Static web files (⚠️ Rollback paused state)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Issues Identified
|
||||||
|
|
||||||
|
### 🔴 Critical Issues:
|
||||||
|
|
||||||
|
1. **bewcloud-memos in Restart Loop**
|
||||||
|
- Container keeps restarting (seen 24 seconds ago)
|
||||||
|
- Status: `Restarting (0) 24 seconds ago`
|
||||||
|
- **Action Required:** Check logs and fix configuration
|
||||||
|
|
||||||
|
2. **bendtstudio-webstatic in Rollback Paused State**
|
||||||
|
- Service is not updating properly
|
||||||
|
- State: `rollback_paused`
|
||||||
|
- **Action Required:** Investigate update failure
|
||||||
|
|
||||||
|
3. **bendtstudio-app Not Running**
|
||||||
|
- Service has 0/0 replicas
|
||||||
|
- **Action Required:** Determine if needed or remove
|
||||||
|
|
||||||
|
4. **syncthing Stopped**
|
||||||
|
- Service has 0 replicas
|
||||||
|
- Should be on node-nas
|
||||||
|
- **Action Required:** Restart or remove if not needed
|
||||||
|
|
||||||
|
### 🟡 Warning Issues:
|
||||||
|
|
||||||
|
5. **Swarmpit Agent Failures (Historical)**
|
||||||
|
- Multiple past failures on all nodes
|
||||||
|
- Currently running but concerning history
|
||||||
|
- **Action Required:** Monitor for stability
|
||||||
|
|
||||||
|
6. **No Monitoring of MinIO**
|
||||||
|
- MinIO running but no obvious backup/monitoring strategy documented
|
||||||
|
- **Action Required:** Set up monitoring and backup
|
||||||
|
|
||||||
|
7. **Credential Management**
|
||||||
|
- Passwords visible in service configs (bendtstudio-webstatic, MinIO, DNS)
|
||||||
|
- **Action Required:** Migrate to Docker secrets or env files
|
||||||
|
|
||||||
|
### 🟢 Informational:
|
||||||
|
|
||||||
|
8. **13 Unused/Orphaned Volumes**
|
||||||
|
- 33 total volumes, only 20 active
|
||||||
|
- **Action Required:** Clean up unused volumes to reclaim ~595MB
|
||||||
|
|
||||||
|
9. **Gitea Repository Status Unknown**
|
||||||
|
- Cannot verify if all compose files are version controlled
|
||||||
|
- **Action Required:** Audit Gitea repositories
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Storage Configuration
|
||||||
|
|
||||||
|
### Local Volumes (33 total):
|
||||||
|
Key volumes include:
|
||||||
|
- `dokploy-postgres-database`
|
||||||
|
- `bewcloud-postgres-in40hh-data`
|
||||||
|
- `gitea-data`, `gitea-registry-data`
|
||||||
|
- `immich-postgres`, `immich-redis-data`, `immich-model-cache`
|
||||||
|
- `bendtstudio-pancake-data`
|
||||||
|
- `shared-data` (NFS/shared)
|
||||||
|
- Various app-specific volumes
|
||||||
|
|
||||||
|
### Bind Mounts:
|
||||||
|
- **MinIO:** `/mnt/synology-data/minio` → `/data`
|
||||||
|
- **Syncthing:** `/mnt/synology-data` → `/var/syncthing` (currently stopped)
|
||||||
|
- **Dokploy:** `/etc/dokploy` → `/etc/dokploy`
|
||||||
|
|
||||||
|
### NFS Mounts:
|
||||||
|
- Synology NAS mounted at `/mnt/synology-data/`
|
||||||
|
- Contains: immich/, minio/
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Networking
|
||||||
|
|
||||||
|
### Overlay Networks:
|
||||||
|
- `dokploy-network` - Main Dokploy network
|
||||||
|
- `minio_default` - MinIO stack network
|
||||||
|
- `swarmpit_net` - Swarmpit monitoring network
|
||||||
|
- `ingress` - Docker Swarm ingress
|
||||||
|
|
||||||
|
### Bridge Networks:
|
||||||
|
- Multiple app-specific networks created by compose
|
||||||
|
- `ai-lobechat-yqvecg`
|
||||||
|
- `bewcloud-memos-ssogxn`
|
||||||
|
- `bewcloud-silverbullet-42sjev`
|
||||||
|
- `cloud-fizzy-ezuhfq_default`
|
||||||
|
- `cloud-uptimekuma-jdeivt`
|
||||||
|
- `gitea-giteasqlite-bhymqw`
|
||||||
|
- `gitea-registry-vdftrt`
|
||||||
|
- `immich3-compose-ubyhe9_default`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. SSL/TLS Configuration
|
||||||
|
|
||||||
|
- **Certificate Resolver:** Let's Encrypt (ACME)
|
||||||
|
- **Email:** sirtimbly@gmail.com
|
||||||
|
- **Challenge Type:** HTTP-01
|
||||||
|
- **Storage:** `/etc/dokploy/traefik/dynamic/acme.json`
|
||||||
|
- **Entry Points:** web (80) → websecure (443) with auto-redirect
|
||||||
|
- **HTTP/3:** Enabled on websecure
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Traefik Routing
|
||||||
|
|
||||||
|
### Configured Routes (via labels):
|
||||||
|
- gitea.bendtstudio.com → Gitea
|
||||||
|
- Multiple apps via traefik.me subdomains
|
||||||
|
- HTTP → HTTPS redirect enabled
|
||||||
|
- Middlewares configured in `/etc/dokploy/traefik/dynamic/`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. DNS Configuration
|
||||||
|
|
||||||
|
### Technitium DNS:
|
||||||
|
- **Port:** 53 (TCP/UDP), 5380 (Web UI)
|
||||||
|
- **Domain:** dns.bendtstudio.com
|
||||||
|
- **Admin Password:** [REDACTED]
|
||||||
|
- **Placement:** Locked to tpi-n1
|
||||||
|
- **TZ:** America/New_York
|
||||||
|
|
||||||
|
### Services using DNS:
|
||||||
|
- All services accessible via bendtstudio.com subdomains
|
||||||
|
- Internal DNS resolution for Docker services
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 12. Configuration Files Location
|
||||||
|
|
||||||
|
### In `/etc/dokploy/`:
|
||||||
|
- `traefik/traefik.yml` - Main Traefik config
|
||||||
|
- `traefik/dynamic/*.yml` - Dynamic routes and middlewares
|
||||||
|
- `compose/*/code/docker-compose.yml` - Dokploy-managed compose files
|
||||||
|
|
||||||
|
### In `/home/ubuntu/`:
|
||||||
|
- `minio-stack.yml` - MinIO stack definition
|
||||||
|
|
||||||
|
### In local workspace:
|
||||||
|
- Various compose files (not all deployed via Dokploy)
|
||||||
|
- May be out of sync with running services
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 13. Missing Configuration in Version Control
|
||||||
|
|
||||||
|
Based on the analysis, the following may NOT be properly tracked in Gitea:
|
||||||
|
|
||||||
|
1. ✅ **Gitea** itself - compose file present
|
||||||
|
2. ✅ **MinIO** - stack file in ~/minio-stack.yml
|
||||||
|
3. ⚠️ **Dokploy dynamic configs** - traefik routes
|
||||||
|
4. ⚠️ **All Dokploy-managed compose files** - 11 services
|
||||||
|
5. ❌ **Technitium DNS** - compose file in /etc/dokploy/
|
||||||
|
6. ❌ **Immich** - compose configuration
|
||||||
|
7. ❌ **Swarmpit** - stack configuration
|
||||||
|
8. ❌ **Dokploy infrastructure** - internal services
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 14. Resource Usage
|
||||||
|
|
||||||
|
### Docker System:
|
||||||
|
- **Images:** 23 (10.91 GB)
|
||||||
|
- **Containers:** 26 (135 MB)
|
||||||
|
- **Volumes:** 33 (2.02 GB, 595MB reclaimable)
|
||||||
|
- **Build Cache:** 0
|
||||||
|
|
||||||
|
### Node Resources:
|
||||||
|
- **tpi-n1 & tpi-n2:** 8 cores ARM64, 8GB RAM each
|
||||||
|
- **node-nas:** 2 cores x86_64, 8GB RAM
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 15. Recommendations
|
||||||
|
|
||||||
|
### Immediate Actions (High Priority):
|
||||||
|
|
||||||
|
1. **Fix bewcloud-memos**
|
||||||
|
```bash
|
||||||
|
docker service logs bewcloud-memos-ssogxn-memos --tail 50
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Fix bendtstudio-webstatic**
|
||||||
|
```bash
|
||||||
|
docker service ps bendtstudio-webstatic-iq9evl --no-trunc
|
||||||
|
docker service update --force bendtstudio-webstatic-iq9evl
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Restart or Remove syncthing**
|
||||||
|
```bash
|
||||||
|
# Option 1: Scale up
|
||||||
|
docker service scale syncthing=1
|
||||||
|
|
||||||
|
# Option 2: Remove
|
||||||
|
docker service rm syncthing
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Clean up unused volumes**
|
||||||
|
```bash
|
||||||
|
docker volume prune
|
||||||
|
```
|
||||||
|
|
||||||
|
### Short-term Actions (Medium Priority):
|
||||||
|
|
||||||
|
5. **Audit Gitea repositories**
|
||||||
|
- Access Gitea at http://gitea.bendtstudio.com
|
||||||
|
- Verify which compose files are tracked
|
||||||
|
- Commit missing configurations
|
||||||
|
|
||||||
|
6. **Secure credentials**
|
||||||
|
- Use Docker secrets for passwords
|
||||||
|
- Move credentials to environment files
|
||||||
|
- Never commit .env files with real passwords
|
||||||
|
|
||||||
|
7. **Set up automated backups**
|
||||||
|
- Back up Dokploy database
|
||||||
|
- Back up Gitea repositories
|
||||||
|
- Back up MinIO data
|
||||||
|
|
||||||
|
8. **Document all services**
|
||||||
|
- Create README for each service
|
||||||
|
- Document dependencies and data locations
|
||||||
|
- Create runbook for common operations
|
||||||
|
|
||||||
|
### Long-term Actions (Low Priority):
|
||||||
|
|
||||||
|
9. **Implement proper monitoring**
|
||||||
|
- Prometheus/Grafana for metrics (mentioned in PLAN.md but not found)
|
||||||
|
- Alerting for service failures
|
||||||
|
- Disk usage monitoring
|
||||||
|
|
||||||
|
10. **Implement GitOps workflow**
|
||||||
|
- All changes through Git
|
||||||
|
- Automated deployments via Dokploy webhooks
|
||||||
|
- Configuration drift detection
|
||||||
|
|
||||||
|
11. **Consolidate storage strategy**
|
||||||
|
- Define clear policy for volumes vs bind mounts
|
||||||
|
- Document backup procedures for each storage type
|
||||||
|
|
||||||
|
12. **Security audit**
|
||||||
|
- Review all exposed ports
|
||||||
|
- Check for default/weak passwords
|
||||||
|
- Implement network segmentation if needed
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 16. Next Steps Checklist
|
||||||
|
|
||||||
|
- [ ] Fix critical service issues (memos, webstatic)
|
||||||
|
- [ ] Document all running services with purpose
|
||||||
|
- [ ] Commit all compose files to Gitea
|
||||||
|
- [ ] Create backup strategy
|
||||||
|
- [ ] Set up monitoring and alerting
|
||||||
|
- [ ] Clean up unused resources
|
||||||
|
- [ ] Create disaster recovery plan
|
||||||
|
- [ ] Document SSH access for all nodes
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Appendix A: Quick Commands Reference
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View cluster status
|
||||||
|
docker node ls
|
||||||
|
docker service ls
|
||||||
|
docker stack ls
|
||||||
|
|
||||||
|
# View service logs
|
||||||
|
docker service logs <service-name> --tail 100 -f
|
||||||
|
|
||||||
|
# View container logs
|
||||||
|
docker logs <container-name> --tail 100 -f
|
||||||
|
|
||||||
|
# Scale a service
|
||||||
|
docker service scale <service-name>=<replicas>
|
||||||
|
|
||||||
|
# Update a service
|
||||||
|
docker service update --force <service-name>
|
||||||
|
|
||||||
|
# SSH to nodes
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130 # tpi-n1 (manager)
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.19 # tpi-n2 (worker)
|
||||||
|
# NAS node requires different credentials
|
||||||
|
|
||||||
|
# Access Dokploy UI
|
||||||
|
http://192.168.2.130:3000
|
||||||
|
|
||||||
|
# Access Swarmpit UI
|
||||||
|
http://192.168.2.130:888
|
||||||
|
|
||||||
|
# Access Traefik Dashboard
|
||||||
|
http://192.168.2.130:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*End of Audit Report*
|
||||||
153
POSSE_PARTY_README.md
Normal file
153
POSSE_PARTY_README.md
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
# Posse-Party Deployment Guide
|
||||||
|
|
||||||
|
**Posse-Party** - POSSE (Publish on Own Site, Syndicate Elsewhere) platform for managing your social media presence.
|
||||||
|
|
||||||
|
## What is Posse-Party?
|
||||||
|
|
||||||
|
Posse-Party helps you:
|
||||||
|
- **Publish** content on your own website/platform
|
||||||
|
- **Syndicate** to other social media platforms (Twitter/X, Mastodon, LinkedIn, etc.)
|
||||||
|
- **Own** your content while still reaching audiences on other platforms
|
||||||
|
- **Cross-post** automatically to multiple platforms at once
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Generate Secret Key
|
||||||
|
|
||||||
|
```bash
|
||||||
|
openssl rand -hex 64
|
||||||
|
```
|
||||||
|
|
||||||
|
Copy this value - you'll need it for `SECRET_KEY_BASE`.
|
||||||
|
|
||||||
|
### 2. Create Environment File
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp posse-party.env.example .env
|
||||||
|
nano .env # or use your preferred editor
|
||||||
|
```
|
||||||
|
|
||||||
|
**Required settings to fill in:**
|
||||||
|
- `SECRET_KEY_BASE` - The value from step 1
|
||||||
|
- `POSTGRES_PASSWORD` - A strong database password
|
||||||
|
- `APP_HOST` - Should already be set to `posseparty.bendtstudio.com`
|
||||||
|
|
||||||
|
**Optional but recommended:**
|
||||||
|
- Email configuration (for login and notifications)
|
||||||
|
- OAuth providers (for social login)
|
||||||
|
|
||||||
|
### 3. Deploy via Dokploy
|
||||||
|
|
||||||
|
1. Log into Dokploy: http://192.168.2.130:3000
|
||||||
|
2. Create a new project
|
||||||
|
3. Upload the compose file: `posse-party-compose.yml`
|
||||||
|
4. Upload your `.env` file
|
||||||
|
5. Deploy!
|
||||||
|
|
||||||
|
### 4. Add DNS Record
|
||||||
|
|
||||||
|
In Technitium DNS (http://192.168.2.130:5380):
|
||||||
|
- Add A record: `posseparty.bendtstudio.com` → `192.168.2.130`
|
||||||
|
|
||||||
|
### 5. Access Posse-Party
|
||||||
|
|
||||||
|
Once deployed, visit: https://posseparty.bendtstudio.com
|
||||||
|
|
||||||
|
The first user to register will become the admin.
|
||||||
|
|
||||||
|
## Services Included
|
||||||
|
|
||||||
|
- **web**: Rails application server (port 3000)
|
||||||
|
- **worker**: Background job processor (Solid Queue)
|
||||||
|
- **db**: PostgreSQL database
|
||||||
|
- **migrate**: One-time database migration service
|
||||||
|
|
||||||
|
## Maintenance
|
||||||
|
|
||||||
|
### Backup Database
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh tim@192.168.2.18
|
||||||
|
cd /path/to/posse-party
|
||||||
|
docker exec posse-party-db-1 pg_dump -U postgres posse_party > backup-$(date +%Y%m%d).sql
|
||||||
|
```
|
||||||
|
|
||||||
|
### View Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh tim@192.168.2.18
|
||||||
|
docker logs posse-party-web-1 -f
|
||||||
|
docker logs posse-party-worker-1 -f
|
||||||
|
```
|
||||||
|
|
||||||
|
### Update to Latest Version
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh tim@192.168.2.18
|
||||||
|
cd /path/to/posse-party
|
||||||
|
docker compose pull
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Details
|
||||||
|
|
||||||
|
### Email Setup (Optional but Recommended)
|
||||||
|
|
||||||
|
Posse-Party can send emails for:
|
||||||
|
- Account verification
|
||||||
|
- Password resets
|
||||||
|
- Notifications
|
||||||
|
|
||||||
|
See the `.env.example` file for supported providers (SendGrid, Mailgun, etc.)
|
||||||
|
|
||||||
|
### OAuth Setup (Optional)
|
||||||
|
|
||||||
|
Enable social login by configuring OAuth providers:
|
||||||
|
1. Create OAuth app at the provider (GitHub, Google, etc.)
|
||||||
|
2. Add callback URL: `https://posseparty.bendtstudio.com/auth/<provider>/callback`
|
||||||
|
3. Copy Client ID and Secret to `.env`
|
||||||
|
|
||||||
|
### S3/MinIO Storage (Optional)
|
||||||
|
|
||||||
|
By default, uploads are stored locally. To use S3 or MinIO:
|
||||||
|
1. Uncomment S3 configuration in `.env`
|
||||||
|
2. Set your credentials and bucket name
|
||||||
|
3. Redeploy
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Service Won't Start
|
||||||
|
|
||||||
|
Check logs:
|
||||||
|
```bash
|
||||||
|
docker logs posse-party-web-1 --tail 50
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Connection Issues
|
||||||
|
|
||||||
|
Verify database is healthy:
|
||||||
|
```bash
|
||||||
|
docker ps | grep posse-party-db
|
||||||
|
docker logs posse-party-db-1 --tail 20
|
||||||
|
```
|
||||||
|
|
||||||
|
### HTTPS Not Working
|
||||||
|
|
||||||
|
1. Check DNS record is correct
|
||||||
|
2. Verify Traefik labels in compose file
|
||||||
|
3. Check Traefik dashboard: http://192.168.2.130:8080
|
||||||
|
|
||||||
|
## Resources
|
||||||
|
|
||||||
|
- **Official Repo**: https://github.com/searlsco/posse_party
|
||||||
|
- **Documentation**: https://github.com/searlsco/posse_party/tree/main/docs
|
||||||
|
- **Website**: https://posseparty.com
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For Posse-Party specific issues:
|
||||||
|
- GitHub Issues: https://github.com/searlsco/posse_party/issues
|
||||||
|
|
||||||
|
For deployment issues with your homelab:
|
||||||
|
- Check your Dokploy logs
|
||||||
|
- Review Traefik routing at http://192.168.2.130:8080
|
||||||
290
QUICKSTART.md
Normal file
290
QUICKSTART.md
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
# Home Lab Quick Start Guide
|
||||||
|
|
||||||
|
**Welcome to your documented homelab!** This guide will help you manage and maintain your cluster.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎯 What's Been Set Up
|
||||||
|
|
||||||
|
✅ **Cluster Audit Complete** - All services documented
|
||||||
|
✅ **Memos Fixed** - Running at https://memos.bendtstudio.com
|
||||||
|
✅ **WebStatic Scaled** - Down to 1 replica, stable
|
||||||
|
✅ **Documentation Created** - Service catalog and guides
|
||||||
|
✅ **Backup Scripts** - Automated backup tools ready
|
||||||
|
✅ **Infrastructure Repo** - All configs in version control
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 Quick Access
|
||||||
|
|
||||||
|
### Your Services (via HTTPS)
|
||||||
|
|
||||||
|
| Service | URL | Status |
|
||||||
|
|---------|-----|--------|
|
||||||
|
| **Memos** | https://memos.bendtstudio.com | ✅ Working |
|
||||||
|
| **Gitea** | https://gitea.bendtstudio.com | ✅ Git server |
|
||||||
|
| **BewCloud** | Check Traefik routes | ✅ Cloud storage |
|
||||||
|
| **Immich** | Check Traefik routes | ✅ Photos |
|
||||||
|
| **SilverBullet** | Check Traefik routes | ✅ Notes |
|
||||||
|
|
||||||
|
### Management Interfaces
|
||||||
|
|
||||||
|
| Tool | URL | Use For |
|
||||||
|
|------|-----|---------|
|
||||||
|
| **Dokploy** | http://192.168.2.130:3000 | Deploy containers |
|
||||||
|
| **Swarmpit** | http://192.168.2.130:888 | Monitor cluster |
|
||||||
|
| **Traefik** | http://192.168.2.130:8080 | View routes |
|
||||||
|
| **MinIO** | http://192.168.2.18:9001 | Object storage |
|
||||||
|
| **DNS** | http://192.168.2.130:5380 | Manage DNS |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📁 Important Files
|
||||||
|
|
||||||
|
### In This Repo
|
||||||
|
|
||||||
|
```
|
||||||
|
cloud-compose/
|
||||||
|
├── HOMELAB_AUDIT.md # Complete cluster documentation
|
||||||
|
├── SERVICE_CATALOG.md # All services documented
|
||||||
|
├── ACTION_PLAN.md # Step-by-step improvement plan
|
||||||
|
├── QUICK_REFERENCE.md # Quick access commands
|
||||||
|
├── memos-compose.yml # Fixed memos configuration
|
||||||
|
├── infrastructure/ # Backup scripts & configs
|
||||||
|
│ ├── README.md # How to use backups
|
||||||
|
│ ├── scripts/ # Backup automation
|
||||||
|
│ │ ├── backup-compose-files.sh
|
||||||
|
│ │ └── backup-critical-data.sh
|
||||||
|
│ └── compose/ # Backed up compose files
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔄 Regular Maintenance
|
||||||
|
|
||||||
|
### Daily (Recommended)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check cluster health
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service ls"
|
||||||
|
|
||||||
|
# Quick backup
|
||||||
|
./infrastructure/scripts/backup-compose-files.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Weekly
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full backup
|
||||||
|
./infrastructure/scripts/backup-critical-data.sh
|
||||||
|
|
||||||
|
# Review Swarmpit dashboard
|
||||||
|
open http://192.168.2.130:888
|
||||||
|
|
||||||
|
# Commit changes to git
|
||||||
|
git add .
|
||||||
|
git commit -m "Weekly backup $(date +%Y-%m-%d)"
|
||||||
|
git push
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🛠️ Common Tasks
|
||||||
|
|
||||||
|
### Deploy a New Service
|
||||||
|
|
||||||
|
1. Create `docker-compose.yml` file
|
||||||
|
2. Add Traefik labels for HTTPS:
|
||||||
|
```yaml
|
||||||
|
labels:
|
||||||
|
- traefik.http.routers.myapp.rule=Host(`myapp.bendtstudio.com`)
|
||||||
|
- traefik.http.routers.myapp.tls.certresolver=letsencrypt
|
||||||
|
- traefik.enable=true
|
||||||
|
```
|
||||||
|
3. Upload via Dokploy UI
|
||||||
|
4. Deploy
|
||||||
|
5. Add DNS record in Technitium DNS
|
||||||
|
6. Commit compose file to this repo
|
||||||
|
|
||||||
|
### Backup Everything
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run both backup scripts
|
||||||
|
cd infrastructure/scripts
|
||||||
|
./backup-compose-files.sh
|
||||||
|
./backup-critical-data.sh
|
||||||
|
|
||||||
|
# Commit to git
|
||||||
|
cd ../..
|
||||||
|
git add infrastructure/
|
||||||
|
git commit -m "Backup $(date +%Y-%m-%d)"
|
||||||
|
git push
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Service Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# View logs
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service logs <service-name> --tail 50 -f"
|
||||||
|
|
||||||
|
# Common services:
|
||||||
|
# - bewcloud-memos-ssogxn-memos
|
||||||
|
# - bendtstudio-webstatic-iq9evl
|
||||||
|
# - dokploy
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restart a Service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Via SSH
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service update --force <service-name>"
|
||||||
|
|
||||||
|
# Or via Dokploy UI
|
||||||
|
open http://192.168.2.130:3000
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🆘 Troubleshooting
|
||||||
|
|
||||||
|
### Service Won't Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check logs
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service logs <service-name> --tail 100"
|
||||||
|
|
||||||
|
# Common fixes:
|
||||||
|
# 1. Permission issues (like memos had)
|
||||||
|
# 2. Missing environment variables
|
||||||
|
# 3. Port conflicts
|
||||||
|
# 4. Image not found
|
||||||
|
```
|
||||||
|
|
||||||
|
### Can't Access Website
|
||||||
|
|
||||||
|
1. Check if container is running:
|
||||||
|
```bash
|
||||||
|
ssh ubuntu@192.168.2.130 "docker ps | grep <service>"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Check Traefik routes:
|
||||||
|
```bash
|
||||||
|
open http://192.168.2.130:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Check DNS:
|
||||||
|
```bash
|
||||||
|
dig memos.bendtstudio.com @192.168.2.130
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Issues
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check database container
|
||||||
|
ssh ubuntu@192.168.2.130 "docker ps | grep postgres"
|
||||||
|
|
||||||
|
# Check database logs
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service logs dokploy-postgres --tail 50"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📊 Current Status
|
||||||
|
|
||||||
|
### Nodes
|
||||||
|
- **tpi-n1** (192.168.2.130) - Manager ✅
|
||||||
|
- **tpi-n2** (192.168.2.19) - Worker ✅
|
||||||
|
- **node-nas** (192.168.2.18) - Storage ✅
|
||||||
|
|
||||||
|
### Running Services
|
||||||
|
```bash
|
||||||
|
# Check all services
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service ls"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Known Issues
|
||||||
|
- **Syncthing** - Stopped (0 replicas), start when needed:
|
||||||
|
```bash
|
||||||
|
docker service scale syncthing=1
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📚 Documentation
|
||||||
|
|
||||||
|
- **Service Catalog** - `SERVICE_CATALOG.md`
|
||||||
|
- **Infrastructure Guide** - `infrastructure/README.md`
|
||||||
|
- **Audit Report** - `HOMELAB_AUDIT.md`
|
||||||
|
- **Action Plan** - `ACTION_PLAN.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎓 Learning Resources
|
||||||
|
|
||||||
|
### Docker Swarm
|
||||||
|
- Official Docs: https://docs.docker.com/engine/swarm/
|
||||||
|
- Service Management: `docker service --help`
|
||||||
|
|
||||||
|
### Dokploy
|
||||||
|
- Documentation: https://dokploy.com/docs/
|
||||||
|
- UI at: http://192.168.2.130:3000
|
||||||
|
|
||||||
|
### Traefik
|
||||||
|
- Routing: http://192.168.2.130:8080/dashboard/
|
||||||
|
- Labels Reference: https://doc.traefik.io/traefik/routing/providers/docker/
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📞 Quick Commands Reference
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# SSH to nodes
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130 # Controller
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.19 # Worker
|
||||||
|
ssh tim@192.168.2.18 # NAS
|
||||||
|
|
||||||
|
# Check cluster
|
||||||
|
docker node ls
|
||||||
|
docker service ls
|
||||||
|
docker stack ls
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
docker service logs <service> --tail 100 -f
|
||||||
|
|
||||||
|
# Scale service
|
||||||
|
docker service scale <service>=<replicas>
|
||||||
|
|
||||||
|
# Update service
|
||||||
|
docker service update --force <service>
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ Next Steps
|
||||||
|
|
||||||
|
### High Priority (This Week)
|
||||||
|
- [ ] Test backup scripts
|
||||||
|
- [ ] Set up automated daily backups (cron)
|
||||||
|
- [ ] Verify all services have working HTTPS
|
||||||
|
- [ ] Add missing compose files to version control
|
||||||
|
|
||||||
|
### Medium Priority (This Month)
|
||||||
|
- [ ] Create disaster recovery procedures
|
||||||
|
- [ ] Set up monitoring alerts
|
||||||
|
- [ ] Document all environment variables
|
||||||
|
- [ ] Test restore procedures
|
||||||
|
|
||||||
|
### Low Priority (When Time)
|
||||||
|
- [ ] Set up Prometheus/Grafana
|
||||||
|
- [ ] Create network diagrams
|
||||||
|
- [ ] Automate SSL certificate renewal checks
|
||||||
|
- [ ] Security audit
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Questions or issues?** Check the detailed documentation in:
|
||||||
|
- `HOMELAB_AUDIT.md` - Complete technical details
|
||||||
|
- `SERVICE_CATALOG.md` - Service-specific information
|
||||||
|
- `infrastructure/README.md` - Backup and management guide
|
||||||
|
|
||||||
|
*Last Updated: February 9, 2026*
|
||||||
34
QUICK_REFERENCE.md
Normal file
34
QUICK_REFERENCE.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
## Cluster Access
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# SSH to controller
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130
|
||||||
|
|
||||||
|
# SSH to worker
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.19
|
||||||
|
|
||||||
|
# SSH to NAS node
|
||||||
|
ssh tim@192.168.2.18
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dokploy
|
||||||
|
|
||||||
|
Configured to deploy across the nodes in the cluster. The nas node has access to a NFS share at <????>
|
||||||
|
|
||||||
|
Dokploy has an s3 compatible storage which is pointing to minio on the nas node. This is used for backups.
|
||||||
|
|
||||||
|
## Minio
|
||||||
|
|
||||||
|
Minio is installed on the nas node. It is used for backups of the dokploy database and for storing the dokploy s3 compatible storage.
|
||||||
|
|
||||||
|
## Traefik
|
||||||
|
|
||||||
|
Traefik is installed on the controller node. It is used to route traffic to the various services on the cluster.
|
||||||
|
|
||||||
|
## Gitea
|
||||||
|
|
||||||
|
Used for storing all the compose and stack yml files for each service.
|
||||||
|
|
||||||
|
## Technitium DNS
|
||||||
|
|
||||||
|
Using internal DNS requires configuring the docker DNS to point to the Technitium DNS server.
|
||||||
103
SETUP.md
Normal file
103
SETUP.md
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
Dokploy + Docker Swarm Homelab Setup Instructions
|
||||||
|
This guide walks through setting up a fresh, multi-node Docker Swarm cluster using Dokploy for quick web app deployment and easy hosting of infrastructure services (like Pi-hole and Minio), including shared storage via NFS from your NAS node.
|
||||||
|
|
||||||
|
1. Prepare Environment
|
||||||
|
• Choose a primary node (can be any capable Linux server).
|
||||||
|
• Identify your NAS node (high capacity storage).
|
||||||
|
• Gather all SSH credentials.
|
||||||
|
• Ensure all nodes have Docker installed (curl -fsSL https://get.docker.com | sh).
|
||||||
|
|
||||||
|
2. Initialize Docker Swarm Cluster
|
||||||
|
On your primary node:
|
||||||
|
docker swarm init --advertise-addr <PRIMARY_NODE_IP>
|
||||||
|
On each additional node:
|
||||||
|
• Run the join command given by the previous step, e.g.:
|
||||||
|
docker swarm join --token <TOKEN> <PRIMARY_NODE_IP>:2377
|
||||||
|
|
||||||
|
3. Label Nodes for Placement Constraints
|
||||||
|
On your primary node, label nodes:
|
||||||
|
docker node update --label-add role=storage nas-node-01
|
||||||
|
docker node update --label-add storage=high nas-node-01
|
||||||
|
docker node update --label-add role=compute node-light-01
|
||||||
|
docker node update --label-add infra=true nas-node-01
|
||||||
|
(Replace node names as appropriate)
|
||||||
|
|
||||||
|
4. Set Up Dokploy
|
||||||
|
On primary node:
|
||||||
|
curl -sSL https://dokploy.com/install.sh | sh
|
||||||
|
• Dokploy UI will be available on port 8080.
|
||||||
|
• Default credentials: admin / admin (change ASAP).
|
||||||
|
|
||||||
|
5. Set Up Shared NFS Storage from Your NAS
|
||||||
|
On your NAS node:
|
||||||
|
• Install NFS server (Debian/Ubuntu):
|
||||||
|
sudo apt install nfs-kernel-server
|
||||||
|
• Export a directory:
|
||||||
|
o Edit /etc/exports, add:
|
||||||
|
/mnt/storage/docker-data *(rw,sync,no_subtree_check)
|
||||||
|
o Restart NFS:
|
||||||
|
sudo exportfs -ra
|
||||||
|
sudo systemctl restart nfs-kernel-server
|
||||||
|
|
||||||
|
6. Create Shared NFS Volume in Docker
|
||||||
|
On the manager node:
|
||||||
|
docker volume create
|
||||||
|
--driver local
|
||||||
|
--opt type=nfs
|
||||||
|
--opt o=addr=<NAS_IP>,rw,nolock,nfsvers=4
|
||||||
|
--opt device=:/mnt/storage/docker-data
|
||||||
|
shared-data
|
||||||
|
(Replace <NAS_IP> with your NAS's address.)
|
||||||
|
|
||||||
|
7. Deploy Apps with Dokploy + Placement Constraints
|
||||||
|
• Use Dokploy UI to:
|
||||||
|
o Deploy your web apps (Node.js, PHP, static sites)
|
||||||
|
o Set replica counts (scaling)
|
||||||
|
o Pin infrastructure apps (like Pi-hole or Minio) to the NAS node via placement constraints.
|
||||||
|
o Use the shared NFS volume for persistent data.
|
||||||
|
Example Docker Compose snippet for Pinning:
|
||||||
|
services:
|
||||||
|
pihole:
|
||||||
|
image: pihole/pihole
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.role==storage
|
||||||
|
volumes:
|
||||||
|
- shared-data:/etc/pihole
|
||||||
|
|
||||||
|
8. (Optional) Set Up Minio (S3-Compatible Storage)
|
||||||
|
• Deploy Minio with Dokploy, pin it to your NAS, and use shared volume for data:
|
||||||
|
services:
|
||||||
|
minio:
|
||||||
|
image: minio/minio
|
||||||
|
command: server /data --console-address ":9001"
|
||||||
|
environment:
|
||||||
|
MINIO_ROOT_USER: admin
|
||||||
|
MINIO_ROOT_PASSWORD: changeme123
|
||||||
|
volumes:
|
||||||
|
- shared-data:/data
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.role==storage
|
||||||
|
ports:
|
||||||
|
- "9000:9000"
|
||||||
|
- "9001:9001"
|
||||||
|
|
||||||
|
9. Add Web Apps and Experiment!
|
||||||
|
• Use Dokploy's UI to connect to your Gitea instance, auto-deploy repos, and experiment rapidly.
|
||||||
|
• Traefik integration and SSL setup is handled automatically in Dokploy.
|
||||||
|
|
||||||
|
10. Restore K3s (Optional, Later)
|
||||||
|
• Your original K3s manifests are saved in git—just reapply if you wish to revert:
|
||||||
|
k3s server
|
||||||
|
kubectl apply -f <your-manifests>
|
||||||
|
|
||||||
|
References
|
||||||
|
• Docker Swarm Docs: https://docs.docker.com/engine/swarm/
|
||||||
|
• Dokploy Docs: https://dokploy.com/docs/
|
||||||
|
• Docker Volumes: https://docs.docker.com/engine/storage/volumes/
|
||||||
|
• NFS on Linux: https://help.ubuntu.com/community/NFS
|
||||||
|
|
||||||
|
This guide gives you a fast start for a declarative, multi-node homelab with web app simplicity and infrastructure reliability using Dokploy and Docker Swarm!
|
||||||
@@ -7,23 +7,32 @@ services:
|
|||||||
POSTGRES_USER: immich
|
POSTGRES_USER: immich
|
||||||
volumes:
|
volumes:
|
||||||
- immich-postgres:/var/lib/postgresql/data
|
- immich-postgres:/var/lib/postgresql/data
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
deploy:
|
deploy:
|
||||||
restart_policy:
|
restart_policy:
|
||||||
condition: on-failure
|
condition: on-failure
|
||||||
|
|
||||||
immich-machine-learning:
|
immich-machine-learning:
|
||||||
image: ghcr.io/immich-app/immich-machine-learning:v1.106.4
|
image: ghcr.io/immich-app/immich-machine-learning:v2.3.0
|
||||||
volumes:
|
volumes:
|
||||||
- immich-model-cache:/cache
|
- immich-model-cache:/cache
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
deploy:
|
deploy:
|
||||||
restart_policy:
|
restart_policy:
|
||||||
condition: on-failure
|
condition: on-failure
|
||||||
|
|
||||||
immich-redis:
|
immich-redis:
|
||||||
image: redis:6.2-alpine
|
image: redis:6.2-alpine
|
||||||
volumes:
|
volumes:
|
||||||
- immich-redis-data:/data
|
- immich-redis-data:/data
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
deploy:
|
deploy:
|
||||||
restart_policy:
|
restart_policy:
|
||||||
condition: on-failure
|
condition: on-failure
|
||||||
|
|
||||||
immich-server:
|
immich-server:
|
||||||
image: ghcr.io/immich-app/immich-server:v2.3.0
|
image: ghcr.io/immich-app/immich-server:v2.3.0
|
||||||
environment:
|
environment:
|
||||||
@@ -37,20 +46,45 @@ services:
|
|||||||
REDIS_HOSTNAME: immich-redis
|
REDIS_HOSTNAME: immich-redis
|
||||||
REDIS_PORT: '6379'
|
REDIS_PORT: '6379'
|
||||||
SERVER_URL: https://immich.bendtstudio.com
|
SERVER_URL: https://immich.bendtstudio.com
|
||||||
|
IMMICH_HOST: 0.0.0.0
|
||||||
volumes:
|
volumes:
|
||||||
- /mnt/synology-data/immich:/usr/src/app/upload
|
- /mnt/synology-data/immich:/usr/src/app/upload
|
||||||
- /mnt/photos:/var/photos
|
- /mnt/photos:/var/photos
|
||||||
- /etc/localtime:/etc/localtime:ro
|
- /etc/localtime:/etc/localtime:ro
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
deploy:
|
deploy:
|
||||||
restart_policy:
|
restart_policy:
|
||||||
condition: on-failure
|
condition: on-failure
|
||||||
|
delay: 10s
|
||||||
|
max_attempts: 10
|
||||||
placement:
|
placement:
|
||||||
constraints:
|
constraints:
|
||||||
- node.labels.type == nas
|
- node.labels.type == nas
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.immich-web.rule=Host(`immich.bendtstudio.com`)
|
||||||
|
- traefik.http.routers.immich-web.entrypoints=web
|
||||||
|
- traefik.http.services.immich-web.loadbalancer.server.port=2283
|
||||||
|
- traefik.http.routers.immich-web.service=immich-web
|
||||||
|
- traefik.http.routers.immich-web.middlewares=redirect-to-https@file
|
||||||
|
- traefik.http.routers.immich-websecure.rule=Host(`immich.bendtstudio.com`)
|
||||||
|
- traefik.http.routers.immich-websecure.entrypoints=websecure
|
||||||
|
- traefik.http.services.immich-websecure.loadbalancer.server.port=2283
|
||||||
|
- traefik.http.routers.immich-websecure.service=immich-websecure
|
||||||
|
- traefik.http.routers.immich-websecure.tls.certresolver=letsencrypt
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:2283/api/server/ping"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 20s
|
||||||
|
retries: 10
|
||||||
|
start_period: 120s
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
immich-redis-data:
|
immich-redis-data:
|
||||||
immich-postgres:
|
immich-postgres:
|
||||||
immich-model-cache:
|
immich-model-cache:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
dokploy-network:
|
||||||
|
external: true
|
||||||
|
|||||||
190
infrastructure/README.md
Normal file
190
infrastructure/README.md
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
# Infrastructure Management
|
||||||
|
|
||||||
|
This directory contains backups, scripts, and documentation for managing the homelab infrastructure.
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
infrastructure/
|
||||||
|
├── compose/ # Docker Compose files backed up from cluster
|
||||||
|
├── stacks/ # Docker Stack definitions
|
||||||
|
├── traefik/ # Traefik configuration backups
|
||||||
|
├── scripts/ # Management and backup scripts
|
||||||
|
├── backups/ # Critical data backups (created by scripts)
|
||||||
|
└── BACKUP_MANIFEST.md # Auto-generated backup manifest
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Backup Compose Files
|
||||||
|
|
||||||
|
Run this to back up all compose configurations from the cluster:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/backup-compose-files.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This will:
|
||||||
|
- Copy all Dokploy compose files from `/etc/dokploy/compose/`
|
||||||
|
- Copy Traefik configuration
|
||||||
|
- Copy stack files
|
||||||
|
- Generate a backup manifest
|
||||||
|
|
||||||
|
### 2. Backup Critical Data
|
||||||
|
|
||||||
|
Run this to back up databases and application data:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/backup-critical-data.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This will:
|
||||||
|
- Backup PostgreSQL databases (Dokploy, Immich, BewCloud)
|
||||||
|
- Backup MariaDB databases (Pancake)
|
||||||
|
- Backup application volumes (Memos, Gitea)
|
||||||
|
- Clean up old backups (30+ days)
|
||||||
|
- Generate a backup report
|
||||||
|
|
||||||
|
## Automated Backups
|
||||||
|
|
||||||
|
### Set up cron jobs on your local machine:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Edit crontab
|
||||||
|
crontab -e
|
||||||
|
|
||||||
|
# Add these lines:
|
||||||
|
# Backup compose files daily at 2 AM
|
||||||
|
0 2 * * * cd /Users/timothy.bendt/developer/cloud-compose/infrastructure/scripts && ./backup-compose-files.sh >> /var/log/homelab-backup.log 2>&1
|
||||||
|
|
||||||
|
# Backup critical data daily at 3 AM
|
||||||
|
0 3 * * * cd /Users/timothy.bendt/developer/cloud-compose/infrastructure/scripts && ./backup-critical-data.sh >> /var/log/homelab-backup.log 2>&1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Or run manually whenever you make changes:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# After modifying any service
|
||||||
|
cd /Users/timothy.bendt/developer/cloud-compose
|
||||||
|
./infrastructure/scripts/backup-compose-files.sh
|
||||||
|
git add infrastructure/
|
||||||
|
git commit -m "Backup infrastructure configs"
|
||||||
|
git push
|
||||||
|
```
|
||||||
|
|
||||||
|
## Restore Procedures
|
||||||
|
|
||||||
|
### Restore a Compose File
|
||||||
|
|
||||||
|
1. Copy the compose file from `infrastructure/compose/<project>/docker-compose.yml`
|
||||||
|
2. Upload via Dokploy UI
|
||||||
|
3. Deploy
|
||||||
|
|
||||||
|
### Restore a Database
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: Restore Dokploy database
|
||||||
|
scp infrastructure/backups/dokploy-postgres-dokploy-2026-02-09.sql ubuntu@192.168.2.130:/tmp/
|
||||||
|
ssh ubuntu@192.168.2.130 "docker exec -i dokploy-postgres.1.<container-id> psql -U postgres dokploy < /tmp/dokploy-postgres-dokploy-2026-02-09.sql"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restore Volume Data
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: Restore Memos data
|
||||||
|
scp infrastructure/backups/bewcloud-memos-ssogxn-memos-data-2026-02-09.tar.gz ubuntu@192.168.2.130:/tmp/
|
||||||
|
ssh ubuntu@192.168.2.130 "docker run --rm -v bewcloud-memos-ssogxn_memos_data:/data -v /tmp:/backup alpine sh -c 'cd /data && tar xzf /backup/bewcloud-memos-ssogxn-memos-data-2026-02-09.tar.gz'"
|
||||||
|
```
|
||||||
|
|
||||||
|
## SSH Access
|
||||||
|
|
||||||
|
### Controller (tpi-n1)
|
||||||
|
```bash
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.130
|
||||||
|
```
|
||||||
|
|
||||||
|
### Worker (tpi-n2)
|
||||||
|
```bash
|
||||||
|
ssh -i ~/.ssh/id_ed25519 ubuntu@192.168.2.19
|
||||||
|
```
|
||||||
|
|
||||||
|
### NAS (node-nas)
|
||||||
|
```bash
|
||||||
|
ssh tim@192.168.2.18
|
||||||
|
```
|
||||||
|
|
||||||
|
## Useful Commands
|
||||||
|
|
||||||
|
### Check Service Status
|
||||||
|
```bash
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service ls"
|
||||||
|
```
|
||||||
|
|
||||||
|
### View Service Logs
|
||||||
|
```bash
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service logs <service-name> --tail 100 -f"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scale a Service
|
||||||
|
```bash
|
||||||
|
ssh ubuntu@192.168.2.130 "docker service scale <service-name>=<replicas>"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Node Status
|
||||||
|
```bash
|
||||||
|
ssh ubuntu@192.168.2.130 "docker node ls"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Web Interfaces
|
||||||
|
|
||||||
|
| Service | URL | Purpose |
|
||||||
|
|---------|-----|---------|
|
||||||
|
| Dokploy | http://192.168.2.130:3000 | Container management |
|
||||||
|
| Swarmpit | http://192.168.2.130:888 | Swarm monitoring |
|
||||||
|
| Traefik | http://192.168.2.130:8080 | Reverse proxy dashboard |
|
||||||
|
| MinIO | http://192.168.2.18:9001 | Object storage console |
|
||||||
|
|
||||||
|
## Backup Storage
|
||||||
|
|
||||||
|
### Local
|
||||||
|
Backups are stored in `infrastructure/backups/` with date stamps.
|
||||||
|
|
||||||
|
### Offsite (Recommended)
|
||||||
|
Consider copying backups to:
|
||||||
|
- MinIO bucket (`backups/`)
|
||||||
|
- External hard drive
|
||||||
|
- Cloud storage (AWS S3, etc.)
|
||||||
|
- Another server
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```bash
|
||||||
|
# Copy to MinIO
|
||||||
|
mc cp infrastructure/backups/* minio/backups/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Maintenance Checklist
|
||||||
|
|
||||||
|
### Daily
|
||||||
|
- [ ] Check backup logs for errors
|
||||||
|
- [ ] Verify critical services are running
|
||||||
|
|
||||||
|
### Weekly
|
||||||
|
- [ ] Review Swarmpit dashboard
|
||||||
|
- [ ] Check disk usage on all nodes
|
||||||
|
- [ ] Review backup integrity
|
||||||
|
|
||||||
|
### Monthly
|
||||||
|
- [ ] Test restore procedures
|
||||||
|
- [ ] Update documentation
|
||||||
|
- [ ] Review and update services
|
||||||
|
- [ ] Clean up unused images/volumes
|
||||||
|
|
||||||
|
### Quarterly
|
||||||
|
- [ ] Full disaster recovery drill
|
||||||
|
- [ ] Security audit
|
||||||
|
- [ ] Update base images
|
||||||
|
- [ ] Review access controls
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Infrastructure Management Guide - February 2026*
|
||||||
78
infrastructure/scripts/backup-compose-files.sh
Executable file
78
infrastructure/scripts/backup-compose-files.sh
Executable file
@@ -0,0 +1,78 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# backup-compose-files.sh
|
||||||
|
# Backs up all compose files from the cluster to local repo
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
BACKUP_DIR="/Users/timothy.bendt/developer/cloud-compose/infrastructure"
|
||||||
|
CONTROLLER="ubuntu@192.168.2.130"
|
||||||
|
DATE=$(date +%Y-%m-%d)
|
||||||
|
|
||||||
|
echo "📦 Backing up compose files from cluster..."
|
||||||
|
echo "Date: $DATE"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Backup Dokploy compose files
|
||||||
|
echo "📁 Backing up Dokploy compose files..."
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "find /etc/dokploy/compose -name 'docker-compose.yml' -type f" | while read file; do
|
||||||
|
# Get the project name from the path
|
||||||
|
project=$(echo $file | grep -oP 'compose/\K[^/]+' | head -1)
|
||||||
|
if [ ! -z "$project" ]; then
|
||||||
|
echo " - $project"
|
||||||
|
mkdir -p "$BACKUP_DIR/compose/$project"
|
||||||
|
scp -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER:"$file" "$BACKUP_DIR/compose/$project/"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Backup Traefik configuration
|
||||||
|
echo "📁 Backing up Traefik configuration..."
|
||||||
|
mkdir -p "$BACKUP_DIR/traefik"
|
||||||
|
scp -r -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER:/etc/dokploy/traefik/* "$BACKUP_DIR/traefik/" 2>/dev/null || echo " (No Traefik config or permission denied)"
|
||||||
|
|
||||||
|
# Backup local compose files from home directory
|
||||||
|
echo "📁 Backing up local compose files..."
|
||||||
|
scp -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER:~/minio-stack.yml "$BACKUP_DIR/stacks/" 2>/dev/null || true
|
||||||
|
|
||||||
|
# Create manifest
|
||||||
|
echo "📄 Creating backup manifest..."
|
||||||
|
cat > "$BACKUP_DIR/BACKUP_MANIFEST.md" << EOF
|
||||||
|
# Infrastructure Backup Manifest
|
||||||
|
|
||||||
|
**Backup Date:** $DATE
|
||||||
|
**Source:** $CONTROLLER
|
||||||
|
|
||||||
|
## Contents
|
||||||
|
|
||||||
|
### Compose Files
|
||||||
|
All Docker Compose files from Dokploy-managed projects:
|
||||||
|
\`\`\`
|
||||||
|
$(ls -1 $BACKUP_DIR/compose/)
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
### Stack Files
|
||||||
|
Standalone stack definitions:
|
||||||
|
\`\`\`
|
||||||
|
$(ls -1 $BACKUP_DIR/stacks/ 2>/dev/null || echo "None")
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
### Traefik Configuration
|
||||||
|
Reverse proxy configuration backed up to ./traefik/
|
||||||
|
|
||||||
|
## Restoration
|
||||||
|
To restore these configurations:
|
||||||
|
1. Copy compose files to /etc/dokploy/compose/<project>/code/
|
||||||
|
2. Copy stack files to ~/ on the controller
|
||||||
|
3. Copy Traefik config to /etc/dokploy/traefik/
|
||||||
|
4. Redeploy via Dokploy UI or docker stack deploy
|
||||||
|
|
||||||
|
## Services Backed Up
|
||||||
|
$(ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker service ls --format '- {{.Name}}'" 2>/dev/null || echo "Unable to fetch service list")
|
||||||
|
|
||||||
|
---
|
||||||
|
Generated: $(date)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "✅ Backup complete!"
|
||||||
|
echo "📁 Location: $BACKUP_DIR/"
|
||||||
|
echo "📄 Manifest: $BACKUP_DIR/BACKUP_MANIFEST.md"
|
||||||
139
infrastructure/scripts/backup-critical-data.sh
Executable file
139
infrastructure/scripts/backup-critical-data.sh
Executable file
@@ -0,0 +1,139 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# backup-critical-data.sh
|
||||||
|
# Backs up critical databases and data from the homelab
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
BACKUP_DIR="/Users/timothy.bendt/developer/cloud-compose/backups"
|
||||||
|
CONTROLLER="ubuntu@192.168.2.130"
|
||||||
|
NAS="tim@192.168.2.18"
|
||||||
|
DATE=$(date +%Y-%m-%d)
|
||||||
|
RETENTION_DAYS=30
|
||||||
|
|
||||||
|
echo "💾 Starting critical data backup..."
|
||||||
|
echo "Date: $DATE"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
mkdir -p "$BACKUP_DIR"
|
||||||
|
|
||||||
|
# Function to backup a PostgreSQL database
|
||||||
|
backup_postgres() {
|
||||||
|
local service=$1
|
||||||
|
local db_name=$2
|
||||||
|
local output_file="$BACKUP_DIR/${service}-${db_name}-${DATE}.sql"
|
||||||
|
|
||||||
|
echo " 📊 Backing up PostgreSQL: $service/$db_name..."
|
||||||
|
|
||||||
|
# Find the container
|
||||||
|
container=$(ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker ps -q -f name=$service" | head -1)
|
||||||
|
|
||||||
|
if [ ! -z "$container" ]; then
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker exec $container pg_dump -U postgres $db_name" > "$output_file"
|
||||||
|
echo " ✅ Saved to: $output_file"
|
||||||
|
else
|
||||||
|
echo " ❌ Container not found: $service"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to backup a MariaDB database
|
||||||
|
backup_mariadb() {
|
||||||
|
local service=$1
|
||||||
|
local db_name=$2
|
||||||
|
local output_file="$BACKUP_DIR/${service}-${db_name}-${DATE}.sql"
|
||||||
|
|
||||||
|
echo " 📊 Backing up MariaDB: $service/$db_name..."
|
||||||
|
|
||||||
|
container=$(ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker ps -q -f name=$service" | head -1)
|
||||||
|
|
||||||
|
if [ ! -z "$container" ]; then
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker exec $container mariadb-dump -u root -p'3QU5eA&U^Y&3DQm6' $db_name" > "$output_file" 2>/dev/null || \
|
||||||
|
echo " ⚠️ Could not backup (check credentials)"
|
||||||
|
else
|
||||||
|
echo " ❌ Container not found: $service"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to backup volume data
|
||||||
|
backup_volume() {
|
||||||
|
local service=$1
|
||||||
|
local volume=$2
|
||||||
|
local output_file="$BACKUP_DIR/${service}-data-${DATE}.tar.gz"
|
||||||
|
|
||||||
|
echo " 💿 Backing up volume: $volume..."
|
||||||
|
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker run --rm -v $volume:/data -v /tmp:/backup alpine tar czf /backup/${service}-backup.tar.gz -C /data ."
|
||||||
|
scp -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER:/tmp/${service}-backup.tar.gz "$output_file"
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "rm -f /tmp/${service}-backup.tar.gz"
|
||||||
|
|
||||||
|
echo " ✅ Saved to: $output_file"
|
||||||
|
}
|
||||||
|
|
||||||
|
echo "📦 Backing up databases..."
|
||||||
|
|
||||||
|
# Dokploy database
|
||||||
|
backup_postgres "dokploy-postgres" "dokploy"
|
||||||
|
|
||||||
|
# Immich database
|
||||||
|
backup_postgres "immich3-compose-ubyhe9-immich-database" "immich"
|
||||||
|
|
||||||
|
# BewCloud database
|
||||||
|
backup_postgres "bewcloud-postgres-in40hh" "bewcloud"
|
||||||
|
|
||||||
|
# Pancake database
|
||||||
|
backup_mariadb "bendtstudio-pancake-bzgfpc" "pancake"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "📦 Backing up application data..."
|
||||||
|
|
||||||
|
# Memos data
|
||||||
|
backup_volume "bewcloud-memos-ssogxn-memos" "bewcloud-memos-ssogxn_memos_data"
|
||||||
|
|
||||||
|
# Gitea data
|
||||||
|
echo " 💿 Backing up Gitea repositories..."
|
||||||
|
container=$(ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker ps -q -f name=gitea" | head -1)
|
||||||
|
if [ ! -z "$container" ]; then
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker exec $container tar czf /tmp/gitea-backup.tar.gz -C /data ."
|
||||||
|
scp -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER:/tmp/gitea-backup.tar.gz "$BACKUP_DIR/gitea-data-${DATE}.tar.gz"
|
||||||
|
ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "rm -f /tmp/gitea-backup.tar.gz"
|
||||||
|
echo " ✅ Saved to: $BACKUP_DIR/gitea-data-${DATE}.tar.gz"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "🧹 Cleaning up old backups (older than $RETENTION_DAYS days)..."
|
||||||
|
find "$BACKUP_DIR" -type f -mtime +$RETENTION_DAYS -delete
|
||||||
|
echo " ✅ Cleanup complete"
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "📄 Creating backup report..."
|
||||||
|
cat > "$BACKUP_DIR/BACKUP_REPORT-${DATE}.md" << EOF
|
||||||
|
# Backup Report - $DATE
|
||||||
|
|
||||||
|
## Backup Location
|
||||||
|
$BACKUP_DIR
|
||||||
|
|
||||||
|
## Files Backed Up
|
||||||
|
\`\`\`
|
||||||
|
$(ls -lh $BACKUP_DIR/*-${DATE}* 2>/dev/null || echo "No files found")
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
## Services Status
|
||||||
|
$(ssh -o ConnectTimeout=10 -i ~/.ssh/id_ed25519 $CONTROLLER "docker service ls --format 'table {{.Name}}\t{{.Replicas}}\t{{.Image}}'" 2>/dev/null || echo "Unable to fetch status")
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
1. Verify backup files are valid
|
||||||
|
2. Copy backups to offsite storage (MinIO, external drive, etc.)
|
||||||
|
3. Test restore procedure periodically
|
||||||
|
|
||||||
|
## Retention Policy
|
||||||
|
Backups older than $RETENTION_DAYS days are automatically deleted.
|
||||||
|
|
||||||
|
---
|
||||||
|
Generated: $(date)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "✅ Backup complete!"
|
||||||
|
echo "📁 Location: $BACKUP_DIR/"
|
||||||
|
echo "📄 Report: $BACKUP_DIR/BACKUP_REPORT-${DATE}.md"
|
||||||
|
echo ""
|
||||||
|
echo "⚠️ Remember to copy backups to offsite storage!"
|
||||||
@@ -1,5 +1,3 @@
|
|||||||
version: "3.8"
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
memos:
|
memos:
|
||||||
image: neosmemo/memos:stable
|
image: neosmemo/memos:stable
|
||||||
@@ -11,5 +9,24 @@ services:
|
|||||||
- MEMOS_PORT=${MEMOS_PORT}
|
- MEMOS_PORT=${MEMOS_PORT}
|
||||||
ports:
|
ports:
|
||||||
- "5230"
|
- "5230"
|
||||||
|
networks:
|
||||||
|
- dokploy-network
|
||||||
|
labels:
|
||||||
|
- traefik.http.routers.memos-web.rule=Host(`memos.bendtstudio.com`)
|
||||||
|
- traefik.http.routers.memos-web.entrypoints=web
|
||||||
|
- traefik.http.services.memos-web.loadbalancer.server.port=5230
|
||||||
|
- traefik.http.routers.memos-web.service=memos-web
|
||||||
|
- traefik.http.routers.memos-web.middlewares=redirect-to-https@file
|
||||||
|
- traefik.http.routers.memos-websecure.rule=Host(`memos.bendtstudio.com`)
|
||||||
|
- traefik.http.routers.memos-websecure.entrypoints=websecure
|
||||||
|
- traefik.http.services.memos-websecure.loadbalancer.server.port=5230
|
||||||
|
- traefik.http.routers.memos-websecure.service=memos-websecure
|
||||||
|
- traefik.http.routers.memos-websecure.tls.certresolver=letsencrypt
|
||||||
|
- traefik.enable=true
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
memos_data:
|
memos_data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
dokploy-network:
|
||||||
|
external: true
|
||||||
|
|||||||
11650
pancake.sql
Normal file
11650
pancake.sql
Normal file
File diff suppressed because one or more lines are too long
128
posse-party-compose.yml
Normal file
128
posse-party-compose.yml
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:17-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER:-postgres}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB:-posse_party}
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
volumes:
|
||||||
|
- db_data:/var/lib/postgresql/data
|
||||||
|
networks:
|
||||||
|
- posse-party-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
|
||||||
|
migrate:
|
||||||
|
image: ghcr.io/searlsco/posse_party:latest
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-posse_party}
|
||||||
|
RAILS_ENV: production
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
command: ["./script/release"]
|
||||||
|
restart: "no"
|
||||||
|
networks:
|
||||||
|
- posse-party-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
restart_policy:
|
||||||
|
condition: none
|
||||||
|
|
||||||
|
web:
|
||||||
|
image: ghcr.io/searlsco/posse_party:latest
|
||||||
|
restart: unless-stopped
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-posse_party}
|
||||||
|
RAILS_ENV: production
|
||||||
|
APP_HOST: ${APP_HOST:-posseparty.bendtstudio.com}
|
||||||
|
SECRET_KEY_BASE: ${SECRET_KEY_BASE}
|
||||||
|
FORCE_SSL: "false"
|
||||||
|
command: ["./script/server"]
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "curl -fsS http://localhost:3000/up || exit 1"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
start_period: 20s
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
migrate:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
stdin_open: true
|
||||||
|
tty: true
|
||||||
|
networks:
|
||||||
|
- posse-party-network
|
||||||
|
- dokploy-network
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.posseparty-web.rule=Host(`${APP_HOST:-posseparty.bendtstudio.com}`)
|
||||||
|
- traefik.http.routers.posseparty-web.entrypoints=web
|
||||||
|
- traefik.http.services.posseparty-web.loadbalancer.server.port=3000
|
||||||
|
- traefik.http.routers.posseparty-web.service=posseparty-web
|
||||||
|
- traefik.http.routers.posseparty-web.middlewares=redirect-to-https@file
|
||||||
|
- traefik.http.routers.posseparty-websecure.rule=Host(`${APP_HOST:-posseparty.bendtstudio.com}`)
|
||||||
|
- traefik.http.routers.posseparty-websecure.entrypoints=websecure
|
||||||
|
- traefik.http.services.posseparty-websecure.loadbalancer.server.port=3000
|
||||||
|
- traefik.http.routers.posseparty-websecure.service=posseparty-websecure
|
||||||
|
- traefik.http.routers.posseparty-websecure.tls.certresolver=letsencrypt
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
|
||||||
|
worker:
|
||||||
|
image: ghcr.io/searlsco/posse_party:latest
|
||||||
|
restart: unless-stopped
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgres://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-posse_party}
|
||||||
|
RAILS_ENV: production
|
||||||
|
APP_HOST: ${APP_HOST:-posseparty.bendtstudio.com}
|
||||||
|
SECRET_KEY_BASE: ${SECRET_KEY_BASE}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
migrate:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
command: ["./script/worker"]
|
||||||
|
healthcheck:
|
||||||
|
test: [
|
||||||
|
"CMD-SHELL",
|
||||||
|
"./bin/rails runner 'exit(SolidQueue::Process.where(\"last_heartbeat_at > ?\", SolidQueue.process_alive_threshold.ago).exists? ? 0 : 1)'"
|
||||||
|
]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 5
|
||||||
|
start_period: 60s
|
||||||
|
networks:
|
||||||
|
- posse-party-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db_data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
posse-party-network:
|
||||||
|
driver: bridge
|
||||||
|
dokploy-network:
|
||||||
|
external: true
|
||||||
113
posse-party-stack.yml
Normal file
113
posse-party-stack.yml
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:17-alpine
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: postgres
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
|
POSTGRES_DB: posse_party
|
||||||
|
# Performance tuning for 8GB RAM systems
|
||||||
|
POSTGRES_INITDB_ARGS: "--encoding=UTF8 --locale=en_US.UTF-8"
|
||||||
|
command: >
|
||||||
|
postgres
|
||||||
|
-c shared_buffers=2GB
|
||||||
|
-c effective_cache_size=6GB
|
||||||
|
-c work_mem=16MB
|
||||||
|
-c maintenance_work_mem=512MB
|
||||||
|
-c random_page_cost=1.1
|
||||||
|
-c effective_io_concurrency=200
|
||||||
|
-c wal_buffers=16MB
|
||||||
|
-c min_wal_size=1GB
|
||||||
|
-c max_wal_size=4GB
|
||||||
|
-c max_connections=100
|
||||||
|
-c log_min_duration_statement=1000
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
volumes:
|
||||||
|
- db_data:/var/lib/postgresql/data
|
||||||
|
networks:
|
||||||
|
- posseparty-network
|
||||||
|
deploy:
|
||||||
|
# Allow to run on any node (not just NAS)
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
|
||||||
|
migrate:
|
||||||
|
image: ghcr.io/searlsco/posse_party:latest
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD}@db:5432/posse_party
|
||||||
|
RAILS_ENV: production
|
||||||
|
SECRET_KEY_BASE: ${SECRET_KEY_BASE}
|
||||||
|
command: ["./script/release"]
|
||||||
|
networks:
|
||||||
|
- posseparty-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 10s
|
||||||
|
max_attempts: 10
|
||||||
|
|
||||||
|
web:
|
||||||
|
image: ghcr.io/searlsco/posse_party:latest
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD}@db:5432/posse_party
|
||||||
|
RAILS_ENV: production
|
||||||
|
APP_HOST: ${APP_HOST:-posseparty.bendtstudio.com}
|
||||||
|
SECRET_KEY_BASE: ${SECRET_KEY_BASE}
|
||||||
|
FORCE_SSL: "false"
|
||||||
|
command: ["./script/server"]
|
||||||
|
networks:
|
||||||
|
- posseparty-network
|
||||||
|
- dokploy-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 30s
|
||||||
|
max_attempts: 10
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.posseparty.rule=Host(`${APP_HOST:-posseparty.bendtstudio.com}`)
|
||||||
|
- traefik.http.routers.posseparty.entrypoints=web
|
||||||
|
- traefik.http.services.posseparty.loadbalancer.server.port=3000
|
||||||
|
- traefik.http.routers.posseparty.middlewares=redirect-to-https@file
|
||||||
|
- traefik.http.routers.posseparty-secure.rule=Host(`${APP_HOST:-posseparty.bendtstudio.com}`)
|
||||||
|
- traefik.http.routers.posseparty-secure.entrypoints=websecure
|
||||||
|
- traefik.http.routers.posseparty-secure.tls.certresolver=letsencrypt
|
||||||
|
|
||||||
|
worker:
|
||||||
|
image: ghcr.io/searlsco/posse_party:latest
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD}@db:5432/posse_party
|
||||||
|
RAILS_ENV: production
|
||||||
|
APP_HOST: ${APP_HOST:-posseparty.bendtstudio.com}
|
||||||
|
SECRET_KEY_BASE: ${SECRET_KEY_BASE}
|
||||||
|
command: ["./script/worker"]
|
||||||
|
networks:
|
||||||
|
- posseparty-network
|
||||||
|
deploy:
|
||||||
|
placement:
|
||||||
|
constraints:
|
||||||
|
- node.labels.type == nas
|
||||||
|
restart_policy:
|
||||||
|
condition: on-failure
|
||||||
|
delay: 30s
|
||||||
|
max_attempts: 10
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db_data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
posseparty-network:
|
||||||
|
driver: overlay
|
||||||
|
dokploy-network:
|
||||||
|
external: true
|
||||||
109
posse-party.env.example
Normal file
109
posse-party.env.example
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# Posse-Party Configuration
|
||||||
|
# Copy this file to .env and fill in your values before deploying
|
||||||
|
|
||||||
|
# Required Settings
|
||||||
|
# =================
|
||||||
|
|
||||||
|
# Your domain name for Posse-Party
|
||||||
|
# This will be used for HTTPS certificates and Rails URL generation
|
||||||
|
APP_HOST=posseparty.bendtstudio.com
|
||||||
|
|
||||||
|
# Secret key for Rails (generate with: openssl rand -hex 64)
|
||||||
|
SECRET_KEY_BASE=
|
||||||
|
|
||||||
|
# Database Configuration
|
||||||
|
# ======================
|
||||||
|
POSTGRES_USER=postgres
|
||||||
|
POSTGRES_PASSWORD=CHANGE_ME_TO_A_STRONG_PASSWORD
|
||||||
|
POSTGRES_DB=posse_party
|
||||||
|
|
||||||
|
# Optional: Email Configuration (required for login emails and notifications)
|
||||||
|
# Uncomment and configure one of the following providers:
|
||||||
|
|
||||||
|
# Option 1: SMTP (Generic)
|
||||||
|
# MAIL_PROVIDER=smtp
|
||||||
|
# MAIL_ADDRESS=smtp.example.com
|
||||||
|
# MAIL_PORT=587
|
||||||
|
# MAIL_USER_NAME=your-email@example.com
|
||||||
|
# MAIL_PASSWORD=your-email-password
|
||||||
|
# MAIL_AUTHENTICATION=plain
|
||||||
|
# MAIL_ENABLE_STARTTLS_AUTO=true
|
||||||
|
|
||||||
|
# Option 2: SendGrid
|
||||||
|
# MAIL_PROVIDER=sendgrid
|
||||||
|
# SENDGRID_API_KEY=your-api-key
|
||||||
|
|
||||||
|
# Option 3: Mailgun
|
||||||
|
# MAIL_PROVIDER=mailgun
|
||||||
|
# MAILGUN_API_KEY=your-api-key
|
||||||
|
# MAILGUN_DOMAIN=your-domain.com
|
||||||
|
|
||||||
|
# Option 4: Resend
|
||||||
|
# MAIL_PROVIDER=resend
|
||||||
|
# RESEND_API_KEY=your-api-key
|
||||||
|
|
||||||
|
# Option 5: Postmark
|
||||||
|
# MAIL_PROVIDER=postmark
|
||||||
|
# POSTMARK_API_KEY=your-api-key
|
||||||
|
|
||||||
|
# Option 6: Amazon SES
|
||||||
|
# MAIL_PROVIDER=amazon_ses
|
||||||
|
# AWS_ACCESS_KEY_ID=your-access-key
|
||||||
|
# AWS_SECRET_ACCESS_KEY=your-secret-key
|
||||||
|
# AWS_REGION=us-east-1
|
||||||
|
|
||||||
|
# Option 7: Brevo (formerly Sendinblue)
|
||||||
|
# MAIL_PROVIDER=brevo
|
||||||
|
# BREVO_API_KEY=your-api-key
|
||||||
|
|
||||||
|
# Option 8: Mailjet
|
||||||
|
# MAIL_PROVIDER=mailjet
|
||||||
|
# MAILJET_API_KEY=your-api-key
|
||||||
|
# MAILJET_SECRET_KEY=your-secret-key
|
||||||
|
|
||||||
|
# From Address (required if using email)
|
||||||
|
# MAIL_FROM_ADDRESS=posseparty@bendtstudio.com
|
||||||
|
|
||||||
|
# Optional: OAuth Providers (for social login)
|
||||||
|
# Uncomment and configure the providers you want to use:
|
||||||
|
|
||||||
|
# GitHub OAuth
|
||||||
|
# GITHUB_CLIENT_ID=your-github-client-id
|
||||||
|
# GITHUB_CLIENT_SECRET=your-github-client-secret
|
||||||
|
|
||||||
|
# Google OAuth
|
||||||
|
# GOOGLE_CLIENT_ID=your-google-client-id
|
||||||
|
# GOOGLE_CLIENT_SECRET=your-google-client-secret
|
||||||
|
|
||||||
|
# Twitter/X OAuth
|
||||||
|
# TWITTER_CLIENT_ID=your-twitter-client-id
|
||||||
|
# TWITTER_CLIENT_SECRET=your-twitter-client-secret
|
||||||
|
|
||||||
|
# LinkedIn OAuth
|
||||||
|
# LINKEDIN_CLIENT_ID=your-linkedin-client-id
|
||||||
|
# LINKEDIN_CLIENT_SECRET=your-linkedin-client-secret
|
||||||
|
|
||||||
|
# Optional: External Services
|
||||||
|
# ============================
|
||||||
|
|
||||||
|
# CDN for static assets (optional)
|
||||||
|
# RAILS_ASSET_HOST=https://cdn.bendtstudio.com
|
||||||
|
|
||||||
|
# S3/MinIO for file storage (optional, uses local filesystem by default)
|
||||||
|
# AWS_ACCESS_KEY_ID=your-access-key
|
||||||
|
# AWS_SECRET_ACCESS_KEY=your-secret-key
|
||||||
|
# AWS_REGION=us-east-1
|
||||||
|
# S3_BUCKET_NAME=posse-party-uploads
|
||||||
|
# S3_ENDPOINT=https://s3.amazonaws.com # or your MinIO endpoint
|
||||||
|
|
||||||
|
# Optional: Feature Flags
|
||||||
|
# =======================
|
||||||
|
|
||||||
|
# Enable/disable user registrations
|
||||||
|
# REGISTRATION_ENABLED=true
|
||||||
|
|
||||||
|
# Default locale
|
||||||
|
# LOCALE=en
|
||||||
|
|
||||||
|
# Time zone
|
||||||
|
# TIME_ZONE=America/New_York
|
||||||
Reference in New Issue
Block a user