feat: Add dynamic participant scaling and improved UX

- Makefile: Add local-vm-test/local-vm-full targets, improve error messages
- README.md: Document dynamic scaling, add troubleshooting section
- flake.nix: Implement dynamic container generation based on PARTICIPANTS env var

This enables running the workshop VM with 1-15 containers instead of a fixed 15,
making local development more accessible on resource-constrained machines.
This commit is contained in:
2025-08-14 18:52:04 +02:00
parent c49eb614d5
commit 82780552f0
3 changed files with 563 additions and 470 deletions

View File

@@ -4,67 +4,74 @@ export
.PHONY: help deploy-cloud build-usb flash-usb local-vm-run clean status destroy-cloud opencode lint
DOMAIN := $(or $(WORKSHOP_DOMAIN),codecrispi.es)
PARTICIPANTS := $(or $(WORKSHOP_PARTICIPANTS),3)
PARTICIPANTS := $(or $(PARTICIPANTS),3)
USB_DEVICE := $(or $(USB_DEVICE),/dev/sdX)
help:
@echo "CODE CRISPIES Workshop"
@echo "CODE CRISPIES Workshop Infrastructure"
@echo ""
@echo "Cloud Infrastructure (Hetzner):"
@echo "🌍 Cloud Infrastructure (Hetzner):"
@echo " make deploy-cloud - Deploy 15 VMs to Hetzner Cloud"
@echo " make status-cloud - Check server health"
@echo " make destroy-cloud - Destroy cloud infrastructure"
@echo ""
@echo "USB Boot Drive:"
@echo "💾 USB Boot Drive:"
@echo " make build-usb - Build NixOS workshop ISO"
@echo " make flash-usb - Flash ISO to USB drive"
@echo ""
@echo "Local Development:"
@echo " make local-vm-run - Start local VM with 15 containers"
@echo "🖥️ Local Development:"
@echo " make local-vm-run - Start local VM with containers"
@echo " make local-vm-test - Test with 2 containers only"
@echo " make local-vm-full - Test with all 15 containers"
@echo " make clean - Clean build artifacts"
@echo ""
@echo "Development:"
@echo "⚙️ Development:"
@echo " make opencode - Start opencode in dev shell"
@echo " make lint - Run linting checks"
@echo " make check-vm - Verify VM builds correctly"
@echo ""
@echo "Current Config:"
@echo " Domain: $(DOMAIN)"
@echo " Participants: $(PARTICIPANTS)"
@echo " USB Device: $(USB_DEVICE)"
@echo ""
@echo "Config: Domain=$(DOMAIN), USB=$(USB_DEVICE)"
@echo "Required: HCLOUD_TOKEN, SSH key at ~/.ssh/id_ed25519.pub"
build-usb:
@echo "Building NixOS workshop ISO for $(DOMAIN)..."
@echo "🔨 Building NixOS workshop ISO..."
@if [ ! -f ~/.ssh/id_ed25519.pub ]; then \
echo "SSH key not found at ~/.ssh/id_ed25519.pub"; \
echo "SSH key not found at ~/.ssh/id_ed25519.pub"; \
echo "Generate with: ssh-keygen -t ed25519"; \
exit 1; \
fi
nix build .#live-iso --show-trace
@echo "ISO built: result/iso/nixos.iso"
@echo "Size: $$(du -h result/iso/nixos.iso | cut -f1)"
@echo "ISO built: result/iso/nixos.iso"
@echo "📦 Size: $$(du -h result/iso/nixos.iso | cut -f1)"
flash-usb: build-usb
@if [ "$(USB_DEVICE)" = "/dev/sdX" ]; then \
echo "Set USB_DEVICE=/dev/sdX (find with 'lsblk')"; \
echo "Set USB_DEVICE=/dev/sdX (find with 'lsblk')"; \
exit 1; \
fi
@echo "About to flash $(USB_DEVICE) - THIS WILL ERASE ALL DATA!"
@echo "Verify device: $$(lsblk $(USB_DEVICE) 2>/dev/null || echo 'DEVICE NOT FOUND')"
@echo "⚠️ About to flash $(USB_DEVICE) - THIS WILL ERASE ALL DATA!"
@echo "Device info: $$(lsblk $(USB_DEVICE) 2>/dev/null || echo 'DEVICE NOT FOUND')"
@read -p "Continue? [y/N]: " confirm && [ "$$confirm" = "y" ]
sudo dd if=result/iso/nixos.iso of=$(USB_DEVICE) bs=4M status=progress oflag=sync
sync
@echo "USB drive ready for workshop!"
@echo "USB drive ready for workshop!"
deploy-cloud:
@if [ -z "$(HCLOUD_TOKEN)" ]; then \
echo "HCLOUD_TOKEN not set"; \
echo "HCLOUD_TOKEN not set"; \
echo "Get token from: https://console.hetzner.cloud/"; \
exit 1; \
fi
@if [ ! -f ~/.ssh/id_ed25519.pub ]; then \
echo "SSH key not found at ~/.ssh/id_ed25519.pub"; \
echo "SSH key not found at ~/.ssh/id_ed25519.pub"; \
echo "Generate with: ssh-keygen -t ed25519"; \
exit 1; \
fi
@echo "Deploying 15 workshop servers to Hetzner Cloud..."
@echo "🚀 Deploying 15 workshop servers to Hetzner Cloud..."
@echo "Domain: $(DOMAIN)"
cd terraform && terraform init
cd terraform && terraform apply -auto-approve \
@@ -73,47 +80,62 @@ deploy-cloud:
-var="dns_zone_id=$(DNS_ZONE_ID)" \
-var="domain=$(DOMAIN)" \
-var="ssh_public_key=$$(cat ~/.ssh/id_ed25519.pub)"
@echo "Running health checks..."
@echo "Running health checks..."
@sleep 60
$(MAKE) status-cloud
@echo "Cloud deployment complete!"
@echo "Cloud deployment complete!"
status-cloud:
@echo "Checking server health..."
@echo "🔍 Checking server health..."
@for name in hopper curie lovelace noether hamilton franklin johnson clarke goldberg liskov wing rosen shaw karp rich; do \
printf "%-10s " "$$name:"; \
if timeout 10 curl -s -f https://traefik.$$name.$(DOMAIN)/ping >/dev/null 2>&1; then \
echo "Ready"; \
echo "Ready"; \
elif timeout 5 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no workshop@$$name.$(DOMAIN) "echo ok" >/dev/null 2>&1; then \
echo "SSH OK, Traefik starting..."; \
echo "SSH OK, Traefik starting..."; \
else \
echo "Not ready"; \
echo "Not ready"; \
fi; \
done
destroy-cloud:
@echo "This will destroy ALL workshop servers!"
@echo "⚠️ This will destroy ALL workshop servers!"
@read -p "Continue? [y/N]: " confirm && [ "$$confirm" = "y" ]
cd terraform && terraform destroy -auto-approve
@echo "✅ Cloud infrastructure destroyed"
local-vm-run:
@echo "Starting local workshop VM with $(PARTICIPANTS) containers..."
@echo "🖥️ Starting local workshop VM with $(PARTICIPANTS) containers..."
@echo "VM will open with desktop showing all participant containers"
nix run --impure .#local-vm
PARTICIPANTS=$(PARTICIPANTS) nix run --impure .#local-vm
local-vm-test:
@echo "🧪 Testing with 2 containers only..."
PARTICIPANTS=2 nix run --impure .#local-vm
local-vm-full:
@echo "🚀 Testing with all 15 containers (heavy resource usage!)..."
PARTICIPANTS=15 nix run --impure .#local-vm
check-vm:
@echo "✅ Verifying VM builds correctly..."
PARTICIPANTS=2 nix build --impure .#local-vm
@echo "✅ VM build successful"
clean:
rm -rf result .direnv terraform/.terraform terraform/terraform.tfstate*
@echo "Cleaned up build artifacts"
@echo "🧹 Cleaned up build artifacts"
opencode:
@echo "Starting opencode in Nix dev shell..."
@echo "💻 Starting opencode in Nix dev shell..."
nix develop --command opencode
lint:
@echo "Linting Markdown files..."
@echo "🔍 Linting project files..."
@echo "Markdown files..."
@markdownlint-cli . || true
@echo "Linting JSON files..."
@echo "JSON files..."
@find . -type f -name "*.json" -print0 | xargs -0 -I {} bash -c 'jq . "{}" >/dev/null || (echo "JSON lint error in {}" && exit 1)'
@echo "Linting Nix files..."
@nixpkgs-fmt . || true
@echo "Linting complete."
@echo "Nix files..."
@nixpkgs-fmt --check . || true
@echo "Linting complete"

111
README.md
View File

@@ -1,19 +1,23 @@
# 🍪 CODE CRISPIES Workshop Infrastructure
This repository contains the infrastructure for the Co-op Cloud workshop, providing three distinct deployment environments.
This repository contains the infrastructure for the Co-op Cloud workshop, providing three distinct deployment environments with dynamic scaling support.
---
## 🚀 Quick Start
```bash
# 1. Start the local development virtual machine (15 containers)
# 1. Start the local development virtual machine (default: 3 containers)
make local-vm-run
# 2. Build & flash USB drives for participants
# 2. Test with different container counts
PARTICIPANTS=2 make local-vm-test # Lightweight testing
PARTICIPANTS=15 make local-vm-full # Full workshop simulation
# 3. Build & flash USB drives for participants
make build-usb
make flash-usb USB_DEVICE=/dev/sdX
# 3. Deploy the production cloud infrastructure
# 4. Deploy the production cloud infrastructure
export HCLOUD_TOKEN="your_token_here"
make deploy-cloud
```
@@ -23,7 +27,7 @@ make deploy-cloud
## 📁 Project Structure
```
├── flake.nix # All Nix configurations (USB, VM)
├── flake.nix # All Nix configurations (USB, VM, containers)
├── terraform/ # Hetzner Cloud infrastructure
├── scripts/deploy.sh # Cloud setup automation
├── docs/USB_BOOT_INSTRUCTIONS.md
@@ -48,30 +52,34 @@ make deploy-cloud
### 3. Local (Development)
- **What:** A self-contained Virtual Machine (VM) that runs on your local computer with all 15 containers.
- **What:** A self-contained Virtual Machine (VM) that runs on your local computer with configurable container count.
- **Purpose:** Complete local testing environment that mirrors the production setup without needing cloud servers.
- **Resources:** Creates 15 containers (heavy resource usage - ensure adequate RAM/CPU)
- **Scalability:** Supports 1-15 containers via `PARTICIPANTS` environment variable.
---
## 🔧 Local Development Workflow
1. **Start the VM**
Run the following command. A new window will open and automatically boot into a lightweight desktop.
1. **Choose Your Scale**
```bash
# Lightweight development (2 containers)
PARTICIPANTS=2 make local-vm-run
# Production simulation (15 containers) - requires 8GB+ RAM
PARTICIPANTS=15 make local-vm-run
# Use default (3 containers) - good balance
make local-vm-run
```
2. **Work Inside the VM**
All testing is now done inside the VM's graphical desktop.
All testing is now done inside the VM's graphical desktop:
* Open the **Terminal** to run commands.
* Open **Firefox** to view the deployed web applications.
3. **Example: Deploying WordPress**
* **In the VM's Terminal**, get a root shell and SSH into a participant's container:
**In the VM's Terminal**, get a root shell and SSH into a participant's container:
```bash
# Become root (no password needed)
sudo -i
@@ -79,19 +87,21 @@ make deploy-cloud
# Connect to participant 1 (hopper)
connect hopper
# Or direct SSH
# Or direct SSH (password: root)
ssh root@192.168.100.11
```
* **Inside the container**, deploy a WordPress site with `abra`:
**Inside the container**, deploy a WordPress site with `abra`:
```bash
abra app new wordpress -S --domain=blog.hopper.local
abra app deploy blog.hopper.local
```
* **In the VM's Firefox**, navigate to `http://blog.hopper.local`. You will see the WordPress installation screen.
**In the VM's Firefox**, navigate to `http://blog.hopper.local`. You will see the WordPress installation screen.
4. **Available Helper Commands**
```bash
sudo containers # List all 15 containers with IPs
sudo containers # List all containers with IPs
sudo logs # Show setup logs for all containers
sudo recipes # Display available Co-op Cloud recipes
sudo help # Show all available commands
@@ -144,7 +154,25 @@ The USB environment includes:
---
## 🧹 Cleanup
## ⚙️ Environment Variables
Control workshop behavior with environment variables:
```bash
# Number of containers (1-15, default: 3)
export PARTICIPANTS=5
make local-vm-run
# Workshop domain for cloud deployment
export WORKSHOP_DOMAIN=myworkshop.com
# USB device for flashing
export USB_DEVICE=/dev/sdb
```
---
## 🧹 Cleanup & Management
```bash
# Clean local build artifacts
@@ -153,7 +181,12 @@ make clean
# Destroy Hetzner cloud infrastructure
make destroy-cloud
# To stop the local VM, simply close its window
# Verify VM builds correctly
make check-vm
# Run development tools
make opencode # Start development environment
make lint # Code quality checks
```
---
@@ -166,7 +199,10 @@ make destroy-cloud
```
- **Nix:** NixOS or Nix package manager with flakes enabled
- **Cloud Tokens:** Hetzner Cloud API token for deployment
- **Resources:** For local VM: 8GB+ RAM recommended (runs 15 containers)
- **Resources:**
- 2-3 containers: 4GB+ RAM
- 5-10 containers: 8GB+ RAM
- 15 containers: 16GB+ RAM
---
@@ -175,6 +211,39 @@ make destroy-cloud
1. **Preparation:** Deploy cloud infrastructure with `make deploy-cloud`
2. **Distribution:** Flash USB drives for participants with `make build-usb && make flash-usb`
3. **Workshop:** Participants boot from USB and connect to their assigned cloud servers
4. **Development:** Use local VM (`make local-vm-run`) for testing and development
4. **Development:** Use local VM with `make local-vm-run` for testing and development
The architecture ensures participants get identical environments whether connecting from USB boot drives to cloud servers or testing locally in the development VM.
---
## 🐛 Troubleshooting
### VM Won't Start
```bash
# Check if build works
make check-vm
# Try with fewer containers
PARTICIPANTS=2 make local-vm-run
```
### Containers Not Accessible
```bash
# Check container status inside VM
sudo containers
# View setup logs
sudo logs
# Manual SSH test
ssh root@192.168.100.11 # Password: root
```
### Abra Not Working in Container
```bash
# Inside container, check installation
ls -la /root/.local/bin/abra
export PATH="/root/.local/bin:$PATH"
abra --version
```

158
flake.nix
View File

@@ -13,32 +13,27 @@
let
system = "x86_64-linux";
pkgs = nixpkgs.legacyPackages.${system};
# All possible participant names for the workshop
allParticipantNames = [
"hopper"
"curie"
"lovelace"
"noether"
"hamilton"
"franklin"
"johnson"
"clarke"
"goldberg"
"liskov"
"wing"
"rosen"
"shaw"
"karp"
"rich"
"hopper" "curie" "lovelace" "noether" "hamilton"
"franklin" "johnson" "clarke" "goldberg" "liskov"
"wing" "rosen" "shaw" "karp" "rich"
];
numLookup = {
"0" = 0; "1" = 1; "2" = 2; "3" = 3; "4" = 4; "5" = 5; "6" = 6; "7" = 7; "8" = 8; "9" = 9;
"10" = 10; "11" = 11; "12" = 12; "13" = 13; "14" = 14; "15" = 15;
};
# Dynamic participant count (default 3, max 15)
participantsEnv = builtins.getEnv "PARTICIPANTS";
numParticipants = if builtins.hasAttr participantsEnv numLookup
then builtins.getAttr participantsEnv numLookup
numParticipants =
if participantsEnv != "" && builtins.match "^[0-9]+$" participantsEnv != null
then
let num = builtins.fromJSON participantsEnv;
in if num >= 1 && num <= 15 then num else 3
else 3;
participantNames = builtins.genList (i: builtins.elemAt allParticipantNames i) numParticipants;
# Selected participant names based on count
participantNames = builtins.genList
(i: builtins.elemAt allParticipantNames i)
numParticipants;
in
{
packages.${system} = {
@@ -70,12 +65,7 @@
security.sudo.wheelNeedsPassword = false;
environment.systemPackages = with pkgs; [
openssh
curl
git
networkmanager
firefox
xterm
openssh curl git networkmanager firefox xterm
];
programs.zsh = {
@@ -181,11 +171,10 @@
users.users.workshop = {
isNormalUser = true;
extraGroups = [ "wheel" ];
password = "";
password = "workshop";
shell = pkgs.bash;
};
security.pam.services.login.allowNullPassword = true;
security.sudo.wheelNeedsPassword = false;
services.xserver = {
@@ -202,37 +191,30 @@
services.xserver.displayManager.sessionCommands = ''
${pkgs.xfce.xfce4-terminal}/bin/xfce4-terminal --title="Workshop Terminal" \
--command="bash -c '
echo "Workshop VM Ready!";
echo "";
echo "SSH into containers:";
${builtins.concatStringsSep "
" (map (name:
let ip = "192.168.100.${toString (11 + (builtins.elemAt (builtins.genList (x: x) (builtins.length participantNames))
(builtins.elemAt
(builtins.filter (i: builtins.elemAt participantNames i == name)
(builtins.genList (x: x) (builtins.length participantNames))) 0)))}";
in "echo \" sudo connect ${name} # Container login to ${name}\""
) participantNames)}
echo " (Total: ${toString numParticipants} containers)";
echo "";
echo "Container management:";
echo " sudo containers # List all containers";
echo " sudo logs # Show setup logs";
echo " sudo recipes # Show available recipes";
echo "";
echo "Abra is pre-installed in containers!";
echo "";
echo \"Workshop VM Ready!\";
echo \"\";
echo \"SSH into containers:\";
${builtins.concatStringsSep "\n" (builtins.genList (i:
let
name = builtins.elemAt participantNames i;
ip = "192.168.100.${toString (11 + i)}";
in "echo \" sudo connect ${name} # Container login to ${name} (${ip})\""
) (builtins.length participantNames))}
echo \" (Total: ${toString numParticipants} containers)\";
echo \"\";
echo \"Container management:\";
echo \" sudo containers # List all containers\";
echo \" sudo logs # Show setup logs\";
echo \" sudo recipes # Show available recipes\";
echo \"\";
echo \"Abra is pre-installed in containers!\";
echo \"\";
bash
'" &
'';
environment.systemPackages = with pkgs; [
firefox
curl
git
jq
nano
tree
nixos-container
firefox curl git jq nano tree nixos-container
(pkgs.writeScriptBin "connect" ''
#!/bin/bash
@@ -248,6 +230,14 @@
#!/bin/bash
echo "Active containers:"
nixos-container list
echo ""
echo "Container IPs:"
${builtins.concatStringsSep "\n" (builtins.genList (i:
let
name = builtins.elemAt participantNames i;
ip = "192.168.100.${toString (11 + i)}";
in "echo \" ${name}: ${ip}\""
) (builtins.length participantNames))}
'')
(pkgs.writeScriptBin "logs" ''
@@ -258,24 +248,33 @@
(pkgs.writeScriptBin "recipes" ''
#!/bin/bash
echo "Available Co-op Cloud Recipes"
echo "Available Co-op Cloud Recipes:"
echo ""
echo "Content Management:"
echo " wordpress ghost hedgedoc dokuwiki mediawiki"
echo ""
echo "File & Collaboration:"
echo " nextcloud seafile collabora onlyoffice"
echo ""
echo "Communication:"
echo " jitsi-meet matrix-synapse rocketchat mattermost"
echo ""
echo "E-commerce & Business:"
echo " prestashop invoiceninja kimai pretix"
echo ""
echo "Development & Tools:"
echo " gitea drone n8n gitlab jupyter-lab"
echo ""
echo "Analytics & Monitoring:"
echo " plausible matomo uptime-kuma grafana"
echo ""
echo "Media & Social:"
echo " peertube funkwhale mastodon pixelfed jellyfin"
echo ""
echo "Usage in container:"
echo " abra app new <recipe> -S --domain=myapp.<container-name>.local"
echo " abra app deploy myapp.<container-name>.local"
echo ""
echo "Browse all: https://recipes.coopcloud.tech"
'')
@@ -296,10 +295,11 @@
echo " sudo connect hopper"
echo " ssh root@192.168.100.11"
echo ""
echo "Available containers: ${builtins.concatStringsSep " " participantNames}"
echo "Available containers (${toString numParticipants}): ${builtins.concatStringsSep " " participantNames}"
'')
];
# Add local DNS resolution for .local domains
# Local DNS resolution for .local domains
networking = {
hostName = "workshop-vm";
firewall.enable = false;
@@ -316,6 +316,7 @@
) (builtins.length participantNames));
};
# Dynamic container generation
containers = builtins.listToAttrs (builtins.genList
(i:
let
@@ -333,10 +334,10 @@
config = {
system.stateVersion = "25.05";
users.users.root.password = "";
users.users.root.password = "root";
users.users.workshop = {
isNormalUser = true;
password = "";
password = "workshop";
extraGroups = [ "wheel" "docker" ];
};
@@ -345,27 +346,20 @@
settings = {
PasswordAuthentication = true;
PermitRootLogin = "yes";
PermitEmptyPasswords = true;
};
};
networking = {
hostName = name;
nameservers = [ "8.8.8.8" ];
nameservers = [ "8.8.8.8" "1.1.1.1" ];
firewall.enable = false;
};
security.sudo.wheelNeedsPassword = false;
security.pam.services.login.allowNullPassword = true;
virtualisation.docker.enable = true;
environment.systemPackages = with pkgs; [
docker
curl
git
wget
jq
bash
docker curl git wget jq bash nano tree
];
systemd.services.workshop-setup = {
@@ -375,39 +369,46 @@
script = ''
echo "Setting up ${name} container..."
for i in {1..10}; do
# Wait for network connectivity
for i in {1..15}; do
if ${pkgs.curl}/bin/curl -s --max-time 5 google.com >/dev/null 2>&1; then
echo "Network ready"
break
fi
echo "Waiting for network... ($i/10)"
sleep 2
echo "Waiting for network... ($i/15)"
sleep 3
done
${pkgs.docker}/bin/docker swarm init --advertise-addr ${ip} || true
# Initialize Docker Swarm
${pkgs.docker}/bin/docker swarm init --advertise-addr ${ip} || echo "Swarm already initialized or failed"
# Install abra
export HOME=/root
if [ ! -f /root/.local/bin/abra ]; then
echo "Installing abra..."
${pkgs.curl}/bin/curl -fsSL https://install.abra.coopcloud.tech | ${pkgs.bash}/bin/bash
echo "Abra installed"
echo "Abra installed to /root/.local/bin/abra"
fi
# Setup PATH in .bashrc
if ! grep -q "/.local/bin" /root/.bashrc 2>/dev/null; then
echo 'export PATH="$HOME/.local/bin:$PATH"' >> /root/.bashrc
fi
# Create system symlink for abra
if [ -f /root/.local/bin/abra ]; then
ln -sf /root/.local/bin/abra /usr/local/bin/abra 2>/dev/null || true
fi
# Add abra server config
if [ -f /root/.local/bin/abra ]; then
export PATH="/root/.local/bin:$PATH"
/root/.local/bin/abra server add ${name}.local 2>/dev/null || true
/root/.local/bin/abra server add ${name}.local 2>/dev/null || echo "Server already added or command failed"
fi
echo "${name} container ready!"
echo "SSH: ssh root@${ip} (no password)"
echo "SSH: ssh root@${ip} (password: root)"
echo "Workshop user: ssh workshop@${ip} (password: workshop)"
echo "Abra: Available via 'abra' command"
'';
serviceConfig = {
@@ -415,6 +416,7 @@
RemainAfterExit = true;
StandardOutput = "journal";
StandardError = "journal";
TimeoutStartSec = "300";
};
};