doran/scripts/hetzner/bootstrap.sh
2026-03-28 20:53:29 +01:00

302 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail

# Repository root: two directories above the directory holding this script.
ROOT_DIR=$(cd "$(dirname "$0")/../.." && pwd)

# Fixed locations inside the repository.
TF_DIR="${ROOT_DIR}/infra/terraform/hetzner"
OVERLAY_DIR="${ROOT_DIR}/deploy/k8s/overlays/hetzner-single-node"

# Local state directory; the fetched kubeconfig is stored here.
STATE_DIR="${ROOT_DIR}/.state/hetzner"
KUBECONFIG_PATH="${STATE_DIR}/kubeconfig.yaml"

# Fallback project identity; the namespace defaults to the project name.
DEFAULT_PROJECT_NAME="unrip"
DEFAULT_PROJECT_NAMESPACE="${DEFAULT_PROJECT_NAME}"

mkdir -p "${STATE_DIR}"
#######################################
# Abort the script unless a command is available.
# Arguments: $1 - command name, looked up with `command -v`
# Outputs:   "missing command: <name>" on stderr when the lookup fails
# Returns:   0 when the command exists; otherwise exits the shell with 1
#######################################
require() {
  if command -v "$1" >/dev/null 2>&1; then
    return 0
  fi
  echo "missing command: $1" >&2
  exit 1
}
#######################################
# Poll a URL with curl until it answers successfully.
# Arguments:
#   $1 - URL to probe
#   $2 - label used in the progress and timeout messages
#   $3 - maximum number of attempts (default 120)
#   $4 - seconds to sleep between attempts (default 5)
# Outputs:
#   Progress lines on stdout (first attempt, then every sixth attempt);
#   a timeout message on stderr.
# Returns: 0 once a probe succeeds, 1 when the attempts are exhausted.
#######################################
wait_for_url() {
  local url="$1"
  local label="$2"
  local max_attempts="${3:-120}"
  local sleep_seconds="${4:-5}"
  local attempt=1
  # -k skips TLS verification, -f turns HTTP error statuses into failures.
  # The two timeouts bound every single probe: without them one attempt
  # against a host that silently drops packets can hang far longer than
  # the sleep interval, making the overall attempts * sleep budget
  # meaningless (wait_for_ssh bounds its probe with ConnectTimeout too).
  until curl -kfsS --connect-timeout 5 --max-time 15 "$url" >/dev/null 2>&1; do
    if (( attempt >= max_attempts )); then
      echo "timed out waiting for ${label}: ${url}" >&2
      return 1
    fi
    if (( attempt == 1 || attempt % 6 == 0 )); then
      echo "waiting for ${label} (${attempt}/${max_attempts})..."
    fi
    sleep "$sleep_seconds"
    attempt=$((attempt + 1))
  done
}
#######################################
# Retry a trivial remote command until the SSH target accepts it.
# Globals:   SSH_PRIVATE_KEY_PATH (read) - identity file passed to ssh -i
# Arguments:
#   $1 - ssh target (user@host)
#   $2 - maximum number of attempts (default 120)
#   $3 - seconds to sleep between attempts (default 5)
# Outputs:
#   Progress lines on stdout (first attempt, then every sixth attempt);
#   a timeout message on stderr.
# Returns: 0 once the remote command succeeds, 1 when attempts run out.
#######################################
wait_for_ssh() {
  local target="$1"
  local max_attempts="${2:-120}"
  local sleep_seconds="${3:-5}"
  local tries=1
  # Host key checking is disabled and no known_hosts file is used for
  # this probe; each connection attempt is capped at five seconds.
  local -a probe_cmd=(
    ssh -i "$SSH_PRIVATE_KEY_PATH"
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -o ConnectTimeout=5
    "$target" 'echo ssh-ready'
  )

  while ! "${probe_cmd[@]}" >/dev/null 2>&1; do
    if (( tries >= max_attempts )); then
      echo "timed out waiting for ssh: ${target}" >&2
      return 1
    fi
    if (( tries == 1 || tries % 6 == 0 )); then
      echo "waiting for ssh (${tries}/${max_attempts})..."
    fi
    sleep "$sleep_seconds"
    tries=$((tries + 1))
  done
}
#######################################
# Wait until a peer with the given hostname shows up in the local
# tailscale status and print an address for it.
#
# Preference order among peers whose HostName matches:
#   1. an online peer with a DNS name (printed without the trailing dot),
#   2. any peer with at least one tailscale IP (the first IP is printed).
# Ties are broken by DNS name, descending.
#
# Arguments:
#   $1 - tailscale HostName to look for
#   $2 - maximum number of attempts (default 120)
#   $3 - seconds to sleep between attempts (default 5)
# Outputs:
#   The discovered DNS name or IP on stdout (and nothing else, so callers
#   can capture it); progress and timeout messages on stderr.
# Returns: 0 when a node was found, 1 on timeout or missing tailscale CLI.
#######################################
wait_for_tailscale_node() {
  local host_name="$1"
  local max_attempts="${2:-120}"
  local sleep_seconds="${3:-5}"
  local attempt=1
  local status_json discovered

  command -v tailscale >/dev/null 2>&1 || {
    echo "tailscale CLI is required locally for tailscale-first bootstrap" >&2
    return 1
  }

  while true; do
    # A failing `tailscale status` is treated as "nothing discovered yet".
    status_json=$(tailscale status --json 2>/dev/null) || status_json=""

    # The program is passed with -c so that stdin stays free for the JSON.
    # Feeding the program through a heredoc (`python3 - <<PY`) would take
    # over stdin and the status JSON would never reach json.load().
    discovered=$(python3 -c '
import json
import sys

host = sys.argv[1]
try:
    data = json.load(sys.stdin)
except Exception:
    raise SystemExit(0)
if not isinstance(data, dict):
    raise SystemExit(0)

# "Peer" may be missing or null; treat both as "no peers".
peers = data.get("Peer") or {}
matches = [p for p in peers.values() if p.get("HostName") == host]
ordered = sorted(matches, key=lambda p: p.get("DNSName") or "", reverse=True)

for peer in ordered:
    if peer.get("Online"):
        dns = (peer.get("DNSName") or "").rstrip(".")
        if dns:
            print(dns)
            raise SystemExit(0)

for peer in ordered:
    if peer.get("TailscaleIPs"):
        print(peer["TailscaleIPs"][0])
        raise SystemExit(0)
' "$host_name" <<<"$status_json") || discovered=""

    if [[ -n "$discovered" ]]; then
      printf '%s\n' "$discovered"
      return 0
    fi
    if (( attempt >= max_attempts )); then
      echo "timed out waiting for tailscale node: ${host_name}" >&2
      return 1
    fi
    if (( attempt == 1 || attempt % 6 == 0 )); then
      echo "waiting for tailscale node ${host_name} (${attempt}/${max_attempts})..." >&2
    fi
    sleep "$sleep_seconds"
    attempt=$((attempt + 1))
  done
}
# Fail fast when any external tool this script relies on is missing.
for tool in terraform kubectl docker curl python3 ssh realpath; do
  require "$tool"
done
# ---- Configuration taken from the environment ------------------------------
# `${VAR:?msg}` aborts the script with msg when VAR is unset or empty.
# `${VAR:=default}` assigns the default when VAR is unset or empty.
# Order matters: later defaults are derived from earlier values.

# Required: Hetzner Cloud token and the SSH public key file; both are handed
# to terraform further down.
: "${HCLOUD_TOKEN:?set HCLOUD_TOKEN}"
: "${SSH_PUBLIC_KEY_PATH:?set SSH_PUBLIC_KEY_PATH}"
# Required: passed to terraform as public_domain.
: "${PUBLIC_DOMAIN:?set PUBLIC_DOMAIN}"
# Required: written into the generated ClusterIssuer patches.
: "${LETSENCRYPT_EMAIL:?set LETSENCRYPT_EMAIL}"
# Optional: a non-empty auth key makes the script reach the node over
# tailscale; a non-empty control-plane hostname is used as that address
# instead of discovering the node via `tailscale status`.
: "${TAILSCALE_AUTH_KEY:=}"
: "${TAILSCALE_CONTROL_PLANE_HOSTNAME:=}"
# Required: written to the project secret env file.
: "${NEAR_INTENTS_API_KEY:?set NEAR_INTENTS_API_KEY}"
# Required: base for the derived Forgejo and registry host names below.
: "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${FORGEJO_DOMAIN:=git.${BASE_DOMAIN}}"
: "${FORGEJO_ROOT_URL:=https://${FORGEJO_DOMAIN}/}"
: "${REGISTRY_DOMAIN:=registry.${BASE_DOMAIN}}"
# Required: registry credentials, used for the htpasswd entry and for the
# project's docker-registry pull secret.
: "${REGISTRY_USERNAME:?set REGISTRY_USERNAME}"
: "${REGISTRY_PASSWORD:?set REGISTRY_PASSWORD}"
# Required: written to secrets/forgejo.env as runner_registration_token.
: "${FORGEJO_RUNNER_REGISTRATION_TOKEN:?set FORGEJO_RUNNER_REGISTRATION_TOKEN}"
# Optional: forwarded to terraform as admin_cidr_blocks unless empty or '[]'.
: "${TF_ADMIN_CIDR_BLOCKS:=}"
# Project identity and overlay location; defaults come from the constants
# defined at the top of the script.
: "${PROJECT_NAME:=$DEFAULT_PROJECT_NAME}"
: "${PROJECT_NAMESPACE:=$DEFAULT_PROJECT_NAMESPACE}"
: "${PROJECT_OVERLAY_DIR:=$OVERLAY_DIR}"
# Hostname looked up on the tailnet when no control-plane hostname is given.
: "${BOOTSTRAP_NODE_NAME:=unrip-1}"
# Set to 1 to skip `terraform apply`; init and output still run.
: "${SKIP_TERRAFORM_APPLY:=0}"
# Path of the project's kustomize base, resolved relative to the overlay dir.
# NOTE(review): because of `:=`, an empty value is replaced by the default,
# so this cannot be set to empty to leave the project base out.
: "${PROJECT_KUSTOMIZE_PATH:=../../projects/${PROJECT_NAME}/base}"
# Names derived from the project name.
: "${PROJECT_SECRET_NAME:=${PROJECT_NAME}-secrets}"
: "${PROJECT_SECRET_ENV_BASENAME:=${PROJECT_NAME}.env}"
: "${PROJECT_REGISTRY_SECRET_NAME:=${PROJECT_NAME}-registry-creds}"
: "${PROJECT_IMAGE_REPOSITORY:=${PROJECT_NAME}}"
# Whitespace-separated deployment names that get the bootstrap image.
: "${PROJECT_DEPLOYMENTS:=near-intents-ingest dummy-reactor dummy-executor dummy-consumer}"
# Values derived from the configuration above.
BOOTSTRAP_IMAGE="${PROJECT_IMAGE_REPOSITORY}:bootstrap"
PROJECT_SECRET_ENV_PATH="${PROJECT_OVERLAY_DIR}/secrets/${PROJECT_SECRET_ENV_BASENAME}"
GENERATED_OVERLAY_DIR="${STATE_DIR}/generated-overlay"
GENERATED_OVERLAY_KUSTOMIZATION="${GENERATED_OVERLAY_DIR}/kustomization.yaml"

# The public key content goes to terraform; the private key is expected at
# the same path without the ".pub" suffix and is used for every ssh call.
SSH_PUBLIC_KEY=$(cat "$SSH_PUBLIC_KEY_PATH")
SSH_PRIVATE_KEY_PATH="${SSH_PUBLIC_KEY_PATH%.pub}"
[[ -f "$SSH_PRIVATE_KEY_PATH" ]] || {
  echo "missing ssh private key for bootstrap: $SSH_PRIVATE_KEY_PATH" >&2
  exit 1
}
# Collect the -var arguments for `terraform apply` one pair at a time.
TF_VARS=()
TF_VARS+=(-var "hcloud_token=$HCLOUD_TOKEN")
TF_VARS+=(-var "ssh_public_key=$SSH_PUBLIC_KEY")
TF_VARS+=(-var "public_domain=$PUBLIC_DOMAIN")
TF_VARS+=(-var "bootstrap_repo_url=local-bootstrap")
TF_VARS+=(-var "tailscale_auth_key=$TAILSCALE_AUTH_KEY")
TF_VARS+=(-var "tailscale_control_plane_hostname=$TAILSCALE_CONTROL_PLANE_HOSTNAME")

# admin_cidr_blocks is only passed when it carries an actual value.
case "$TF_ADMIN_CIDR_BLOCKS" in
  '' | '[]') ;;
  *) TF_VARS+=(-var "admin_cidr_blocks=$TF_ADMIN_CIDR_BLOCKS") ;;
esac

# With a tailscale auth key configured, run the firewall note helper first.
[[ -z "$TAILSCALE_AUTH_KEY" ]] \
  || bash "$ROOT_DIR/scripts/hetzner/print-tailscale-firewall-note.sh"

# init always runs; apply is skipped only when SKIP_TERRAFORM_APPLY is 1.
terraform -chdir="$TF_DIR" init
[[ "$SKIP_TERRAFORM_APPLY" == "1" ]] \
  || terraform -chdir="$TF_DIR" apply -auto-approve "${TF_VARS[@]}"
# Read the server address and API URL from the terraform outputs.
SERVER_IP=$(terraform -chdir="$TF_DIR" output -raw server_ipv4)
K3S_API_URL=$(terraform -chdir="$TF_DIR" output -raw k3s_api_url)

# Default: reach the node on its public IPv4 address.
SSH_TARGET="root@${SERVER_IP}"

# With a tailscale auth key, ssh and the Kubernetes API are reached through
# the tailnet instead: either the explicitly configured hostname or whatever
# address node discovery returns.
if [[ -n "$TAILSCALE_AUTH_KEY" ]]; then
  if [[ -n "$TAILSCALE_CONTROL_PLANE_HOSTNAME" ]]; then
    DISCOVERED_TAILSCALE_HOST="$TAILSCALE_CONTROL_PLANE_HOSTNAME"
  else
    DISCOVERED_TAILSCALE_HOST=$(wait_for_tailscale_node "$BOOTSTRAP_NODE_NAME")
  fi
  SSH_TARGET="root@${DISCOVERED_TAILSCALE_HOST}"
  K3S_API_URL="https://${DISCOVERED_TAILSCALE_HOST}:6443"
fi
# Optional DNS automation. Cloudflare is used when both of its variables are
# set; otherwise Porkbun when both of its variables are set; otherwise this
# step is skipped. A failing helper only produces a warning.
dns_label=""
if [[ -n "${CLOUDFLARE_API_TOKEN:-}" && -n "${CLOUDFLARE_ZONE_ID:-}" ]]; then
  dns_label="cloudflare"
elif [[ -n "${PORKBUN_API_KEY:-}" && -n "${PORKBUN_SECRET_API_KEY:-}" ]]; then
  dns_label="porkbun"
fi

if [[ -n "$dns_label" ]]; then
  SERVER_IP="$SERVER_IP" BASE_DOMAIN="$BASE_DOMAIN" \
    bash "$ROOT_DIR/scripts/hetzner/configure-${dns_label}-dns.sh" \
    || echo "warning: ${dns_label} DNS automation failed; continuing without automated DNS" >&2
fi
# Block until the node accepts ssh and the Kubernetes API reports ready.
wait_for_ssh "$SSH_TARGET"
echo "waiting for Kubernetes API on $K3S_API_URL..."
wait_for_url "${K3S_API_URL}/readyz" "k3s API readiness"

# Copy the node's kubeconfig and point it at the reachable API URL instead
# of the loopback address. The file carries cluster credentials, so it is
# created under a restrictive umask (in a subshell, so the umask does not
# leak into the rest of the script); the chmod also tightens a file left
# over from an earlier run with looser permissions.
(
  umask 077
  ssh -i "$SSH_PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$SSH_TARGET" 'sudo cat /etc/rancher/k3s/k3s.yaml' \
    | sed "s|https://127.0.0.1:6443|${K3S_API_URL}|" > "$KUBECONFIG_PATH"
)
chmod 600 "$KUBECONFIG_PATH"
export KUBECONFIG="$KUBECONFIG_PATH"
# Materialise the env files consumed by the generated kustomization's
# secretGenerator entries.
mkdir -p "$PROJECT_OVERLAY_DIR/secrets" "$GENERATED_OVERLAY_DIR"

# Both files hold secrets (API key, runner registration token), so they are
# written under a restrictive umask inside a subshell; the chmod afterwards
# also tightens files left over from an earlier run.
(
  umask 077
  printf 'NEAR_INTENTS_API_KEY=%s\n' "$NEAR_INTENTS_API_KEY" \
    > "$PROJECT_SECRET_ENV_PATH"
  printf 'root_url=%s\ndomain=%s\nrunner_registration_token=%s\n' \
    "$FORGEJO_ROOT_URL" "$FORGEJO_DOMAIN" "$FORGEJO_RUNNER_REGISTRATION_TOKEN" \
    > "$PROJECT_OVERLAY_DIR/secrets/forgejo.env"
)
chmod 600 "$PROJECT_SECRET_ENV_PATH" "$PROJECT_OVERLAY_DIR/secrets/forgejo.env"
# Generate the kustomize overlay under $GENERATED_OVERLAY_DIR with an inline
# Python script. The heredoc delimiter is unquoted, so the shell substitutes
# $PROJECT_*, $ROOT_DIR, $GENERATED_OVERLAY_DIR, $LETSENCRYPT_EMAIL,
# $FORGEJO_DOMAIN and $REGISTRY_DOMAIN into the Python source before python3
# reads it.
#
# Files written into the generated overlay:
#   kustomization.yaml        - resources, patches and secretGenerator entries
#   storage-class.patch.yaml  - copied verbatim from $PROJECT_OVERLAY_DIR
#   issuer-email.patch.yaml   - ACME email for both letsencrypt ClusterIssuers
#   ingress-hosts.patch.yaml  - hosts for the forgejo and registry Ingresses
#
# NOTE(review): values are spliced into Python string literals unescaped; a
#   value containing a double quote or a backslash would change or break the
#   script - confirm the inputs are constrained.
# NOTE(review): every reference out of the generated overlay is prefixed with
#   "../../", but GENERATED_OVERLAY_DIR is $ROOT_DIR/.state/hetzner/generated-overlay,
#   i.e. three levels below ROOT_DIR - confirm the prefix really resolves to
#   the repository root.
# NOTE(review): Path.relative_to() raises ValueError when PROJECT_OVERLAY_DIR
#   is not located under ROOT_DIR.
# NOTE(review): secrets/registry.htpasswd is referenced by the registry-secrets
#   generator but is not written by this script - confirm where it comes from.
python3 - <<PY
from pathlib import Path
root = Path("$PROJECT_OVERLAY_DIR")
generated_root = Path("$GENERATED_OVERLAY_DIR")
project_kustomize_path = "$PROJECT_KUSTOMIZE_PATH"
project_namespace = "$PROJECT_NAMESPACE"
project_secret_name = "$PROJECT_SECRET_NAME"
project_secret_env_basename = "$PROJECT_SECRET_ENV_BASENAME"
project_overlay_dir = Path("$PROJECT_OVERLAY_DIR").relative_to(Path("$ROOT_DIR"))
resources = [f"../../{project_overlay_dir}/../../platform/base"]
if project_kustomize_path:
resources.append(f"../../{project_overlay_dir}/{project_kustomize_path}")
generated_root.mkdir(parents=True, exist_ok=True)
(generated_root / "kustomization.yaml").write_text(
"""apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
"""
+ "".join(f" - {resource}\n" for resource in resources)
+ """patches:
- path: ingress-hosts.patch.yaml
- path: issuer-email.patch.yaml
- path: storage-class.patch.yaml
secretGenerator:
- name: {project_secret_name}
namespace: {project_namespace}
envs:
- ../../{project_overlay_dir}/secrets/{project_secret_env_basename}
- name: forgejo-secrets
namespace: forgejo
envs:
- ../../{project_overlay_dir}/secrets/forgejo.env
- name: registry-secrets
namespace: registry
files:
- htpasswd=../../{project_overlay_dir}/secrets/registry.htpasswd
generatorOptions:
disableNameSuffixHash: true
""".format(
project_secret_name=project_secret_name,
project_namespace=project_namespace,
project_overlay_dir=project_overlay_dir,
project_secret_env_basename=project_secret_env_basename,
)
)
(generated_root / "storage-class.patch.yaml").write_text((root / "storage-class.patch.yaml").read_text())
(generated_root / "issuer-email.patch.yaml").write_text(f'''apiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-staging\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n---\napiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-production\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n''')
(generated_root / "ingress-hosts.patch.yaml").write_text(f'''apiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: forgejo\n namespace: forgejo\nspec:\n tls:\n - hosts:\n - {"$FORGEJO_DOMAIN"}\n secretName: forgejo-tls\n rules:\n - host: {"$FORGEJO_DOMAIN"}\n---\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: registry\n namespace: registry\nspec:\n tls:\n - hosts:\n - {"$REGISTRY_DOMAIN"}\n secretName: registry-tls\n rules:\n - host: {"$REGISTRY_DOMAIN"}\n''')
PY
# Namespaces first: the platform namespaces from the base manifest, then the
# project namespace (create-or-update via a client-side dry run).
kubectl apply -f "$ROOT_DIR/deploy/k8s/platform/base/namespace.yaml"
kubectl create namespace "$PROJECT_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Produce the bcrypt htpasswd entry up front. When docker runs inside a
# process substitution its exit status is discarded, so a failed run would
# silently apply a secret with an empty htpasswd; as a plain assignment the
# failure stops the script under `set -e`.
REGISTRY_HTPASSWD_ENTRY=$(docker run --rm --entrypoint htpasswd httpd:2 -Bbn "$REGISTRY_USERNAME" "$REGISTRY_PASSWORD")
if [[ -z "$REGISTRY_HTPASSWD_ENTRY" ]]; then
  echo "failed to generate registry htpasswd entry" >&2
  exit 1
fi

# Registry auth secret, fed from the entry generated above.
kubectl -n registry create secret generic registry-secrets \
  --from-file=htpasswd=<(printf '%s\n' "$REGISTRY_HTPASSWD_ENTRY") \
  --dry-run=client -o yaml | kubectl apply -f -

# Image pull secret for the project namespace.
kubectl -n "$PROJECT_NAMESPACE" create secret docker-registry "$PROJECT_REGISTRY_SECRET_NAME" \
  --docker-server="$REGISTRY_DOMAIN" \
  --docker-username="$REGISTRY_USERNAME" \
  --docker-password="$REGISTRY_PASSWORD" \
  --dry-run=client -o yaml | kubectl apply -f -

# Finally apply the generated overlay.
kubectl apply -k "$GENERATED_OVERLAY_DIR"
# Build the bootstrap image locally and stream it over ssh into the node's
# k3s image store.
IMAGE_IMPORT_SSH=(
  ssh -i "$SSH_PRIVATE_KEY_PATH"
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  "$SSH_TARGET"
)
docker build -t "$BOOTSTRAP_IMAGE" "$ROOT_DIR"
docker save "$BOOTSTRAP_IMAGE" | "${IMAGE_IMPORT_SSH[@]}" 'sudo k3s ctr images import -'

# PROJECT_DEPLOYMENTS is a whitespace-separated list, so it is expanded
# unquoted on purpose. All deployments are switched to the bootstrap image
# first; only then are the rollouts awaited one after another.
for workload in $PROJECT_DEPLOYMENTS; do
  kubectl -n "$PROJECT_NAMESPACE" set image "deployment/${workload}" app="$BOOTSTRAP_IMAGE"
done
for workload in $PROJECT_DEPLOYMENTS; do
  kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/${workload}" --timeout=180s
done
#######################################
# Name the DNS provider whose automation this script selects.
# Uses the same rules as the DNS automation step: cloudflare when both
# CLOUDFLARE_API_TOKEN and CLOUDFLARE_ZONE_ID are set, otherwise porkbun
# when both PORKBUN_API_KEY and PORKBUN_SECRET_API_KEY are set.
# Outputs: "cloudflare", "porkbun", or nothing when neither applies.
#######################################
bootstrap_dns_provider() {
  if [[ -n "${CLOUDFLARE_API_TOKEN:-}" && -n "${CLOUDFLARE_ZONE_ID:-}" ]]; then
    echo "cloudflare"
  elif [[ -n "${PORKBUN_API_KEY:-}" && -n "${PORKBUN_SECRET_API_KEY:-}" ]]; then
    echo "porkbun"
  fi
}

# Final summary as key=value lines.
echo "bootstrap complete"
echo "project_name=$PROJECT_NAME"
echo "project_namespace=$PROJECT_NAMESPACE"
echo "project_overlay_dir=$PROJECT_OVERLAY_DIR"
echo "server_ip=$SERVER_IP"
echo "ssh_target=$SSH_TARGET"
echo "k3s_api_url=$K3S_API_URL"
echo "kubeconfig=$KUBECONFIG_PATH"
echo "bootstrap_image=$BOOTSTRAP_IMAGE"
echo "forgejo_url=$FORGEJO_ROOT_URL"
echo "registry_url=https://$REGISTRY_DOMAIN"
# Concatenating both `:+` expansions printed "cloudflareporkbun" when both
# providers had a key set and ignored the second variable each provider
# needs; report the provider that was actually selected instead.
echo "dns_provider=$(bootstrap_dns_provider)"