feat: bootstrap hetzner k3s deployment

This commit is contained in:
Philipp 2026-03-28 20:52:48 +01:00
parent 20c3feb4d2
commit 2a32461e39
73 changed files with 4028 additions and 206 deletions

6
.dockerignore Normal file
View file

@ -0,0 +1,6 @@
node_modules
npm-debug.log
.git
.gitignore
.env
var

View file

@ -1,6 +1,40 @@
NEAR_INTENTS_API_KEY=your_solver_jwt
# Local dev / container runtime values
NEAR_INTENTS_API_KEY=replace_me
NEAR_INTENTS_WS_URL=wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS=127.0.0.1:9092
KAFKA_CLIENT_ID=trading-system
KAFKA_BROKERS=redpanda:9092
KAFKA_CLIENT_ID=unrip
KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE=raw.near_intents.quote
KAFKA_TOPIC_NORM_SWAP_DEMAND=norm.swap_demand
KAFKA_TOPIC_CMD_EXECUTE_TRADE=cmd.execute_trade
KAFKA_TOPIC_EXEC_TRADE_RESULT=exec.trade_result
KAFKA_CONSUMER_GROUP_DUMMY=dummy-reactor-v1
KAFKA_CONSUMER_GROUP_EXECUTOR=dummy-executor-v1
EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state
# Repo-driven Hetzner bootstrap values live separately from the app .env.
# Copy scripts/hetzner/bootstrap-secrets.env.example to
# scripts/hetzner/bootstrap-secrets.env, fill in the values, then:
# source scripts/hetzner/bootstrap-secrets.env
# bash scripts/hetzner/bootstrap.sh
#
# The local-machine bootstrap flow is:
# 1. provide Hetzner token + SSH key path + DNS/ingress values + app/bootstrap secrets
# 2. run Terraform from infra/terraform/hetzner
# 3. wait for cloud-init/k3s readiness
# 4. fetch kubeconfig to .state/hetzner/kubeconfig.yaml
# 5. create Kubernetes Secrets from local values
# 6. build/import the current app image into k3s
# 7. apply repo Kubernetes manifests and let the bootstrap job create topics
#
# Expected bootstrap inputs:
# - HCLOUD_TOKEN
# - SSH_PUBLIC_KEY_PATH
# - TF_ADMIN_CIDR_BLOCKS
# - BASE_DOMAIN
# - FORGEJO_DOMAIN
# - FORGEJO_ROOT_URL
# - NEAR_INTENTS_API_KEY
# - FORGEJO_RUNNER_REGISTRATION_TOKEN
#
# Future k3s deployment should source the app values from Kubernetes Secret/ConfigMap.
# The Hetzner bootstrap path clones the repo to /opt/unrip/repo so that its deploy/k8s assets are available on the node later.

View file

@ -0,0 +1,69 @@
# Forgejo Actions workflow: on every push to main, build the app image with
# buildah, push it to the private registry, and roll each listed deployment
# to the new image.
#
# Repository secrets read by this workflow:
#   KUBECONFIG_B64, REGISTRY_USERNAME, REGISTRY_PASSWORD
# Repository variables read by this workflow:
#   REGISTRY_HOST (no default), PROJECT_NAME, PROJECT_NAMESPACE,
#   PROJECT_DEPLOYMENTS (these three default to the `unrip` project).
name: deploy
on:
  push:
    branches:
      - main
jobs:
  deploy:
    runs-on: linux-amd64
    env:
      IMAGE_TAG: ${{ github.sha }}
      REGISTRY_HOST: ${{ vars.REGISTRY_HOST }}
      PROJECT_NAME: ${{ vars.PROJECT_NAME || 'unrip' }}
      PROJECT_NAMESPACE: ${{ vars.PROJECT_NAMESPACE || vars.PROJECT_NAME || 'unrip' }}
      PROJECT_DEPLOYMENTS: ${{ vars.PROJECT_DEPLOYMENTS || 'near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer' }}
      BUILDAH_ISOLATION: chroot
      STORAGE_DRIVER: vfs
    steps:
      - name: Install tooling
        run: |
          apk add --no-cache buildah kubectl
      - name: Checkout
        env:
          REPO_URL: ${{ github.server_url }}/${{ github.repository }}.git
          REPO_TOKEN: ${{ github.token }}
        run: |
          # Manual shallow checkout of exactly the pushed commit.
          rm -rf .git
          git init .
          git remote add origin "https://oauth2:${REPO_TOKEN}@${REPO_URL#https://}"
          git fetch --depth=1 origin "$GITHUB_SHA"
          git checkout --detach FETCH_HEAD
      - name: Load kubeconfig
        env:
          # Passed through the environment instead of being templated into the
          # script text, so quotes or shell metacharacters inside the secret
          # cannot break or alter the command.
          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
        run: |
          # Keep the cluster credentials readable by the job user only.
          umask 077
          mkdir -p "$HOME/.kube"
          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
          # Fail early if the credentials do not work.
          kubectl get ns
      - name: Login to registry
        env:
          REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          # --password-stdin keeps the password out of the process argument list.
          printf '%s' "$REGISTRY_PASSWORD" | buildah login -u "$REGISTRY_USERNAME" --password-stdin "$REGISTRY_HOST"
      - name: Resolve deployment settings
        run: |
          # Export the fully qualified image reference for the following steps.
          IMAGE="$REGISTRY_HOST/$PROJECT_NAME:$IMAGE_TAG"
          {
            echo "IMAGE=$IMAGE"
            echo "PROJECT_NAMESPACE=$PROJECT_NAMESPACE"
            echo "PROJECT_DEPLOYMENTS=$PROJECT_DEPLOYMENTS"
          } >> "$GITHUB_ENV"
      - name: Build and push image
        run: |
          buildah bud --storage-driver "$STORAGE_DRIVER" -t "$IMAGE" .
          buildah push --storage-driver "$STORAGE_DRIVER" "$IMAGE" "docker://$IMAGE"
      - name: Roll deployments to new image
        run: |
          # POSIX-only splitting of the comma-separated list: this job installs
          # its tools with apk and never installs bash, so avoid bash arrays and
          # here-strings. Word splitting also drops surrounding whitespace and
          # empty entries; `set -f` disables globbing while the list is expanded.
          set -f
          for deployment in $(printf '%s' "$PROJECT_DEPLOYMENTS" | tr ',' ' '); do
            # Every deployment is expected to have a container named `app`.
            kubectl -n "$PROJECT_NAMESPACE" set image "deployment/$deployment" app="$IMAGE"
            kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/$deployment" --timeout=180s
          done

9
.gitignore vendored
View file

@ -1,5 +1,14 @@
.ant-colony/
.state/
.venv/
__pycache__/
*.pyc
.env
deploy/k8s/overlays/hetzner-single-node/secrets/*.env
deploy/k8s/overlays/hetzner-single-node/secrets/*.htpasswd
!deploy/k8s/overlays/hetzner-single-node/secrets/*.example
scripts/hetzner/bootstrap-secrets.env
infra/terraform/hetzner/.terraform/
infra/terraform/hetzner/.terraform.lock.hcl
infra/terraform/hetzner/terraform.tfstate
infra/terraform/hetzner/terraform.tfstate.*

10
Dockerfile Normal file
View file

@ -0,0 +1,10 @@
# Single shared image for all Node.js services in this repo.
FROM node:22-bookworm-slim
WORKDIR /app
# Copy only the dependency manifests first so the install layer below is
# reused from cache until package.json or package-lock.json changes.
COPY package.json package-lock.json ./
# Install exactly the locked dependency versions, leaving out devDependencies.
RUN npm ci --omit=dev
# Copy the rest of the build context (minus whatever .dockerignore excludes).
COPY . .
ENV NODE_ENV=production
# Default process; compose.yml overrides `command` per service to start the
# other entrypoints under src/apps/.
CMD ["node", "src/apps/dummy-consumer.mjs"]

402
README.md
View file

@ -1,35 +1,21 @@
# near-intents-monitor
Minimal event-driven POC for the first trading-system component:
Production-shaped first slice of the trading system:
- **venue ingest**: NEAR Intents solver-bus quote flow
- **central bus**: Redpanda / Kafka-compatible broker
- **dummy reactor**: placeholder consumer for later trade-decision logic
- **bus**: Redpanda first, Kafka-compatible by design
- **reactor**: dummy decision engine emitting commands
- **executor**: dummy execution worker with durable idempotency state
- **result consumer**: downstream observer of execution outcomes
## Architecture
```text
NEAR Intents WebSocket
|
v
src/apps/near-intents-ingest.mjs
|
+--> raw.near_intents.quote
|
+--> norm.swap_demand
|
v
src/apps/dummy-consumer.mjs
```
The ingest app connects to the NEAR Intents websocket, subscribes to `quote` and `quote_status`, normalizes quote demand, and publishes to a Kafka-compatible topic.
## Project structure
## Canonical repo shape
```text
src/
apps/
near-intents-ingest.mjs
dummy-reactor.mjs
dummy-executor.mjs
dummy-consumer.mjs
bus/
kafka/
@ -37,98 +23,346 @@ src/
consumer.mjs
core/
event-envelope.mjs
executor-state-store.mjs
log.mjs
pair-filter.mjs
schemas.mjs
lib/
env.mjs
config.mjs
env.mjs
venues/
near-intents/
ingest.mjs
normalize.mjs
ws.mjs
compose.yml
Dockerfile
docs/contracts.md
deploy/hetzner/README.md
```
## Environment
## Event flow
Create `.env` in repo root:
```env
NEAR_INTENTS_API_KEY=your_solver_jwt
NEAR_INTENTS_WS_URL=wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS=127.0.0.1:9092
KAFKA_CLIENT_ID=trading-system
KAFKA_TOPIC_NORM_SWAP_DEMAND=norm.swap_demand
KAFKA_CONSUMER_GROUP_DUMMY=dummy-reactor-v1
```text
NEAR Intents WebSocket
|
+--> raw.near_intents.quote
|
v
norm.swap_demand
|
v
cmd.execute_trade
|
v
exec.trade_result
```
### Broker notes
Core rule: services do not call each other directly for trading flow; they communicate through bus topics only.
- `KAFKA_BROKERS` accepts a comma-separated broker list.
- Redpanda works because the apps use the Kafka protocol via `kafkajs`.
- `src/lib/config.mjs` is the shared config loader for both app entrypoints.
- The ingest app publishes normalized quote-demand events to `norm.swap_demand` by default.
## Contracts
See `docs/contracts.md`.
## Install
Current topics:
- `raw.near_intents.quote`
- `norm.swap_demand`
- `cmd.execute_trade`
- `exec.trade_result`
## Primary deployment path: repo-driven Hetzner bootstrap
The primary production path is no longer a Compose-only VM workflow.
The intended operating model is:
- Terraform provisions a Hetzner single-node environment
- cloud-init installs k3s automatically on first boot
- a local operator workstation performs the first repo-driven bootstrap
- Kubernetes manifests install Redpanda, the app workloads, Forgejo, runner, registry, and ingress-related components
- once the in-cluster Git + CI stack is alive, routine app deploys move to self-hosted CI
This is a two-phase model:
- **Phase 0:** local workstation bootstrap of a brand-new cluster
- **Phase 1:** self-hosted Forgejo + runner takes over app delivery
Compose still exists for local development and optional single-machine testing, but it is not the canonical production story.
## Prerequisites for first deployment
Install locally on the operator workstation:
- Terraform `>= 1.6`
- `kubectl`
- `docker`
- `curl`
You also need:
- a Hetzner Cloud API token
- a local SSH public key file for Terraform node provisioning
- DNS control for your chosen base domain and Forgejo hostname
- preferably a Tailscale tailnet and auth key for private admin/control-plane access
- the repo checked out locally
## Required bootstrap secrets and inputs
Create the bootstrap env file:
```bash
cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env
```
Set at least:
- `HCLOUD_TOKEN`
- `SSH_PUBLIC_KEY_PATH`
- `PUBLIC_DOMAIN`
- recommended:
- `TAILSCALE_AUTH_KEY`
- `TAILSCALE_CONTROL_PLANE_HOSTNAME`
- optional fallback:
- `TF_ADMIN_CIDR_BLOCKS`
- `BASE_DOMAIN`
- `FORGEJO_DOMAIN`
- `FORGEJO_ROOT_URL`
- `REGISTRY_DOMAIN`
- `LETSENCRYPT_EMAIL`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
- `NEAR_INTENTS_API_KEY`
- `FORGEJO_RUNNER_REGISTRATION_TOKEN`
- optional DNS automation:
- Cloudflare:
- `CLOUDFLARE_API_TOKEN`
- `CLOUDFLARE_ZONE_ID`
- Porkbun:
- `PORKBUN_API_KEY`
- `PORKBUN_SECRET_API_KEY`
Then load them:
```bash
source scripts/hetzner/bootstrap-secrets.env
```
## First bootstrap sequence
Run the end-to-end bootstrap from repo root:
```bash
bash scripts/hetzner/bootstrap.sh
```
Current repo behavior of that script:
1. runs Terraform in `infra/terraform/hetzner`
2. optionally creates DNS records for the base, Forgejo, and registry hosts via Cloudflare or Porkbun
3. if configured, joins the node to Tailscale and prefers the Tailscale control-plane hostname for Kubernetes API access
4. waits for SSH and the k3s API endpoint to become ready
5. fetches the real k3s kubeconfig from the node and writes it to `.state/hetzner/kubeconfig.yaml`
6. renders the Hetzner single-node overlay from local operator inputs
7. creates registry pull/auth secrets
8. applies the Kubernetes bootstrap manifests
9. builds the app image locally and imports it into k3s on the node
10. performs the first rollout using the imported bootstrap image
Use the generated kubeconfig afterward:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl -n unrip get deploy,pods
kubectl -n forgejo get deploy,pods,svc
```
## What is deployed into k3s
The repo-managed Kubernetes assets are under `deploy/k8s/`.
Current single-node target includes resources for:
- `unrip` workloads in namespace `unrip`
- Redpanda
- Forgejo
- Forgejo runner
- private registry
- ingress-nginx namespace/resources
- cert-manager namespace/resources
- ACME issuers and ingress definitions
- a bootstrap job for Redpanda topic creation
Shared platform namespaces:
- `forgejo`
- `registry`
- `ingress-nginx`
- `cert-manager`
Project-specific namespaces:
- `unrip`
- future projects should get their own namespace rather than sharing `unrip`
Important current-state nuance:
- the bootstrap script currently applies `deploy/k8s/base`
- the longer-term intended target is `deploy/k8s/overlays/hetzner-single-node`
## Executor persistence in k3s
The executor is stateful by design because it persists idempotency/execution tracking.
Current persistence boundary:
- app env uses `EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state`
- in Kubernetes, the executor deployment mounts storage at that path
- the Hetzner single-node overlay pins storage to the k3s `local-path` storage class
- cloud-init also prepares the host directory boundary for executor state on first boot
Operational meaning:
- executor state lives on node-backed storage in the single-node k3s environment
- if that PVC or underlying node storage is lost, duplicate-suppression history is lost too
- treat executor persistence as part of the minimal durable state of the cluster
## Failure recovery and operator checks
### If bootstrap fails before Terraform completes
Re-run after fixing the local input problem:
- missing token
- invalid CIDRs
- invalid SSH public key path
If the infrastructure must be torn down:
```bash
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/destroy.sh
```
### If Terraform succeeds but Kubernetes is not ready
Check the public API and cluster state from the workstation:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl get events -A --sort-by=.lastTimestamp | tail -n 50
```
Typical next checks:
- cloud-init may still be finishing
- k3s may still be starting
- a workload may be crash-looping due to missing secret values or image-delivery issues
### If workloads do not roll out
Inspect the affected namespace:
```bash
kubectl -n unrip get pods
kubectl -n unrip describe pod <pod-name>
kubectl -n unrip logs deploy/dummy-executor --tail=100
kubectl -n forgejo logs deploy/forgejo --tail=100
```
### If you need to recreate secrets
The workstation bootstrap creates these Secrets:
- `unrip/unrip-secrets`
- `forgejo/forgejo-secrets`
Verify them:
```bash
kubectl -n unrip get secret unrip-secrets
kubectl -n forgejo get secret forgejo-secrets
```
### Current known limitations
One important gap is already known:
- bootstrap and CI are not yet fully production-hardened, even though the first deploy path now fetches the real kubeconfig and imports the bootstrap image directly into k3s
Treat the current bootstrap as a repo-driven first-deploy path suitable for testing, with hardening still pending.
## Self-hosted CI handoff
After cluster bootstrap:
- open Forgejo at `https://${FORGEJO_DOMAIN}`
- seed or push this repo into Forgejo
- create Forgejo repository secrets:
- `KUBECONFIG_B64`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
- create Forgejo repository variables:
- `REGISTRY_HOST=${REGISTRY_DOMAIN}`
- optional: `PROJECT_NAME=unrip`
- optional: `PROJECT_NAMESPACE=unrip`
- optional: `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
- push to `main`
Routine application deploys then follow `.forgejo/workflows/deploy.yml`:
- build image as `REGISTRY_HOST/PROJECT_NAME:${GIT_SHA}`
- push to the private registry
- `kubectl set image` for each deployment listed in `PROJECT_DEPLOYMENTS` inside `PROJECT_NAMESPACE`
- wait for rollout
If project variables are omitted, the workflow defaults to the current repo project:
- `PROJECT_NAME=unrip`
- `PROJECT_NAMESPACE=unrip`
- `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
Infrastructure changes remain Terraform-driven from the operator workstation unless and until that responsibility is also automated.
For the detailed operator runbooks, see:
- `docs/hetzner-k3s-bootstrap.md`
- `docs/hetzner-self-hosted-ci-runbook.md`
- `deploy/k8s/projects/README.md`
- `docs/next-session-architecture.md`
## Local development with Compose
Compose remains available for local development and debugging.
```bash
npm install
cp .env.example .env
# edit .env
docker compose build
docker compose up -d
```
## Run
Useful commands:
### Start NEAR Intents ingest
Use the package script:
```bash
docker compose ps
docker compose logs -f
docker compose logs -f near-intents-ingest dummy-reactor dummy-executor dummy-consumer
docker compose restart dummy-executor
docker compose down
docker compose down -v
```
### Individual services
```bash
npm run near-intents:ingest
npm run dummy-reactor
npm run dummy-executor
npm run dummy-consumer
```
Or run the app directly:
```bash
node src/apps/near-intents-ingest.mjs
```
Optional exact-pair filter:
Optional pair filter:
```bash
npm run near-intents:ingest -- --pair 'asset_a->asset_b'
```
Example:
## Idempotent executor behavior
- every command has a `command_id`
- commands carry `idempotency_key` and `execution_key`
- executor persists state under `EXECUTOR_STATE_DIR`
- completed commands are skipped after restart or replay
```bash
npm run near-intents:ingest -- --pair 'nep141:btc.omft.near->nep141:gnosis-0x420ca0f9b9b604ce0fd9c18ef134c705e5fa3430.omft.near'
## Env
```env
NEAR_INTENTS_API_KEY=your_solver_jwt
NEAR_INTENTS_WS_URL=wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS=redpanda:9092
KAFKA_CLIENT_ID=unrip
KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE=raw.near_intents.quote
KAFKA_TOPIC_NORM_SWAP_DEMAND=norm.swap_demand
KAFKA_TOPIC_CMD_EXECUTE_TRADE=cmd.execute_trade
KAFKA_TOPIC_EXEC_TRADE_RESULT=exec.trade_result
KAFKA_CONSUMER_GROUP_DUMMY=dummy-reactor-v1
KAFKA_CONSUMER_GROUP_EXECUTOR=dummy-executor-v1
EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state
```
The filter is direction-agnostic, so `asset_a->asset_b` also matches `asset_b->asset_a`.
### Start the dummy consumer
Use the package script:
```bash
npm run dummy-consumer
```
Or run the app directly:
```bash
node src/apps/dummy-consumer.mjs
```
The dummy consumer subscribes to `norm.swap_demand`, logs the observed pair and quote id, and stands in for a future decision engine.
## Scripts
- `npm run near-intents:ingest` — start the websocket ingest and publish to Kafka/Redpanda topics
- `npm run dummy-consumer` — consume normalized demand events
- `npm start` — legacy wrapper that forwards into the ingest app
## Notes
- This repo is now bus-first: venue intake and downstream reaction are decoupled through Kafka-compatible topics.
- `index.mjs` remains only as a compatibility launch wrapper; operational docs should prefer `src/apps/*` entrypoints and npm scripts.
- Older single-file, Python, or TUI-only runtime instructions are obsolete for this repository state.

81
compose.yml Normal file
View file

@ -0,0 +1,81 @@
# Local/dev runtime reference. Hetzner production bootstrap now starts from Terraform + cloud-init + k3s.
# Topology: one Redpanda broker plus four app containers, all built from the
# repo Dockerfile and configured through the repo-root .env file.
services:
# Single-node Redpanda broker that provides the Kafka-compatible bus.
redpanda:
image: docker.redpanda.com/redpandadata/redpanda:v24.3.9
command:
- redpanda
- start
- --overprovisioned
- --smp
- "1"
- --memory
- "1G"
- --reserve-memory
- "0M"
- --node-id
- "0"
- --check=false
# Two Kafka listeners: "internal" on 9092 and "external" on 19092.
- --kafka-addr
- internal://0.0.0.0:9092,external://0.0.0.0:19092
# Internal clients are told to connect to redpanda:9092 (the compose service
# name); external clients are told to connect to 127.0.0.1:19092.
- --advertise-kafka-addr
- internal://redpanda:9092,external://127.0.0.1:19092
- --pandaproxy-addr
- internal://0.0.0.0:8082
- --advertise-pandaproxy-addr
- internal://redpanda:8082
ports:
# Only the external Kafka listener is published, bound to the host loopback.
- "127.0.0.1:19092:19092"
volumes:
- redpanda-data:/var/lib/redpanda/data
healthcheck:
# Healthy once `rpk cluster health` output matches "Healthy: true"; every app
# service below waits for this through `condition: service_healthy`.
test: ["CMD-SHELL", "rpk cluster health | grep -q 'Healthy: *true'"]
interval: 10s
timeout: 5s
retries: 10
start_period: 20s
# The four app services share the same build context and differ only in the
# entrypoint script passed as `command`.
near-intents-ingest:
build: .
command: ["node", "src/apps/near-intents-ingest.mjs"]
env_file:
- .env
depends_on:
redpanda:
condition: service_healthy
restart: unless-stopped
dummy-reactor:
build: .
command: ["node", "src/apps/dummy-reactor.mjs"]
env_file:
- .env
depends_on:
redpanda:
condition: service_healthy
restart: unless-stopped
dummy-executor:
build: .
command: ["node", "src/apps/dummy-executor.mjs"]
env_file:
- .env
depends_on:
redpanda:
condition: service_healthy
restart: unless-stopped
volumes:
# Named volume mounted at the same path that .env.example sets for
# EXECUTOR_STATE_DIR, so executor state outlives container restarts.
- executor-state:/var/lib/unrip/executor-state
dummy-consumer:
build: .
command: ["node", "src/apps/dummy-consumer.mjs"]
env_file:
- .env
depends_on:
redpanda:
condition: service_healthy
restart: unless-stopped
# Named volumes; `docker compose down -v` removes them together with their data.
volumes:
redpanda-data:
executor-state:

275
deploy/hetzner/README.md Normal file
View file

@ -0,0 +1,275 @@
# Hetzner single-node bootstrap (Terraform + cloud-init + k3s)
This is the canonical first-production deployment path for the repo.
A local operator workstation drives the first deployment end to end:
- Terraform provisions Hetzner infrastructure
- cloud-init installs k3s automatically on first boot
- the workstation waits for the public Kubernetes API
- the workstation creates initial Kubernetes Secrets
- the workstation applies repo-managed Kubernetes manifests
- the workstation performs the first image/bootstrap delivery attempt
- once Forgejo + runner are alive, routine app deploys are intended to move to self-hosted CI
Compose remains available for local development, but it is not the primary production deployment model.
## Scope of this layer
The foundation under `infra/terraform/hetzner` provisions:
- one Hetzner Cloud server
- one SSH key resource based on your local public key
- firewall rules for SSH, Kubernetes API, and HTTP/HTTPS ingress
- a private network attachment for future growth
- cloud-init user-data for unattended k3s installation and host preparation
The repo bootstrap then applies the Hetzner single-node overlay under `deploy/k8s/overlays/hetzner-single-node`, which composes Kubernetes resources under `deploy/k8s/` for:
- shared platform namespaces and services
- Redpanda
- unrip workloads
- Forgejo
- Forgejo runner
- private registry
- ingress/TLS-related resources
- Redpanda topic bootstrap job
## Prerequisites
Install on the operator workstation:
- Terraform `>= 1.6`
- `kubectl`
- `docker`
- `curl`
You also need:
- a Hetzner Cloud API token
- an SSH keypair already present locally
- access to DNS for your chosen domains
- admin CIDRs that can reach the future server on `22/tcp` and `6443/tcp`
- this repo checked out locally
## Required bootstrap secrets and inputs
Prepare the operator env file:
```bash
cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env
${EDITOR:-vi} scripts/hetzner/bootstrap-secrets.env
```
Set at least:
- `HCLOUD_TOKEN`
- `SSH_PUBLIC_KEY_PATH`
- `TF_ADMIN_CIDR_BLOCKS`
- `BASE_DOMAIN`
- `FORGEJO_DOMAIN`
- `FORGEJO_ROOT_URL`
- `NEAR_INTENTS_API_KEY`
- `FORGEJO_RUNNER_REGISTRATION_TOKEN`
Load it into the current shell:
```bash
source scripts/hetzner/bootstrap-secrets.env
```
## Canonical bootstrap sequence
Run from repo root:
```bash
bash scripts/hetzner/bootstrap.sh
```
Current behavior of the script:
1. validates local tooling
2. runs `terraform init` and `terraform apply` in `infra/terraform/hetzner`
3. reads Terraform outputs such as server IP and `k3s_api_url`
4. waits for the k3s API readiness endpoint
5. writes a local workstation kubeconfig to `.state/hetzner/kubeconfig.yaml`
6. writes overlay secret env input files and creates:
- `unrip/unrip-secrets`
- `unrip/unrip-registry-creds`
- `forgejo/forgejo-secrets`
- `registry/registry-secrets`
7. applies `deploy/k8s/platform/base/namespace.yaml` and `deploy/k8s/overlays/hetzner-single-node`
8. builds the repo bootstrap image locally
9. pushes it through the temporary local registry bridge using the active project name
10. updates and waits for rollout status in the active project namespace
After the script finishes:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl -n unrip get deploy,pods,jobs
kubectl -n forgejo get deploy,pods,svc
kubectl -n registry get pods,svc
```
## Current manifest target
Important current-state detail:
- `scripts/hetzner/bootstrap.sh` now applies `deploy/k8s/platform/base/namespace.yaml`
- it then applies `deploy/k8s/overlays/hetzner-single-node`
- bootstrap naming no longer assumes legacy `trading-system` kubeconfig contexts, image tags, or rollout namespaces
## Executor persistence in k3s
The dummy executor persists durable idempotency state.
Current persistence model:
- application path: `EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state`
- cloud-init prepares the host boundary for executor storage on first boot
- Kubernetes mounts storage at that same path for the executor workload
- the Hetzner single-node overlay pins PVC-backed storage to k3s `local-path`
Operational consequence:
- executor duplicate-suppression state lives on node-backed persistent storage
- replacing the node or deleting the PVC without migration loses that history
- treat executor state as required operational data, even though the executor is still a dummy implementation
## Failure recovery runbook
### A. Bootstrap fails before infrastructure exists
Typical causes:
- invalid `HCLOUD_TOKEN`
- wrong `SSH_PUBLIC_KEY_PATH`
- malformed `TF_ADMIN_CIDR_BLOCKS`
Fix the input and rerun:
```bash
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/bootstrap.sh
```
If you need to destroy partially created infrastructure:
```bash
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/destroy.sh
```
### B. Terraform succeeds but cluster access is not usable
Verify the generated kubeconfig and cluster health:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl get events -A --sort-by=.lastTimestamp | tail -n 50
```
What to suspect first:
- cloud-init still running
- k3s still starting
- bootstrap kubeconfig/auth not fully aligned yet
- public API reachable, but workloads not yet healthy
### C. Secrets were wrong or missing
The current bootstrap depends on:
- `${PROJECT_NAME:-unrip}-secrets`
- `NEAR_INTENTS_API_KEY`
- `forgejo-secrets`
- `root_url`
- `domain`
- `runner_registration_token`
Verify:
```bash
kubectl -n unrip get secret unrip-secrets
kubectl -n unrip get secret unrip-registry-creds
kubectl -n forgejo get secret forgejo-secrets
kubectl -n registry get secret registry-secrets
```
If needed, recreate them from the workstation before restarting the affected deployments.
### D. Workloads are present but not healthy
Inspect by namespace:
```bash
kubectl -n unrip get pods
kubectl -n unrip describe pod <pod-name>
kubectl -n unrip logs deploy/dummy-executor --tail=100
kubectl -n forgejo logs deploy/forgejo --tail=100
kubectl -n forgejo logs deploy/forgejo-runner --tail=100
```
Useful rollout checks:
```bash
kubectl -n unrip rollout status deployment/near-intents-ingest --timeout=300s
kubectl -n unrip rollout status deployment/dummy-reactor --timeout=300s
kubectl -n unrip rollout status deployment/dummy-executor --timeout=300s
kubectl -n unrip rollout status deployment/dummy-consumer --timeout=300s
kubectl -n forgejo rollout status deployment/forgejo --timeout=300s
kubectl -n forgejo rollout status deployment/forgejo-runner --timeout=300s
```
### E. Need to inspect Terraform outputs directly
```bash
cd infra/terraform/hetzner
terraform output
terraform output server_ipv4
terraform output server_private_ipv4
terraform output k3s_api_url
terraform output kubeconfig_strategy
```
## Self-hosted CI handoff
After the cluster is reachable and workloads are up:
1. reach Forgejo at the configured domain or by port-forward
2. perform the initial admin/bootstrap steps in Forgejo
3. create the target repository in Forgejo
4. push or mirror this repo into that Forgejo instance
5. confirm the runner is registered and healthy
6. move routine application deploys to the self-hosted pipeline, which now derives image naming and rollout targets from Forgejo repository variables instead of hard-coding the legacy project
Current repo-state caveats already known:
- first bootstrap is repo-driven from the workstation
- the bootstrap path no longer relies on SSH/scp transport in control flow
- the kubeconfig/auth result is not yet fully production-hardened
- first rollout still uses a temporary local registry bridge, while routine CI deploys are intended to be registry-native. The Forgejo workflow defaults to `unrip` and allows per-repo overrides for image name, namespace, and deployment list
- Forgejo admin creation, repo creation, and Actions configuration still require operator action after cluster bring-up
- DNS automation is currently wired for Cloudflare when credentials are supplied during bootstrap
- TLS is expected to come from cert-manager + Let's Encrypt once ingress hostnames resolve publicly
## Terraform-only usage
If you only want the infra layer:
```bash
cd infra/terraform/hetzner
export TF_VAR_hcloud_token="<your-hetzner-token>"
export TF_VAR_ssh_public_key="$(cat ~/.ssh/id_ed25519.pub)"
export TF_VAR_admin_cidr_blocks='["203.0.113.10/32"]'
terraform init
terraform apply
```
Useful outputs:
- `server_ipv4`
- `server_private_ipv4`
- `server_name`
- `server_fqdn`
- `k3s_api_url`
- `kubeconfig_strategy`
For CI/CD details, also see:
- `docs/hetzner-k3s-bootstrap.md`
- `docs/hetzner-self-hosted-ci-runbook.md`
## Compose status
Compose is still useful for:
- local development
- fast topology debugging
- non-production single-machine testing
But it should be treated as optional/dev runtime support, not as the primary production deployment path.

View file

@ -0,0 +1,115 @@
#cloud-config
# First-boot provisioning for the single Hetzner node: installs base packages,
# writes kernel tuning and the k3s server config, installs k3s, then runs a
# post-install script that prepares host directories and kubeconfig copies.
#
# {{PUBLIC_IPV4}} and {{PRIVATE_IPV4}} are placeholders that must be replaced
# with real addresses before this file is handed to the server.
# NOTE(review): presumably substituted by the Terraform layer - confirm.
package_update: true
package_upgrade: true
packages:
  - ca-certificates
  - curl
  - git
  - gnupg
  - jq
  - nfs-common
  - open-iscsi
  - apt-transport-https
  - software-properties-common
  - unattended-upgrades
  - ufw
write_files:
  - path: /etc/sysctl.d/90-k3s-single-node.conf
    permissions: '0644'
    owner: root:root
    content: |
      vm.max_map_count=1048575
      fs.inotify.max_user_instances=8192
      fs.inotify.max_user_watches=1048576
      fs.file-max=1048576
      net.core.somaxconn=65535
      net.ipv4.ip_local_port_range=1024 65535
      net.ipv4.tcp_tw_reuse=1
      # Required because the k3s config below sets protect-kernel-defaults:
      # kubelet exits at startup unless these match its expected values.
      vm.panic_on_oom=0
      vm.overcommit_memory=1
      kernel.panic=10
      kernel.panic_on_oops=1
  - path: /etc/rancher/k3s/config.yaml
    permissions: '0600'
    owner: root:root
    content: |
      write-kubeconfig-mode: "0640"
      kube-apiserver-arg:
        - anonymous-auth=false
      protect-kernel-defaults: true
      disable:
        - traefik
  - path: /usr/local/bin/post-k3s-bootstrap.sh
    permissions: '0755'
    owner: root:root
    content: |
      #!/usr/bin/env bash
      # Runs once from runcmd after k3s is installed: prepares host data
      # directories, installs helm, records bootstrap metadata, and writes
      # kubeconfig copies that point at the node's private address.
      set -euo pipefail
      install -d -m 0755 /var/lib/redpanda/data
      install -d -m 0755 /var/lib/unrip/executor-state
      chown root:root /var/lib/redpanda/data /var/lib/unrip/executor-state
      systemctl enable --now iscsid || true
      export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
      # NOTE(review): installer is fetched from the helm main branch, unpinned.
      curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
      install -d -m 0755 /usr/local/share/unrip
      cat >/usr/local/share/unrip/bootstrap-metadata.env <<'EOF'
      BOOTSTRAP_MODE=k3s-single-node
      BOOTSTRAP_PROJECT_NAME=unrip
      BOOTSTRAP_PROJECT_NAMESPACE=unrip
      K3S_KUBECONFIG=/opt/bootstrap/kubeconfig-internal.yaml
      BOOTSTRAP_REPO_DIR=/opt/unrip/repo
      BOOTSTRAP_MANIFEST_DIR=/opt/unrip/repo/deploy/k8s
      GITOPS_HANDOFF=seed-self-hosted-git-and-runner
      EOF
      chmod 0644 /usr/local/share/unrip/bootstrap-metadata.env
      install -d -m 0755 /opt/unrip
      if [ ! -d /opt/unrip/repo/.git ]; then
        # Best effort: the default URL is a placeholder, so a failed clone
        # must not abort the rest of the bootstrap.
        git clone --depth 1 "${BOOTSTRAP_REPO_URL:-https://example.invalid/bootstrap-repo.git}" /opt/unrip/repo || true
      fi
      install -d -m 0755 /opt/bootstrap
      cp /etc/rancher/k3s/k3s.yaml /opt/bootstrap/kubeconfig-internal.yaml
      chmod 0640 /opt/bootstrap/kubeconfig-internal.yaml
      chgrp k3s-readers /opt/bootstrap/kubeconfig-internal.yaml
      # Point the copy at the private address instead of loopback (dots are
      # escaped so only the literal loopback address matches).
      sed -i 's/127\.0\.0\.1/{{PRIVATE_IPV4}}/' /opt/bootstrap/kubeconfig-internal.yaml
      cat >/opt/bootstrap/README.txt <<'EOF'
      This node was provisioned by Terraform + cloud-init.
      Use /opt/bootstrap/kubeconfig-internal.yaml for automation.
      Bootstrap metadata lives at /usr/local/share/unrip/bootstrap-metadata.env.
      Future Kubernetes bootstrap assets should live under /opt/unrip/repo/deploy/k8s.
      EOF
      chmod 0644 /opt/bootstrap/README.txt
      if command -v kubectl >/dev/null 2>&1; then
        # Diagnostic snapshot only; a failure here must not stop the
        # kubeconfig setup below.
        kubectl get nodes -o wide >/opt/bootstrap/kubectl-get-nodes.txt || true
      fi
      if id ubuntu >/dev/null 2>&1; then
        usermod -aG k3s-readers ubuntu || true
        install -d -o ubuntu -g ubuntu -m 0700 /home/ubuntu/.kube
        cp /etc/rancher/k3s/k3s.yaml /home/ubuntu/.kube/config
        chown ubuntu:ubuntu /home/ubuntu/.kube/config
        chmod 0600 /home/ubuntu/.kube/config
        sed -i 's/127\.0\.0\.1/{{PRIVATE_IPV4}}/' /home/ubuntu/.kube/config
      fi
runcmd:
  # Apply the sysctl drop-in before k3s starts.
  - sysctl --system
  - systemctl enable unattended-upgrades
  # NOTE(review): rules are staged here, but `ufw enable` is never called, so
  # they only take effect if the image ships ufw already enabled. Enabling it
  # also needs allow rules for the k3s pod and service networks - confirm
  # before turning it on.
  - systemctl enable --now ufw
  - ufw default deny incoming
  - ufw default allow outgoing
  - ufw allow 22/tcp
  - ufw allow 6443/tcp
  - ufw allow 80/tcp
  - ufw allow 443/tcp
  # The group must exist before the post-install script chgrp's the kubeconfig.
  - groupadd --system k3s-readers || true
  - mkdir -p /etc/rancher/k3s
  - curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC='server --cluster-init --tls-san {{PUBLIC_IPV4}} --node-ip {{PRIVATE_IPV4}} --advertise-address {{PRIVATE_IPV4}} --flannel-backend vxlan --disable servicelb' sh -
  # Wait (bounded) for the k3s API to answer. This replaces an earlier
  # `cloud-init status --wait`, which cannot finish when run from runcmd
  # because runcmd is itself part of the cloud-init run it waits for.
  - timeout 300 sh -c 'until k3s kubectl get --raw=/readyz >/dev/null 2>&1; do sleep 5; done'
  - /usr/local/bin/post-k3s-bootstrap.sh
final_message: "cloud-init finished: k3s first node bootstrapped"

42
deploy/k8s/README.md Normal file
View file

@ -0,0 +1,42 @@
# Kubernetes bootstrap assets
This directory is the repo-driven deployment target for the single-node Hetzner+k3s bootstrap.
## Layout
- `base/` — shared bootstrap manifests plus the current `unrip` project manifests
- `projects/` — conventions for hosting multiple isolated projects on the same cluster
- `overlays/hetzner-single-node/` — first-node overlay with concrete hostnames, local-path storage, and generated secret references
- `secrets/` — examples and instructions for supplying required secrets out-of-band
## Shared cluster model
Shared platform namespaces:
- `forgejo`
- `registry`
- `ingress-nginx`
- `cert-manager`
Project-specific namespaces:
- `unrip`
- future projects should get their own namespace instead of sharing `unrip`
## Apply flow
After Terraform/cloud-init has produced a working kubeconfig, the canonical path is:
```bash
bash scripts/hetzner/bootstrap.sh
```
That script renders the Hetzner overlay inputs, creates platform and project registry auth secrets using the active project naming, and applies:
```bash
kubectl apply -k deploy/k8s/overlays/hetzner-single-node
```
## Secret management
The overlay intentionally references generated or pre-created Secrets instead of committing credentials:
- `unrip/unrip-secrets`
- `unrip/unrip-registry-creds`
- `forgejo/forgejo-secrets`
- `registry/registry-secrets`
The bootstrap script creates them from local environment variables. By default it targets the `unrip` project, but its kubeconfig context name, bootstrap image tag, project secret env filename, project namespace, and project registry secret name are derived from `PROJECT_NAME`, `PROJECT_NAMESPACE`, and `CLUSTER_NAME` instead of hard-coding legacy `trading-system` values.

View file

@ -0,0 +1,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../platform/base
- ../projects/unrip/base

View file

@ -0,0 +1,35 @@
# Hetzner single-node overlay
This overlay turns the shared platform and `unrip` project bases into a concrete first-node bootstrap target for the Terraform-provisioned k3s VM.
## Before apply
Create real secret material from the examples:
```bash
cp deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env.example deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env.example deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd.example deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd
```
Update:
- ingress hosts in `ingress-hosts.patch.yaml`
- ACME email in `issuer-email.patch.yaml`
- project secret values in `secrets/unrip.env`
- Forgejo secret values in `secrets/forgejo.env`
- registry htpasswd in `secrets/registry.htpasswd`
## Apply
```bash
kubectl apply -k deploy/k8s/overlays/hetzner-single-node
```
## What gets installed
- shared platform namespaces for registry, ingress, cert-manager, and Forgejo
- project namespace `unrip`
- Redpanda plus a topic bootstrap job inside `unrip`
- app worker deployments referencing `unrip-secrets`
- Forgejo and Forgejo runner referencing `forgejo-secrets`
- private registry protected by htpasswd from `registry-secrets`
- nginx ingress and ACME issuers for TLS
For future projects, do not reuse `unrip`; create a new project namespace and matching `<project>-config`, `<project>-secrets`, and `<project>-registry-creds` resources.

View file

@ -0,0 +1,43 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: forgejo
namespace: forgejo
spec:
tls:
- hosts:
- git.doran.133011.xyz
secretName: forgejo-tls
rules:
- host: git.doran.133011.xyz
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: forgejo
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: registry
namespace: registry
spec:
tls:
- hosts:
- registry.doran.133011.xyz
secretName: registry-tls
rules:
- host: registry.doran.133011.xyz
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: registry
port:
number: 5000

View file

@ -0,0 +1,15 @@
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
email: letsencryptemailfordoran@133011.xyz
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-production
spec:
acme:
email: letsencryptemailfordoran@133011.xyz

View file

@ -0,0 +1,24 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../platform/base
- ../../projects/unrip/base
patches:
- path: ingress-hosts.patch.yaml
- path: issuer-email.patch.yaml
- path: storage-class.patch.yaml
secretGenerator:
- name: unrip-secrets
namespace: unrip
envs:
- secrets/unrip.env
- name: forgejo-secrets
namespace: forgejo
envs:
- secrets/forgejo.env
- name: registry-secrets
namespace: registry
files:
- htpasswd=secrets/registry.htpasswd
generatorOptions:
disableNameSuffixHash: true

View file

@ -0,0 +1,3 @@
root_url=https://git.unrip-bootstrap.example.com/
domain=git.unrip-bootstrap.example.com
runner_registration_token=replace-me

View file

@ -0,0 +1 @@
bootstrap:$2y$05$replace-with-bcrypt-htpasswd

View file

@ -0,0 +1 @@
NEAR_INTENTS_API_KEY=replace-me

View file

@ -0,0 +1,31 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: redpanda-data
namespace: unrip
spec:
storageClassName: local-path
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: executor-state
namespace: unrip
spec:
storageClassName: local-path
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: forgejo-data
namespace: forgejo
spec:
storageClassName: local-path
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: registry-data
namespace: registry
spec:
storageClassName: local-path

View file

@ -0,0 +1,56 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: cert-manager
namespace: cert-manager
spec:
replicas: 1
selector:
matchLabels:
app: cert-manager
template:
metadata:
labels:
app: cert-manager
spec:
containers:
- name: cert-manager
image: quay.io/jetstack/cert-manager-controller:v1.17.1
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cert-manager-webhook
namespace: cert-manager
spec:
replicas: 1
selector:
matchLabels:
app: cert-manager-webhook
template:
metadata:
labels:
app: cert-manager-webhook
spec:
containers:
- name: webhook
image: quay.io/jetstack/cert-manager-webhook:v1.17.1
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cert-manager-cainjector
namespace: cert-manager
spec:
replicas: 1
selector:
matchLabels:
app: cert-manager-cainjector
template:
metadata:
labels:
app: cert-manager-cainjector
spec:
containers:
- name: cainjector
image: quay.io/jetstack/cert-manager-cainjector:v1.17.1

View file

@ -0,0 +1,29 @@
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
email: ops@example.invalid
server: https://acme-staging-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-staging-account-key
solvers:
- http01:
ingress:
ingressClassName: traefik
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-production
spec:
acme:
email: ops@example.invalid
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-production-account-key
solvers:
- http01:
ingress:
ingressClassName: traefik

View file

@ -0,0 +1,29 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
data:
Corefile: |
.:53 {
errors
health
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
}
hosts /etc/coredns/NodeHosts {
ttl 60
reload 15s
fallthrough
}
prometheus :9153
cache 30
loop
reload
loadbalance
import /etc/coredns/custom/*.override
forward . 1.1.1.1 1.0.0.1 8.8.8.8 8.8.4.4
}
import /etc/coredns/custom/*.server

View file

@ -0,0 +1,31 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
data:
Corefile: |
.:53 {
errors
health
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
}
hosts /etc/coredns/NodeHosts {
ttl 60
reload 15s
fallthrough
}
prometheus :9153
cache 30
loop
reload
loadbalance
import /etc/coredns/custom/*.override
forward . 1.1.1.1 1.0.0.1 8.8.8.8 8.8.4.4
}
import /etc/coredns/custom/*.server
NodeHosts: |
10.30.1.10 unrip-1

View file

@ -0,0 +1,30 @@
# Identity for the Forgejo runner pod; the runner Deployment selects it through
# `serviceAccountName: forgejo-runner`.
apiVersion: v1
kind: ServiceAccount
metadata:
name: forgejo-runner
namespace: forgejo
---
# Permissions granted to the runner: it may read and patch/update Deployments,
# and has read-only access to pods, pod logs, services, configmaps and secrets.
# NOTE(review): this is a ClusterRole bound through a ClusterRoleBinding (below),
# so every rule here, including get/list/watch on secrets, applies in ALL
# namespaces. If the runner only needs to roll out project deployments, a
# namespaced RoleBinding per project namespace would be a tighter grant.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: forgejo-runner-deployer
rules:
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: [""]
resources: ["pods", "pods/log", "services", "configmaps", "secrets"]
verbs: ["get", "list", "watch"]
---
# Binds the ClusterRole above to the forgejo/forgejo-runner ServiceAccount
# cluster-wide.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: forgejo-runner-deployer
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: forgejo-runner-deployer
subjects:
- kind: ServiceAccount
name: forgejo-runner
namespace: forgejo

View file

@ -0,0 +1,47 @@
# Single-replica Forgejo Actions runner. On start it registers itself with the
# Forgejo instance (if no registration file exists yet) and then runs the daemon.
apiVersion: apps/v1
kind: Deployment
metadata:
name: forgejo-runner
namespace: forgejo
spec:
replicas: 1
selector:
matchLabels:
app: forgejo-runner
template:
metadata:
labels:
app: forgejo-runner
spec:
# Runs under the forgejo-runner ServiceAccount, which carries the deployer RBAC grants.
serviceAccountName: forgejo-runner
restartPolicy: Always
containers:
- name: runner
image: code.forgejo.org/forgejo/runner:6.3.1
# The container runs as root (uid/gid 0).
# NOTE(review): presumably so workflow steps can install tools on the runner; confirm this is required.
securityContext:
runAsUser: 0
runAsGroup: 0
# Instance URL and registration token both come from the forgejo-secrets Secret.
env:
- name: FORGEJO_INSTANCE_URL
valueFrom:
secretKeyRef:
name: forgejo-secrets
key: root_url
- name: FORGEJO_RUNNER_REGISTRATION_TOKEN
valueFrom:
secretKeyRef:
name: forgejo-secrets
key: runner_registration_token
# Startup script: register only when /data/.runner is missing, then start the daemon.
# NOTE(review): `--config /data/.runner` hands the registration state file to the
# daemon as its config file, and `register` is not told where to write `.runner`;
# confirm against the forgejo-runner CLI docs that the file lands in /data and that
# the daemon should not instead be given a generated config.yml.
# NOTE(review): the `linux-amd64:host` label presumably makes jobs execute directly
# inside this container rather than in a nested container; confirm.
command: ["/bin/sh", "-lc"]
args:
- >-
if [ ! -f /data/.runner ]; then
forgejo-runner register --no-interactive --name k3s-runner --instance "$FORGEJO_INSTANCE_URL" --token "$FORGEJO_RUNNER_REGISTRATION_TOKEN" --labels "linux-amd64:host";
fi &&
forgejo-runner daemon --config /data/.runner
volumeMounts:
- name: runner-data
mountPath: /data
# /data is an emptyDir, so its contents (including /data/.runner) last only as long
# as the pod; a replacement pod starts with an empty /data and registers again.
volumes:
- name: runner-data
emptyDir: {}

View file

@ -0,0 +1,76 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: forgejo-data
namespace: forgejo
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: forgejo
namespace: forgejo
spec:
replicas: 1
selector:
matchLabels:
app: forgejo
template:
metadata:
labels:
app: forgejo
spec:
containers:
- name: forgejo
image: codeberg.org/forgejo/forgejo:10
env:
- name: USER_UID
value: "1000"
- name: USER_GID
value: "1000"
- name: FORGEJO__server__ROOT_URL
valueFrom:
secretKeyRef:
name: forgejo-secrets
key: root_url
- name: FORGEJO__server__DOMAIN
valueFrom:
secretKeyRef:
name: forgejo-secrets
key: domain
- name: FORGEJO__security__INSTALL_LOCK
value: "true"
- name: FORGEJO__service__DISABLE_REGISTRATION
value: "true"
ports:
- name: http
containerPort: 3000
- name: ssh
containerPort: 22
volumeMounts:
- name: data
mountPath: /data
volumes:
- name: data
persistentVolumeClaim:
claimName: forgejo-data
---
apiVersion: v1
kind: Service
metadata:
name: forgejo
namespace: forgejo
spec:
selector:
app: forgejo
ports:
- name: http
port: 3000
targetPort: 3000
- name: ssh
port: 22
targetPort: 22

View file

@ -0,0 +1,73 @@
# Hand-written ingress-nginx controller Deployment plus its Service.
# NOTE(review): the Ingress and ClusterIssuer manifests in this change use
# `ingressClassName: traefik`, while this controller is started with
# `--ingress-class=nginx`; confirm which ingress controller is meant to serve them.
apiVersion: apps/v1
kind: Deployment
metadata:
name: ingress-nginx-controller
namespace: ingress-nginx
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
template:
metadata:
labels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
spec:
# Uses the namespace's default ServiceAccount; this manifest defines no Role or
# ClusterRole for it.
# NOTE(review): the controller needs API access to watch Ingresses/Services/Secrets,
# so the missing RBAC is a likely cause of the crash recorded in the status notes.
serviceAccountName: default
containers:
- name: controller
image: registry.k8s.io/ingress-nginx/controller:v1.12.1
args:
- /nginx-ingress-controller
- --ingress-class=nginx
- --controller-class=k8s.io/ingress-nginx
# $(POD_NAMESPACE) is expanded by Kubernetes from the env var declared below.
- --publish-service=$(POD_NAMESPACE)/ingress-nginx-controller
- --election-id=ingress-nginx-leader
- --enable-ssl-passthrough
# Pod name and namespace are injected through the downward API.
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
ports:
- name: http
containerPort: 80
- name: https
containerPort: 443
# Drops all capabilities except NET_BIND_SERVICE (needed to bind ports 80/443).
securityContext:
allowPrivilegeEscalation: true
capabilities:
add: ["NET_BIND_SERVICE"]
drop: ["ALL"]
# Both probes hit /healthz on port 10254.
readinessProbe:
httpGet:
path: /healthz
port: 10254
livenessProbe:
httpGet:
path: /healthz
port: 10254
---
# Exposes the controller on ports 80 and 443.
# NOTE(review): the Service type is LoadBalancer, but k3s is installed with
# `--disable servicelb`; confirm something else assigns this Service an external IP.
apiVersion: v1
kind: Service
metadata:
name: ingress-nginx-controller
namespace: ingress-nginx
spec:
type: LoadBalancer
selector:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
ports:
- name: http
port: 80
targetPort: 80
- name: https
port: 443
targetPort: 443

View file

@ -0,0 +1,49 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: forgejo
namespace: forgejo
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
spec:
ingressClassName: traefik
tls:
- hosts:
- git.example.invalid
secretName: forgejo-tls
rules:
- host: git.example.invalid
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: forgejo
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: registry
namespace: registry
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
spec:
ingressClassName: traefik
tls:
- hosts:
- registry.example.invalid
secretName: registry-tls
rules:
- host: registry.example.invalid
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: registry
port:
number: 5000

View file

@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- forgejo.yaml
- forgejo-rbac.yaml
- forgejo-runner.yaml
- registry.yaml
- ingress.yaml
- cluster-issuers.yaml
- coredns.yaml

View file

@ -0,0 +1,35 @@
apiVersion: v1
kind: Namespace
metadata:
name: unrip
labels:
app.kubernetes.io/part-of: unrip
project.pi.io/type: project
---
apiVersion: v1
kind: Namespace
metadata:
name: forgejo
labels:
project.pi.io/type: platform
---
apiVersion: v1
kind: Namespace
metadata:
name: registry
labels:
project.pi.io/type: platform
---
apiVersion: v1
kind: Namespace
metadata:
name: ingress-nginx
labels:
project.pi.io/type: platform
---
apiVersion: v1
kind: Namespace
metadata:
name: cert-manager
labels:
project.pi.io/type: platform

View file

@ -0,0 +1,68 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: registry-data
namespace: registry
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: registry
namespace: registry
spec:
replicas: 1
selector:
matchLabels:
app: registry
template:
metadata:
labels:
app: registry
spec:
containers:
- name: registry
image: registry:2
env:
- name: REGISTRY_HTTP_ADDR
value: 0.0.0.0:5000
- name: REGISTRY_STORAGE_DELETE_ENABLED
value: "true"
- name: REGISTRY_AUTH
value: htpasswd
- name: REGISTRY_AUTH_HTPASSWD_REALM
value: Trading System Registry
- name: REGISTRY_AUTH_HTPASSWD_PATH
value: /auth/htpasswd
ports:
- containerPort: 5000
volumeMounts:
- name: data
mountPath: /var/lib/registry
- name: auth
mountPath: /auth
readOnly: true
volumes:
- name: data
persistentVolumeClaim:
claimName: registry-data
- name: auth
secret:
secretName: registry-secrets
---
apiVersion: v1
kind: Service
metadata:
name: registry
namespace: registry
spec:
selector:
app: registry
ports:
- name: http
port: 5000
targetPort: 5000

View file

@ -0,0 +1,35 @@
# Projects on the shared cluster
This cluster is intended to host multiple independent projects.
## Pattern
- shared platform namespaces:
- `forgejo`
- `registry`
- `ingress-nginx`
- `cert-manager`
- per-project namespaces:
- `unrip`
- future examples: `project-foo`, `project-bar`
## How to add another project
For each new project, create a project manifest set similar to `deploy/k8s/projects/unrip/base/unrip.yaml`:
- one namespace
- one project config map
- one secret name unique to the project
- one image pull secret unique to the project
- one executor/data PVC if needed
- deployments/services/ingresses only inside that namespace
Recommended naming convention:
- namespace: project name, e.g. `unrip`
- config map: `<project>-config`
- app secrets: `<project>-secrets`
- pull secret: `<project>-registry-creds`
- persistent host path/app state: `/var/lib/<project>/...`
- app image: `registry.<domain>/<project>:<tag>`
## Current project in this repo
- project name: `unrip`
- namespace: `unrip`
- project manifest: `deploy/k8s/projects/unrip/base/unrip.yaml`

View file

@ -0,0 +1,18 @@
# One-shot Job that creates the canonical Kafka topics on the in-cluster
# Redpanda broker. The broker runs with auto_create_topics_enabled=false, so the
# app workers depend on this Job having actually created the topics.
#
# Note: a Job's pod template is immutable; delete any existing
# redpanda-topic-bootstrap Job before re-applying a changed version.
apiVersion: batch/v1
kind: Job
metadata:
  name: redpanda-topic-bootstrap
  namespace: unrip
spec:
  template:
    spec:
      # A non-zero exit from the script below makes Kubernetes restart the pod.
      restartPolicy: OnFailure
      containers:
        - name: bootstrap-topics
          image: docker.redpanda.com/redpandadata/redpanda:v24.3.9
          command: ["/bin/sh", "-lc"]
          # Step 1: wait (up to 60 tries, 5s apart) until the broker answers, and
          #         fail the pod if it never does. Previously a single
          #         `rpk topic create ... || true` always exited 0, so the Job could
          #         complete without creating anything when Redpanda was not ready.
          # Step 2: create the topics. `|| true` is kept only for this step so a
          #         re-run against already-existing topics still succeeds.
          args:
            - |
              brokers=redpanda.unrip.svc.cluster.local:9092
              tries=0
              until rpk cluster info --brokers "$brokers" >/dev/null 2>&1; do
                tries=$((tries + 1))
                if [ "$tries" -ge 60 ]; then
                  echo "redpanda not reachable at $brokers" >&2
                  exit 1
                fi
                sleep 5
              done
              rpk topic create raw.near_intents.quote norm.swap_demand cmd.execute_trade exec.trade_result --brokers "$brokers" --partitions 1 --replicas 1 || true

View file

@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- redpanda.yaml
- unrip.yaml
- bootstrap-job.yaml

View file

@ -0,0 +1,91 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: redpanda-data
namespace: unrip
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
---
# Single-broker Redpanda for the unrip project, backed by the redpanda-data PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redpanda
  namespace: unrip
spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a rolling update would start a
  # second broker (same node-id 0, same ReadWriteOnce data volume) before the old
  # one stops. Stop the old pod first, then start the new one.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: redpanda
  template:
    metadata:
      labels:
        app: redpanda
        app.kubernetes.io/part-of: unrip
    spec:
      containers:
        - name: redpanda
          image: docker.redpanda.com/redpandadata/redpanda:v24.3.9
          # Development-sized broker: one core, 1G memory, no reserved memory.
          # Topic auto-creation is disabled, so topics must be created explicitly
          # (see the redpanda-topic-bootstrap Job).
          args:
            - redpanda
            - start
            - --overprovisioned
            - --smp
            - "1"
            - --memory
            - "1G"
            - --reserve-memory
            - "0M"
            - --node-id
            - "0"
            - --check=false
            - --set
            - redpanda.auto_create_topics_enabled=false
            - --kafka-addr
            - internal://0.0.0.0:9092
            # Clients are told to connect back through the Service DNS name.
            - --advertise-kafka-addr
            - internal://redpanda.unrip.svc.cluster.local:9092
            - --pandaproxy-addr
            - internal://0.0.0.0:8082
            - --advertise-pandaproxy-addr
            - internal://redpanda.unrip.svc.cluster.local:8082
          ports:
            - name: kafka
              containerPort: 9092
            - name: proxy
              containerPort: 8082
          # Both probes only check that the Kafka port accepts TCP connections.
          readinessProbe:
            tcpSocket:
              port: 9092
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: 9092
            initialDelaySeconds: 30
            periodSeconds: 15
          volumeMounts:
            - name: redpanda-data
              mountPath: /var/lib/redpanda/data
      volumes:
        - name: redpanda-data
          persistentVolumeClaim:
            claimName: redpanda-data
---
apiVersion: v1
kind: Service
metadata:
name: redpanda
namespace: unrip
spec:
selector:
app: redpanda
ports:
- name: kafka
port: 9092
targetPort: 9092
- name: proxy
port: 8082
targetPort: 8082

View file

@ -0,0 +1,152 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: unrip-config
namespace: unrip
data:
NEAR_INTENTS_WS_URL: wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS: redpanda.unrip.svc.cluster.local:9092
KAFKA_CLIENT_ID: unrip
KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE: raw.near_intents.quote
KAFKA_TOPIC_NORM_SWAP_DEMAND: norm.swap_demand
KAFKA_TOPIC_CMD_EXECUTE_TRADE: cmd.execute_trade
KAFKA_TOPIC_EXEC_TRADE_RESULT: exec.trade_result
KAFKA_CONSUMER_GROUP_DUMMY: dummy-reactor-v1
KAFKA_CONSUMER_GROUP_EXECUTOR: dummy-executor-v1
EXECUTOR_STATE_DIR: /var/lib/unrip/executor-state
PROJECT_NAME: unrip
PROJECT_NAMESPACE: unrip
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: executor-state
namespace: unrip
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 5Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: near-intents-ingest
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: near-intents-ingest
template:
metadata:
labels:
app: near-intents-ingest
app.kubernetes.io/part-of: unrip
spec:
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/near-intents-ingest.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-reactor
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: dummy-reactor
template:
metadata:
labels:
app: dummy-reactor
app.kubernetes.io/part-of: unrip
spec:
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/dummy-reactor.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-executor
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: dummy-executor
template:
metadata:
labels:
app: dummy-executor
app.kubernetes.io/part-of: unrip
spec:
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/dummy-executor.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets
volumeMounts:
- name: executor-state
mountPath: /var/lib/unrip/executor-state
volumes:
- name: executor-state
persistentVolumeClaim:
claimName: executor-state
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-consumer
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: dummy-consumer
template:
metadata:
labels:
app: dummy-consumer
app.kubernetes.io/part-of: unrip
spec:
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/dummy-consumer.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets

View file

@ -0,0 +1,29 @@
# Required Kubernetes secrets
Base manifests and the Hetzner single-node overlay both expect secrets to be supplied out-of-band. The Hetzner overlay generates `unrip/unrip-secrets`, `forgejo/forgejo-secrets`, and `registry/registry-secrets` from local files.
## Required secrets
- `unrip/unrip-secrets`
- `NEAR_INTENTS_API_KEY`
- `forgejo/forgejo-secrets`
- `root_url`
- `domain`
- `runner_registration_token`
- `registry/registry-secrets`
- `htpasswd`
## Overlay-driven generation
The `deploy/k8s/overlays/hetzner-single-node` overlay can generate these from local files via `secretGenerator`.
Example workflow:
```bash
cp deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env.example deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env.example deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd.example deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd
kubectl apply -k deploy/k8s/overlays/hetzner-single-node
```
For future projects, follow the same convention with project-specific secret names in project-specific namespaces.
Do not commit populated secret files.

View file

@ -0,0 +1,4 @@
raw.near_intents.quote
norm.swap_demand
cmd.execute_trade
exec.trade_result

View file

@ -0,0 +1,105 @@
Status: partially successful, not fully healthy yet.
What worked
- Hetzner VM provisioned
- k3s installed and running
- node is `Ready`
- namespaces created
- Forgejo is up
- registry is up
- Redpanda is up
- `near-intents-ingest` is up
What is still broken
- `dummy-reactor`, `dummy-executor`, `dummy-consumer` are failing because Kafka/Redpanda topic metadata is not healthy yet:
- `This server does not host this topic-partition`
- ingress-nginx is crashing
- cert-manager webhook/cainjector are crashing
- so public HTTPS ingress is not ready
- therefore Git/registry/CI are not yet usable via domain names
So the honest report is:
- cluster bootstrap succeeded
- platform/app stack is only partially healthy
- we still need another fix pass before calling this “working”
How to interact with it right now
1. Use kubectl
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl -n unrip get pods
kubectl -n forgejo get pods,svc
kubectl -n registry get pods,svc
```
2. Access Forgejo right now
Since ingress is broken, use port-forward:
```bash
kubectl -n forgejo port-forward svc/forgejo 3000:3000
```
Then open:
```text
http://127.0.0.1:3000
```
3. Access the registry right now
Also via port-forward:
```bash
kubectl -n registry port-forward svc/registry 5000:5000
```
Then from your machine:
```bash
docker login 127.0.0.1:5000 -u unrip
```
And push/pull like:
```bash
docker tag unrip:bootstrap 127.0.0.1:5000/unrip:test
docker push 127.0.0.1:5000/unrip:test
```
4. Watch logs
```bash
kubectl -n unrip logs deploy/near-intents-ingest -f
kubectl -n unrip logs deploy/dummy-reactor -f
kubectl -n unrip logs deploy/dummy-executor -f
kubectl -n unrip logs deploy/dummy-consumer -f
kubectl -n forgejo logs deploy/forgejo -f
kubectl -n registry logs deploy/registry -f
```
How Git would work once Forgejo is usable
After port-forward or later ingress:
```bash
git remote add forgejo http://127.0.0.1:3000/<owner>/<repo>.git
git push forgejo main
```
How CI/CD is supposed to work
Intended flow:
1. code lives in Forgejo
2. Forgejo runner executes `.forgejo/workflows/deploy.yml`
3. workflow builds image
4. pushes image to registry
5. updates `unrip` deployments in Kubernetes
Current reality:
- not ready yet
- because ingress/cert-manager are unhealthy
- and we haven't verified a full Forgejo runner deploy cycle
Bottom line
- Kubernetes cluster: yes
- server provisioning: yes
- basic platform pieces: partially
- usable Git/CI/CD stack: not yet
- unrip app pipeline: not yet
Most important next fixes
1. fix k3s manifest/platform issues:
- ingress-nginx RBAC/crash
- cert-manager install/CRDs/RBAC
2. fix Redpanda/topic metadata issue so reactor/executor/consumer run
3. only then wire Forgejo + registry + CI as usable

85
docs/contracts.md Normal file
View file

@ -0,0 +1,85 @@
# Event contracts
## Envelope
All bus messages use this envelope:
```json
{
"event_id": "string",
"event_type": "string",
"venue": "string",
"source": "string|null",
"schema_version": 1,
"observed_at": "ISO-8601|null",
"ingested_at": "ISO-8601",
"payload": {},
"raw": {}
}
```
## Topics
Current canonical topic set:
- `raw.near_intents.quote`
- `norm.swap_demand`
- `cmd.execute_trade`
- `exec.trade_result`
In Kubernetes bootstrap, Redpanda topic creation is currently handled by the repo-managed bootstrap job applied with the manifest set.
## `raw.near_intents.quote`
- `event_type`: `near_intents_quote_raw`
- `payload.message`: original venue-native payload
- `raw`: original venue-native payload
## `norm.swap_demand`
- `event_type`: `swap_demand`
- payload:
- `quote_id`
- `asset_in`
- `asset_out`
- `amount_in`
- `amount_out`
- `ttl_ms`
## `cmd.execute_trade`
- `event_type`: `execute_trade`
- payload:
- `command_id`
- `idempotency_key`
- `execution_key`
- `quote_id`
- `asset_in`
- `asset_out`
- `amount_in`
- `amount_out`
- `reason`
## `exec.trade_result`
- `event_type`: `trade_result`
- payload:
- `command_id`
- `idempotency_key`
- `execution_key`
- `quote_id`
- `status`
- `result_code`
- `note`
## Executor idempotency model
- `command_id` is unique per trade command and currently deterministic as `cmd-${quote_id}`
- `idempotency_key` is stable for semantic duplicate detection and currently `${venue}:${quote_id}`
- `execution_key` is the stable partition key and currently `${venue}:${asset_in}->${asset_out}`
- executor persists command state on durable storage before publishing a result
- already-completed `command_id`s are skipped on replay or restart
- if a command is seen again after a persisted `processing` state, the executor emits a recovered result path instead of blindly duplicating work
## Deployment and persistence implications
These contracts are tied to deployment behavior:
- executor duplicate suppression depends on durable persistence at `EXECUTOR_STATE_DIR`
- local Compose mounts that path for development/runtime testing
- the Hetzner single-node k3s path mounts persistent storage for the executor at `/var/lib/unrip/executor-state`
- in the current single-node target, that persistence is node-backed and should be treated as required operational state
Operational consequence:
- deleting the executor PVC or losing the node without migration discards idempotency history
- that can allow already-seen commands to be treated as new after recovery

View file

@ -0,0 +1,141 @@
# Hetzner + k3s + self-hosted Git/CI bootstrap
Goal: provision and deploy everything from this repo to a single Hetzner machine with no manual server login.
## Stack
- Terraform provisions the Hetzner Cloud VM, private network, and firewall
- cloud-init installs Tailscale first when configured, then installs k3s automatically
- Kubernetes manifests deploy:
- Redpanda
- `unrip` app services (`near-intents-ingest`, `dummy-reactor`, `dummy-executor`, `dummy-consumer`)
- private registry
- Forgejo
- ingress-nginx
- cert-manager
- ACME issuers
- local bootstrap script:
- runs Terraform
- optionally creates DNS records via Cloudflare or Porkbun
- writes overlay secrets/host patches from local env
- applies the Hetzner single-node k8s overlay
- builds the current app image locally
- fetches the real kubeconfig from the node
- imports the bootstrap image into k3s for the first rollout
## Files
- `infra/terraform/hetzner/`
- `deploy/k8s/base/`
- `deploy/k8s/overlays/hetzner-single-node/`
- `scripts/hetzner/bootstrap.sh`
- `scripts/hetzner/configure-cloudflare-dns.sh`
- `scripts/hetzner/destroy.sh`
- `scripts/k8s/logs.sh`
- `.forgejo/workflows/deploy.yml`
## Required local tools
- `terraform`
- `kubectl`
- `docker`
- `curl`
- `python3`
## Required local env
Start from:
```bash
cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env
source scripts/hetzner/bootstrap-secrets.env
```
Required values:
- `HCLOUD_TOKEN`
- `SSH_PUBLIC_KEY_PATH`
- `PUBLIC_DOMAIN`
- `BASE_DOMAIN`
- recommended Tailscale values:
- `TAILSCALE_AUTH_KEY`
- `TAILSCALE_CONTROL_PLANE_HOSTNAME`
- `FORGEJO_DOMAIN`
- `FORGEJO_ROOT_URL`
- `REGISTRY_DOMAIN`
- `LETSENCRYPT_EMAIL`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
- `NEAR_INTENTS_API_KEY`
- `FORGEJO_RUNNER_REGISTRATION_TOKEN`
Optional for automatic DNS:
- Cloudflare:
- `CLOUDFLARE_API_TOKEN`
- `CLOUDFLARE_ZONE_ID`
- Porkbun:
- `PORKBUN_API_KEY`
- `PORKBUN_SECRET_API_KEY`
## Bootstrap
```bash
bash scripts/hetzner/bootstrap.sh
```
Outputs:
- Hetzner VM created
- Tailscale joined if configured
- k3s installed
- kubeconfig written to `.state/hetzner/kubeconfig.yaml`
- overlay secrets and ingress host patches rendered from local env
- namespaces, Redpanda, app deployments, Forgejo, registry, ingress, cert-manager, and issuers applied
- bootstrap image built and first rollout triggered
## Tailscale-first admin access
Recommended mode:
- public firewall exposes only `80/443`
- admin access uses Tailscale
- Kubernetes API uses the Tailscale hostname when `TAILSCALE_CONTROL_PLANE_HOSTNAME` is set
`TF_ADMIN_CIDR_BLOCKS` remains only as a fallback if you intentionally want public admin/API exposure.
## DNS and TLS
If DNS provider credentials are present, bootstrap updates:
- `${BASE_DOMAIN}`
- `git.${BASE_DOMAIN}`
- `registry.${BASE_DOMAIN}`
Supported scripted providers:
- Cloudflare
- Porkbun
TLS is handled in-cluster by cert-manager using Let's Encrypt issuers and the rendered ingress hosts.
## Observe the cluster
```bash
KUBECONFIG=.state/hetzner/kubeconfig.yaml kubectl get pods -A
bash scripts/k8s/logs.sh
```
## Self-hosted CI/CD handoff
After bootstrap:
1. open Forgejo at `https://${FORGEJO_DOMAIN}`
2. seed or mirror this repo into Forgejo
3. add Forgejo Actions secrets:
- `KUBECONFIG_B64`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
4. add Forgejo Actions variable:
- `REGISTRY_HOST=${REGISTRY_DOMAIN}`
5. push to `main`
The workflow then:
- builds the image
- pushes it to `https://${REGISTRY_DOMAIN}`
- updates the app deployments in `unrip`
- waits for rollout
## Destroy everything
```bash
bash scripts/hetzner/destroy.sh
```
## Current limitations
- Forgejo admin bootstrap and repo seeding are still operator-driven after the first cluster bootstrap.
- bootstrap and CI authentication paths should still be hardened before production use.
- routine deploys are intended to be registry-native through Forgejo Actions, but that still needs a real-world verification pass.

View file

@ -0,0 +1,108 @@
# Hetzner self-hosted CI/CD runbook
This is the operator runbook for the handoff from local bootstrap to self-hosted Forgejo-based deployment.
## Bootstrap prerequisites
From your workstation:
```bash
cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/bootstrap.sh
```
After that you should have:
- `.state/hetzner/kubeconfig.yaml`
- Forgejo reachable at `https://${FORGEJO_DOMAIN}`
- Registry reachable at `https://${REGISTRY_DOMAIN}`
- private admin/control-plane access over Tailscale if configured
## Verify the cluster
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl -n forgejo get deploy,pods,svc,ingress
kubectl -n registry get deploy,pods,svc,ingress
kubectl -n unrip get deploy,pods
```
## Seed the repo into Forgejo
Create the target repo in Forgejo, then from your workstation:
```bash
git remote add forgejo https://${FORGEJO_DOMAIN}/<owner>/<repo>.git
git push forgejo main
```
## Configure Forgejo Actions secrets and variables
Create these repository secrets in Forgejo:
- `KUBECONFIG_B64`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
Create these repository variables:
- `REGISTRY_HOST=${REGISTRY_DOMAIN}`
- optional: `PROJECT_NAME=unrip`
- optional: `PROJECT_NAMESPACE=unrip`
- optional: `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
Generate `KUBECONFIG_B64` from the bootstrap kubeconfig:
```bash
# portable across GNU and BSD/macOS base64 (`-w0` is GNU-only)
base64 < .state/hetzner/kubeconfig.yaml | tr -d '\n'
```
## Workflow behavior
The workflow in `.forgejo/workflows/deploy.yml` now:
1. installs `buildah` and `kubectl` on the Forgejo runner
2. checks out the repo with the Forgejo job token
3. loads kubeconfig from `KUBECONFIG_B64`
4. logs into the private registry
5. builds `registry.<domain>/<project-name>:${GIT_SHA}` with `buildah`
6. pushes the image
7. updates each deployment listed in `PROJECT_DEPLOYMENTS` inside `PROJECT_NAMESPACE`
8. waits for rollout after each image update
Default behavior if you do not set project variables:
- `PROJECT_NAME=unrip`
- `PROJECT_NAMESPACE=unrip`
- `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
For a future project, reuse the same workflow by changing only the Forgejo repository variables instead of copying the workflow.
The first bootstrap deploy is different from routine CI:
- bootstrap fetches the real kubeconfig from the node and imports a local bootstrap image directly into k3s
- routine CI is intended to push versioned images to the private registry
## Trigger deploys
Push to `main` in Forgejo:
```bash
git push forgejo main
```
## Observe deploys
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl -n unrip rollout status deployment/near-intents-ingest --timeout=300s
kubectl -n unrip rollout status deployment/dummy-reactor --timeout=300s
kubectl -n unrip rollout status deployment/dummy-executor --timeout=300s
kubectl -n unrip rollout status deployment/dummy-consumer --timeout=300s
kubectl -n unrip get pods -o wide
kubectl get events -A --sort-by=.lastTimestamp | tail -n 50
```
## DNS and TLS
If DNS automation was enabled during bootstrap, A records for the base, Forgejo, and registry hosts are already managed from the repo-side bootstrap.
Currently supported DNS providers:
- Cloudflare
- Porkbun
TLS is issued by cert-manager using the rendered Let's Encrypt email and ingress hosts.
## Current limitations
- Forgejo admin bootstrap and repository creation are not yet API-automated.
- Forgejo repository secrets/variables still need to be populated before the first real deploy run.
- The runner currently uses host-mode jobs and installs `buildah`/`kubectl` at job start, which is functional but not yet optimized.

View file

@ -0,0 +1,383 @@
# Trading System Architecture Notes for Next Session
## Objective
Build the first real version of the trading system as an event-driven, multi-service architecture.
Current implemented seed:
- NEAR Intents ingest in Node.js
- Kafka-compatible bus usage via `kafkajs`
- dummy reactor / executor / result consumer loop
Next session should continue from this architecture, not revert to a monolith, local-only script, or TUI.
---
## Core Architecture
All components are independent services.
They communicate only through a central Kafka-compatible bus (Redpanda first, Kafka-compatible by design).
### Service classes
- venue ingestors
- normalizers
- reactors / decision engines
- executors
- downstream consumers / monitors / archivers / replay tools
### Service communication rule
No direct service-to-service calls for core trading flow.
Use bus topics only.
---
## Venue-Oriented Structure
The system should be organized by venue.
Each venue can have different:
- ingest/feed mechanics
- normalization logic
- execution mechanics
### Per-venue responsibilities
- `ingest` = venue-native intake
- `normalize` = convert venue-native payload into canonical internal event
- `execute` = venue-specific action logic
Planned shape:
```text
src/
apps/
bus/
core/
venues/
near-intents/
ingest
normalize
execute
```
---
## Bus Choice
Use **Redpanda** first, but stay fully **Kafka-compatible**.
### Reason
Requirements:
- high throughput
- low latency
- retention
- replay
- multiple producers/consumers
- independent services
- future scale-out
- multi-language compatibility
### Constraint
Do not use broker-specific features that make migration to Kafka difficult.
Use standard Kafka clients and semantics.
---
## Data Model Principles
Kafka/Redpanda is the operational event backbone.
### Event model rules
- append-only
- immutable events
- versioned schemas
- raw and normalized events both preserved
### Every event should include
- `event_id`
- `event_type`
- `venue`
- `observed_at` / `ingested_at`
- `schema_version`
- `payload`
- optionally raw/original payload where appropriate
### Raw vs normalized
Keep both.
- raw topics = exact venue-native source truth
- normalized topics = canonical research/trading inputs
This is required for:
- replay
- debugging
- future backtesting
- future Spark/batch processing
---
## Current/Planned Topic Flow
Minimal 3-stage pipeline:
1. ingest publishes normalized demand
2. reactor publishes trade command
3. executor publishes trade result
### Topic classes
- `raw.*` = raw venue-native events
- `norm.*` = canonical normalized market events
- `cmd.*` = execution commands
- `exec.*` = execution outcomes
- later `signal.*` if needed for reactor outputs before command stage
### Current minimal topics
- `norm.swap_demand`
- `cmd.execute_trade`
- `exec.trade_result`
### NEAR Intents
NEAR Intents source currently feeds quote-demand style events from solver-bus websocket.
This is a venue ingest source, not the whole trading system.
---
## Execution Safety / Zero Downtime Requirements
This is critical.
### Constraint
Multiple executors must never duplicate the same trade/action during deploys, restarts, or rebalances.
### Must-have rules
1. Every execution command must carry a unique `command_id`
2. Commands must include deterministic idempotency information
3. Executors must be idempotent
4. Executors must belong to a consumer group per executor role
5. Commands should be partitioned by a stable execution key where ordering matters
6. Executor state must be persisted durably enough to detect duplicate command execution
### Kafka consumer groups are not sufficient alone
They help assign work, but they do not guarantee no duplicate processing under restart/rebalance conditions.
Idempotency is still required.
### Rolling updates / zero downtime
Executors must support:
- graceful shutdown
- stop taking new work before exit
- finish or safely recover in-flight work
- commit offsets only after safe execution state transition
### Persistence implication
Executor idempotency state is not optional metadata.
It is operational state that must survive pod restarts.
Current single-node k3s direction:
- executor state lives at `/var/lib/unrip/executor-state`
- Kubernetes mounts that path through persistent storage
- the Hetzner single-node overlay currently targets k3s `local-path` storage
- node loss without storage migration means duplicate-suppression history is lost
---
## Deployment Target
### First deployment phase
- single machine on Hetzner
- but still multiple independent services
- no architecture shortcuts that prevent future clustering
### Future target
- split across multiple machines
- cluster capable
- fault tolerant
- multi-node
- zero-downtime deploys
### Deployment rules from day 1
- every component is a separate container/service
- all config via env/config files
- communication over network/bus only
- persistent components use mounted volumes/PVCs
- no manual SSH-based operational workflow
---
## Infrastructure / Ops Direction
Target environment:
- Hetzner
- self-hosted CI/CD
- provisioning by code
- no GitHub dependency
### Desired stack direction
- Terraform for Hetzner provisioning
- Kubernetes-oriented target from the start
- self-hosted Git + CI/CD
- Kafka-compatible broker
- object storage later for long-term archived event history
### Single-node first, future cluster later
The first version may run on one machine, but deployment structure should already match a future distributed system.
### Current canonical operator path
The repo now documents and partially implements this path as the primary deployment workflow:
#### Phase 0: workstation bootstrap
1. A local operator workstation prepares bootstrap secrets in `scripts/hetzner/bootstrap-secrets.env`.
2. The operator runs `bash scripts/hetzner/bootstrap.sh`.
3. Terraform provisions the server, firewall, network, and cloud-init user-data.
4. cloud-init installs k3s automatically and prepares persistence directories plus bootstrap artifacts.
5. The workstation waits for the k3s API endpoint to report ready (the node's Tailscale address when Tailscale is configured, otherwise the public address).
6. The workstation writes `.state/hetzner/kubeconfig.yaml`.
7. The workstation injects initial Kubernetes Secrets for app and Forgejo bootstrap.
8. The workstation applies repo-managed Kubernetes manifests under `deploy/k8s/`.
9. The workstation performs the first image/bootstrap delivery attempt for the app workloads.
10. The workstation verifies rollout status.
#### Phase 1: self-hosted handoff
1. Forgejo becomes reachable in-cluster.
2. The operator completes initial Forgejo admin/repo setup.
3. This repo is pushed or mirrored into Forgejo.
4. The Forgejo runner becomes the routine app deployment mechanism.
5. Terraform remains the infra mutation entrypoint unless further automated later.
### Failure-recovery expectation
The bootstrap path must be rerunnable from the workstation.
Docs should keep treating recovery as:
- fix local secrets/inputs
- rerun the bootstrap script
- inspect the cluster with the generated kubeconfig
- destroy/recreate infra with `scripts/hetzner/destroy.sh` only when required
### Current repo-state caveats
The direction is clear, but the implementation is still mid-transition:
- the bootstrap script currently applies `deploy/k8s/base` directly rather than the Hetzner overlay
- kubeconfig/auth handling is not yet fully production-hardened
- first image delivery is still a bootstrap workaround rather than a final registry-native CI path
- Forgejo admin bootstrap, repo creation, and Actions configuration still require operator steps
- local Compose remains in the repo for development/testing, not as the canonical production path
### Minimal repo layout target
```text
deploy/
hetzner/
README.md
k8s/
base/
overlays/
hetzner-single-node/
infra/
terraform/
hetzner/
```
Guidelines:
- `infra/terraform/hetzner/` owns VM, firewall, networking, and cloud-init rendering
- `deploy/k8s/` owns Kubernetes-native manifests and overlays
- app runtime manifests should remain Kubernetes-native so they can later move from single-node k3s to a larger cluster with minimal rewrite
- secret material must not live in git in plaintext; bootstrap docs should describe workstation-driven injection or generated secret references
---
## Local Development / Testing Direction
Do not assume manual multi-terminal operation long term.
### Requirement
Need an orchestrated local/dev runtime.
### Local dev should preserve real boundaries
- separate services
- broker present
- env/config driven
- same event flow as production
### Current local/dev answer
Compose is still acceptable for:
- developer laptops
- fast local iteration
- debugging event flow
- validating container boundaries before Kubernetes rollout
But Compose should remain explicitly secondary to the repo-driven Hetzner + k3s path for production operations.
### Testing layers
1. unit tests for normalizers / schema logic / helpers
2. integration tests against Kafka-compatible broker
3. replay/simulation tests using retained event streams
---
## Spark Readiness
Do not add Spark now.
But keep the system Spark-compatible later by:
- preserving raw events
- preserving normalized events
- using immutable append-only event streams
- versioning schemas
- separating operational event log from future analytical processing
Spark later would be for:
- large-scale backtesting
- feature generation
- archive processing
- multi-venue analytics
---
## Immediate Next Engineering Tasks
Next session should focus on the following.
### 1. Clean current repo structure
Remove duplicate/legacy paths and keep one canonical structure only.
### 2. Keep/complete the 3-stage loop
- NEAR Intents ingest -> `norm.swap_demand`
- dummy reactor -> `cmd.execute_trade`
- dummy executor -> `exec.trade_result`
- downstream result consumer
### 3. Define canonical schemas
Define concrete event schemas for:
- normalized swap demand
- execute trade command
- trade result
### 4. Define executor idempotency model
Specify:
- `command_id`
- idempotency key rules
- execution state transition rules
- duplicate handling rules
### 5. Move toward production-shaped deployment
Design for:
- one service per container
- single-node deployment first
- future multi-node split without app rewrite
### 6. Harden provisioning/deployment path
Next infra work should continue improving:
- Hetzner provisioning by code
- workstation bootstrap rerunnability
- self-hosted CI/CD handoff
- registry-native image delivery
- overlay convergence for the Hetzner single-node target
Status update:
- minimal Terraform exists under `infra/terraform/hetzner`
- first boot is cloud-init driven and installs k3s automatically
- bootstrap now starts from a local operator workstation rather than manual host login
- Kubernetes assets exist under `deploy/k8s`
- executor persistence boundaries are explicit for single-node k3s
- self-hosted CI handoff is documented, but still requires follow-up hardening
---
## Non-Goals for Next Session
- no dashboards
- no UI/TUI
- no monolith convenience architecture
- no SQLite-first system of record
- no direct coupling between ingest, decision, and execution
- no temporary local-only shortcuts that block future cluster deployment
---
## Guiding Principle
Build the single-node first version as if it is already a distributed system:
- separate services
- durable event bus
- replayable events
- explicit contracts
- idempotent execution
- production-compatible deployment boundaries
- bootstrapable from scratch without manual SSH-based host setup

View file

@ -0,0 +1,59 @@
#cloud-config
# Terraform template rendered into the server's user-data (see the templatefile
# call in the hcloud_server resource). It installs base packages, writes the
# sysctl/k3s configuration and a repo bootstrap helper, optionally joins
# Tailscale, then installs k3s.
#
# Template notes:
# - Conditional list items use leading-strip directives so each rendered item
#   keeps its own indentation whether or not the branch is taken.
# - Templated values that end up in shell commands are double-quoted so an
#   unusual branch name or URL cannot be word-split or globbed by the shell.
# - NOTE(review): the Tailscale auth key is rendered into user-data in plain
#   text; consider a short-lived, single-use key.
package_update: true
package_upgrade: true
packages:
  - curl
  - git
  - ca-certificates
  - jq
  - bash
  - apt-transport-https
write_files:
  - path: /etc/sysctl.d/90-unrip.conf
    permissions: '0644'
    content: |
      vm.max_map_count = 262144
  - path: /usr/local/bin/bootstrap-unrip.sh
    permissions: '0755'
    content: |
      #!/usr/bin/env bash
      set -euo pipefail
      install -d -m 0755 /opt/unrip
      if [ ! -d /opt/unrip/repo/.git ]; then
        git clone --branch "${bootstrap_repo_branch}" "${bootstrap_repo_url}" /opt/unrip/repo
      else
        git -C /opt/unrip/repo fetch --all --prune
        git -C /opt/unrip/repo checkout "${bootstrap_repo_branch}"
        git -C /opt/unrip/repo pull --ff-only origin "${bootstrap_repo_branch}"
      fi
      install -d -m 0755 /opt/unrip/bootstrap
      cat >/opt/unrip/bootstrap/README.txt <<'EOF'
      This node was provisioned by Terraform + cloud-init.
      Future Kubernetes bootstrap assets should live in:
      /opt/unrip/repo/${bootstrap_repo_path}
      EOF
  - path: /etc/rancher/k3s/config.yaml
    permissions: '0644'
    content: |
      write-kubeconfig-mode: "0644"
      node-name: ${node_name}
      tls-san:
        - ${public_domain}
        %{~ if tailscale_control_plane_hostname != "" }
        - ${tailscale_control_plane_hostname}
        %{~ endif }
      node-ip: ${private_ipv4_address}
      advertise-address: ${private_ipv4_address}
      disable:
        - servicelb
runcmd:
  - sysctl --system
  %{~ if tailscale_enabled && tailscale_auth_key != "" }
  - curl -fsSL https://tailscale.com/install.sh | sh
  - tailscale up --auth-key=${tailscale_auth_key} --ssh --hostname=${node_name}
  %{~ endif }
  - curl -sfL https://get.k3s.io | INSTALL_K3S_CHANNEL=${k3s_channel} sh -s - server
  - /usr/local/bin/bootstrap-unrip.sh
final_message: "k3s bootstrap finished for ${node_name}"

View file

@ -0,0 +1,48 @@
# SSH public key registered with Hetzner; attached to the server below so
# automation can log in.
resource "hcloud_ssh_key" "automation" {
name = "${var.name}-automation"
public_key = var.ssh_public_key
}
# Private network the node is attached to (range from var.network_cidr).
resource "hcloud_network" "trading_system" {
name = "${var.name}-network"
ip_range = var.network_cidr
}
# Cloud subnet inside the private network; the server's private IP
# (var.private_ipv4_address) is assigned on this network.
resource "hcloud_network_subnet" "trading_system" {
network_id = hcloud_network.trading_system.id
type = "cloud"
network_zone = var.network_zone
ip_range = var.subnet_cidr
}
# Single k3s node. First boot is driven entirely by the rendered cloud-init
# template passed as user_data.
resource "hcloud_server" "trading_system" {
  name         = var.name
  image        = var.image
  server_type  = var.server_type
  location     = var.location
  ssh_keys     = [hcloud_ssh_key.automation.id]
  firewall_ids = [hcloud_firewall.trading_system.id]

  public_net {
    ipv4_enabled = true
    ipv6_enabled = true
  }

  # Static private address on the project network; the same value is rendered
  # into the k3s config as node-ip / advertise-address.
  network {
    network_id = hcloud_network.trading_system.id
    ip         = var.private_ipv4_address
  }

  user_data = templatefile("${path.module}/cloud-init.yaml.tftpl", {
    k3s_channel                      = var.k3s_channel
    node_name                        = var.name
    private_ipv4_address             = var.private_ipv4_address
    public_domain                    = var.public_domain
    bootstrap_repo_url               = var.bootstrap_repo_url
    bootstrap_repo_branch            = var.bootstrap_repo_branch
    bootstrap_repo_path              = var.bootstrap_repo_path
    tailscale_enabled                = var.tailscale_enabled
    tailscale_auth_key               = var.tailscale_auth_key
    tailscale_control_plane_hostname = var.tailscale_control_plane_hostname
  })

  # The network block only references the network, not the subnet, so without
  # this Terraform may create the server and the subnet in parallel and the
  # private-IP attachment can fail intermittently.
  depends_on = [hcloud_network_subnet.trading_system]
}

View file

@ -0,0 +1,44 @@
# Inbound firewall for the node.
# - SSH (22) and the Kubernetes API (6443) are opened only to
#   var.admin_cidr_blocks, and only when that list is non-empty.
# - HTTP/HTTPS (80/443) and ICMP are open to everyone.
resource "hcloud_firewall" "trading_system" {
  name = "${var.name}-firewall"

  # Admin-only ports: no rule is emitted at all when no admin CIDRs are given.
  dynamic "rule" {
    for_each = [for admin_port in [22, 6443] : admin_port if length(var.admin_cidr_blocks) > 0]
    content {
      direction  = "in"
      protocol   = "tcp"
      port       = tostring(rule.value)
      source_ips = var.admin_cidr_blocks
    }
  }

  # Public ingress ports.
  dynamic "rule" {
    for_each = ["80", "443"]
    content {
      direction  = "in"
      protocol   = "tcp"
      port       = rule.value
      source_ips = ["0.0.0.0/0", "::/0"]
    }
  }

  rule {
    direction       = "in"
    protocol        = "icmp"
    source_ips      = ["0.0.0.0/0", "::/0"]
    destination_ips = []
  }
}

View file

@ -0,0 +1,35 @@
# Outputs consumed by the workstation bootstrap (scripts/hetzner/bootstrap.sh
# reads server_ipv4 and k3s_api_url via `terraform output -raw`).

# Public IPv4 of the node.
output "server_ipv4" {
value = hcloud_server.trading_system.ipv4_address
}
# Public IPv6 of the node.
output "server_ipv6" {
value = hcloud_server.trading_system.ipv6_address
}
output "server_name" {
value = hcloud_server.trading_system.name
}
# Echoes the configured static private address (not read back from the server).
output "server_private_ipv4" {
value = var.private_ipv4_address
}
# Echoes var.public_domain; DNS for it is managed outside this module.
output "server_fqdn" {
value = var.public_domain
}
# Kubernetes API URL: the Tailscale hostname when one is configured, otherwise
# the public IPv4 address.
output "k3s_api_url" {
value = var.tailscale_control_plane_hostname != "" ? "https://${var.tailscale_control_plane_hostname}:6443" : "https://${hcloud_server.trading_system.ipv4_address}:6443"
}
# Human-readable hint only.
# NOTE(review): the non-Tailscale text says to avoid SSH kubeconfig retrieval,
# but scripts/hetzner/bootstrap.sh currently fetches k3s.yaml over SSH —
# confirm which one is intended and align them.
output "kubeconfig_strategy" {
value = var.tailscale_enabled ? "Use Tailscale for private Kubernetes API access; avoid public SSH/Kubernetes exposure in the canonical flow." : "Use the public Kubernetes API endpoint with an operator-supplied bootstrap credential; avoid SSH/scp kubeconfig retrieval in the canonical flow."
}
# Fixed paths written by the cloud-init bootstrap helper on the node.
output "bootstrap_repo_checkout" {
value = "/opt/unrip/repo"
}
output "bootstrap_marker_file" {
value = "/opt/unrip/bootstrap/README.txt"
}

View file

@ -0,0 +1,14 @@
# Terraform and provider version constraints for the Hetzner stack.
terraform {
required_version = ">= 1.6.0"
required_providers {
hcloud = {
source = "hetznercloud/hcloud"
version = "~> 1.49"
}
}
}
# The API token is supplied through the sensitive hcloud_token variable.
provider "hcloud" {
token = var.hcloud_token
}

View file

@ -0,0 +1,111 @@
# Input variables for the single-node Hetzner k3s host.
variable "hcloud_token" {
description = "Hetzner Cloud API token"
type = string
sensitive = true
}
# Also used as the k3s node name and as the prefix for the SSH key, network,
# and firewall resource names.
variable "name" {
description = "Server name"
type = string
default = "unrip-1"
}
variable "location" {
description = "Hetzner location"
type = string
default = "nbg1"
}
variable "server_type" {
description = "Hetzner server type"
type = string
default = "cpx32"
}
variable "image" {
description = "Hetzner image"
type = string
default = "ubuntu-24.04"
}
variable "ssh_public_key" {
description = "Public SSH key content used for automation access"
type = string
}
# An empty list (the default) means the firewall gets no SSH (22) or
# Kubernetes API (6443) rules at all.
variable "admin_cidr_blocks" {
description = "CIDR blocks allowed to access SSH and K8s API when public admin access is enabled"
type = list(string)
default = []
}
# cloud-init installs and joins Tailscale only when this is true AND
# tailscale_auth_key is non-empty.
variable "tailscale_enabled" {
description = "Install and use Tailscale for admin/control-plane access"
type = bool
default = true
}
variable "tailscale_auth_key" {
description = "Tailscale auth key used for unattended node join"
type = string
sensitive = true
default = ""
}
# When non-empty this is added to the k3s tls-san list and used for the
# k3s_api_url output.
variable "tailscale_control_plane_hostname" {
description = "Expected Tailscale DNS name for the node; if set, bootstrap prefers it for kube access"
type = string
default = ""
}
# Passed to the k3s installer as INSTALL_K3S_CHANNEL.
variable "k3s_channel" {
description = "K3s release channel"
type = string
default = "stable"
}
variable "network_cidr" {
description = "Private network CIDR for the single-node cluster"
type = string
default = "10.30.0.0/16"
}
variable "subnet_cidr" {
description = "Private subnet CIDR for the server attachment"
type = string
default = "10.30.1.0/24"
}
variable "network_zone" {
description = "Hetzner network zone"
type = string
default = "eu-central"
}
# Rendered into the k3s config as node-ip and advertise-address.
# NOTE(review): presumably must fall inside subnet_cidr — nothing here validates it.
variable "private_ipv4_address" {
description = "Static private IPv4 assigned to the node on the Hetzner network"
type = string
default = "10.30.1.10"
}
# Also added to the k3s tls-san list by the cloud-init template.
variable "public_domain" {
description = "Public DNS name pointing at the server IPv4/IPv6; used for ingress/TLS"
type = string
}
# Cloned to /opt/unrip/repo by the cloud-init bootstrap helper.
# scripts/hetzner/bootstrap.sh currently passes the placeholder "local-bootstrap".
variable "bootstrap_repo_url" {
description = "Git repository URL cloned onto the node for GitOps/bootstrap assets"
type = string
}
variable "bootstrap_repo_branch" {
description = "Branch checked out for the bootstrap repository"
type = string
default = "main"
}
# Only written into the README marker file on the node by the cloud-init template.
variable "bootstrap_repo_path" {
description = "Repository subdirectory expected to contain future Kubernetes bootstrap manifests/scripts"
type = string
default = "deploy/k8s"
}

View file

@ -5,6 +5,8 @@
"type": "module",
"scripts": {
"near-intents:ingest": "node src/apps/near-intents-ingest.mjs",
"dummy-reactor": "node src/apps/dummy-reactor.mjs",
"dummy-executor": "node src/apps/dummy-executor.mjs",
"dummy-consumer": "node src/apps/dummy-consumer.mjs",
"start": "node index.mjs"
},

View file

@ -0,0 +1,48 @@
# Copy this file to scripts/hetzner/bootstrap-secrets.env and fill in the values.
# Then run: source scripts/hetzner/bootstrap-secrets.env
export HCLOUD_TOKEN=replace_me
export SSH_PUBLIC_KEY_PATH="$HOME/.ssh/id_ed25519.pub"
# Optional project override. Defaults target the built-in unrip project overlay.
export PROJECT_NAME=unrip
export PROJECT_NAMESPACE=unrip
# export PROJECT_OVERLAY_DIR="$PWD/deploy/k8s/overlays/hetzner-single-node"
# export PROJECT_KUSTOMIZE_PATH="../../projects/unrip/base"
# export PROJECT_SECRET_NAME=unrip-secrets
# export PROJECT_SECRET_ENV_BASENAME=unrip.env
# export PROJECT_REGISTRY_SECRET_NAME=unrip-registry-creds
# export PROJECT_IMAGE_REPOSITORY=unrip
# export PROJECT_DEPLOYMENTS="near-intents-ingest dummy-reactor dummy-executor dummy-consumer"
# Tailscale-first admin access (recommended)
export TAILSCALE_AUTH_KEY=
# optional override; leave empty to auto-discover the node via local `tailscale status --json`
export TAILSCALE_CONTROL_PLANE_HOSTNAME=
# Optional fallback if you want public admin ports instead of Tailscale
export TF_ADMIN_CIDR_BLOCKS='[]'
# Public naming for ingress/TLS
export PUBLIC_DOMAIN=unrip-bootstrap.example.com
export BASE_DOMAIN=example.com
export FORGEJO_DOMAIN=git.example.com
export FORGEJO_ROOT_URL=https://git.example.com/
export REGISTRY_DOMAIN=registry.example.com
export LETSENCRYPT_EMAIL=ops@example.com
# Optional DNS automation: choose one provider
# Cloudflare
export CLOUDFLARE_API_TOKEN=
export CLOUDFLARE_ZONE_ID=
# Porkbun
export PORKBUN_API_KEY=
export PORKBUN_SECRET_API_KEY=
# Registry auth for CI/CD and image pulls
export REGISTRY_USERNAME=unrip
export REGISTRY_PASSWORD=replace_me
# Application and bootstrap secrets
export NEAR_INTENTS_API_KEY=replace_me
export FORGEJO_RUNNER_REGISTRATION_TOKEN=replace_me

302
scripts/hetzner/bootstrap.sh Executable file
View file

@ -0,0 +1,302 @@
#!/usr/bin/env bash
# Workstation-side bootstrap for the Hetzner single-node k3s deployment.
# Expects its inputs as environment variables (see
# scripts/hetzner/bootstrap-secrets.env.example).
set -euo pipefail
# Repository root, resolved relative to this script's location.
ROOT_DIR=$(cd "$(dirname "$0")/../.." && pwd)
TF_DIR="$ROOT_DIR/infra/terraform/hetzner"
# Local, workstation-only state (kubeconfig and generated overlay live here).
STATE_DIR="$ROOT_DIR/.state/hetzner"
KUBECONFIG_PATH="$STATE_DIR/kubeconfig.yaml"
OVERLAY_DIR="$ROOT_DIR/deploy/k8s/overlays/hetzner-single-node"
DEFAULT_PROJECT_NAME="unrip"
DEFAULT_PROJECT_NAMESPACE="$DEFAULT_PROJECT_NAME"
mkdir -p "$STATE_DIR"
# require TOOL
# Abort the whole script with a message on stderr when TOOL is not on PATH.
require() {
  local tool="$1"
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "missing command: $tool" >&2
    exit 1
  fi
}
# wait_for_url URL LABEL [MAX_ATTEMPTS=120] [SLEEP_SECONDS=5]
# Poll URL with curl until it answers successfully (TLS verification is
# disabled with -k; HTTP errors count as failure because of -f).
# Returns 0 once the URL responds, 1 after MAX_ATTEMPTS failed attempts.
wait_for_url() {
  local url="$1"
  local label="$2"
  local max_attempts="${3:-120}"
  local sleep_seconds="${4:-5}"
  local attempt=1

  # Bound every probe: without explicit timeouts a single hung connection can
  # stall far longer than the intended max_attempts * sleep_seconds budget.
  # The connect timeout matches the one wait_for_ssh uses.
  until curl -kfsS --connect-timeout 5 --max-time 15 "$url" >/dev/null 2>&1; do
    if (( attempt >= max_attempts )); then
      echo "timed out waiting for ${label}: ${url}" >&2
      return 1
    fi
    # Progress line on the first attempt and then every sixth one.
    if (( attempt == 1 || attempt % 6 == 0 )); then
      echo "waiting for ${label} (${attempt}/${max_attempts})..."
    fi
    sleep "$sleep_seconds"
    attempt=$((attempt + 1))
  done
}
# wait_for_ssh TARGET [MAX_ATTEMPTS=120] [SLEEP_SECONDS=5]
# Retry a trivial SSH command against TARGET (user@host) using the key in
# SSH_PRIVATE_KEY_PATH until it succeeds. Host-key checking is disabled.
# Returns 0 on the first successful login, 1 after MAX_ATTEMPTS failures.
wait_for_ssh() {
  local target="$1"
  local max_attempts="${2:-120}"
  local sleep_seconds="${3:-5}"
  local -a ssh_opts=(
    -i "$SSH_PRIVATE_KEY_PATH"
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -o ConnectTimeout=5
  )
  local attempt

  for (( attempt = 1; ; attempt++ )); do
    if ssh "${ssh_opts[@]}" "$target" 'echo ssh-ready' >/dev/null 2>&1; then
      return 0
    fi
    if (( attempt >= max_attempts )); then
      echo "timed out waiting for ssh: ${target}" >&2
      return 1
    fi
    # Progress line on the first attempt and then every sixth one.
    if (( attempt == 1 || attempt % 6 == 0 )); then
      echo "waiting for ssh (${attempt}/${max_attempts})..."
    fi
    sleep "$sleep_seconds"
  done
}
# wait_for_tailscale_node HOST_NAME [MAX_ATTEMPTS=120] [SLEEP_SECONDS=5]
# Poll the local `tailscale status --json` until a peer whose HostName equals
# HOST_NAME shows up, then print how to reach it on stdout:
#   - the DNS name (trailing dot removed) of an online matching peer, or
#   - failing that, the first Tailscale IP of any matching peer.
# Progress and errors go to stderr so callers can capture stdout.
# Returns 0 on success, 1 if the tailscale CLI is missing or on timeout.
wait_for_tailscale_node() {
  local host_name="$1"
  local max_attempts="${2:-120}"
  local sleep_seconds="${3:-5}"
  local attempt=1
  local discovered
  local discover_script

  command -v tailscale >/dev/null 2>&1 || {
    echo "tailscale CLI is required locally for tailscale-first bootstrap" >&2
    return 1
  }

  # The program is handed to python via -c so that stdin stays connected to
  # the `tailscale status` pipe. Feeding the program through a heredoc on
  # stdin (python3 - <<PY) replaces the pipe, and the JSON is never read.
  discover_script=$(cat <<'PY'
import json
import sys

host = sys.argv[1]
try:
    data = json.load(sys.stdin)
except ValueError:
    # Empty or invalid status output: report "not found yet".
    raise SystemExit(0)

peers = (data.get("Peer") or {}).values()
matches = [peer for peer in peers if peer.get("HostName") == host]

# Prefer an online peer that has a DNS name.
for peer in sorted(matches, key=lambda p: p.get("DNSName") or "", reverse=True):
    dns = (peer.get("DNSName") or "").rstrip(".")
    if peer.get("Online") and dns:
        print(dns)
        raise SystemExit(0)

# Otherwise fall back to the first Tailscale IP of any matching peer.
for peer in sorted(matches, key=lambda p: p.get("DNSName") or "", reverse=True):
    if peer.get("TailscaleIPs"):
        print(peer["TailscaleIPs"][0])
        raise SystemExit(0)
PY
  )

  while true; do
    # A failing `tailscale status` simply counts as "not discovered yet".
    discovered=$(tailscale status --json 2>/dev/null | python3 -c "$discover_script" "$host_name" || true)
    if [[ -n "$discovered" ]]; then
      printf '%s\n' "$discovered"
      return 0
    fi
    if (( attempt >= max_attempts )); then
      echo "timed out waiting for tailscale node: ${host_name}" >&2
      return 1
    fi
    # Progress line on the first attempt and then every sixth one.
    if (( attempt == 1 || attempt % 6 == 0 )); then
      echo "waiting for tailscale node ${host_name} (${attempt}/${max_attempts})..." >&2
    fi
    sleep "$sleep_seconds"
    attempt=$((attempt + 1))
  done
}
require terraform
require kubectl
require docker
require curl
require python3
require ssh
require realpath
: "${HCLOUD_TOKEN:?set HCLOUD_TOKEN}"
: "${SSH_PUBLIC_KEY_PATH:?set SSH_PUBLIC_KEY_PATH}"
: "${PUBLIC_DOMAIN:?set PUBLIC_DOMAIN}"
: "${LETSENCRYPT_EMAIL:?set LETSENCRYPT_EMAIL}"
: "${TAILSCALE_AUTH_KEY:=}"
: "${TAILSCALE_CONTROL_PLANE_HOSTNAME:=}"
: "${NEAR_INTENTS_API_KEY:?set NEAR_INTENTS_API_KEY}"
: "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${FORGEJO_DOMAIN:=git.${BASE_DOMAIN}}"
: "${FORGEJO_ROOT_URL:=https://${FORGEJO_DOMAIN}/}"
: "${REGISTRY_DOMAIN:=registry.${BASE_DOMAIN}}"
: "${REGISTRY_USERNAME:?set REGISTRY_USERNAME}"
: "${REGISTRY_PASSWORD:?set REGISTRY_PASSWORD}"
: "${FORGEJO_RUNNER_REGISTRATION_TOKEN:?set FORGEJO_RUNNER_REGISTRATION_TOKEN}"
: "${TF_ADMIN_CIDR_BLOCKS:=}"
: "${PROJECT_NAME:=$DEFAULT_PROJECT_NAME}"
: "${PROJECT_NAMESPACE:=$DEFAULT_PROJECT_NAMESPACE}"
: "${PROJECT_OVERLAY_DIR:=$OVERLAY_DIR}"
: "${BOOTSTRAP_NODE_NAME:=unrip-1}"
: "${SKIP_TERRAFORM_APPLY:=0}"
: "${PROJECT_KUSTOMIZE_PATH:=../../projects/${PROJECT_NAME}/base}"
: "${PROJECT_SECRET_NAME:=${PROJECT_NAME}-secrets}"
: "${PROJECT_SECRET_ENV_BASENAME:=${PROJECT_NAME}.env}"
: "${PROJECT_REGISTRY_SECRET_NAME:=${PROJECT_NAME}-registry-creds}"
: "${PROJECT_IMAGE_REPOSITORY:=${PROJECT_NAME}}"
: "${PROJECT_DEPLOYMENTS:=near-intents-ingest dummy-reactor dummy-executor dummy-consumer}"
BOOTSTRAP_IMAGE="${PROJECT_IMAGE_REPOSITORY}:bootstrap"
PROJECT_SECRET_ENV_PATH="$PROJECT_OVERLAY_DIR/secrets/$PROJECT_SECRET_ENV_BASENAME"
GENERATED_OVERLAY_DIR="$STATE_DIR/generated-overlay"
GENERATED_OVERLAY_KUSTOMIZATION="$GENERATED_OVERLAY_DIR/kustomization.yaml"
SSH_PUBLIC_KEY=$(cat "$SSH_PUBLIC_KEY_PATH")
SSH_PRIVATE_KEY_PATH="${SSH_PUBLIC_KEY_PATH%.pub}"
if [[ ! -f "$SSH_PRIVATE_KEY_PATH" ]]; then
echo "missing ssh private key for bootstrap: $SSH_PRIVATE_KEY_PATH" >&2
exit 1
fi
TF_VARS=(
-var "hcloud_token=$HCLOUD_TOKEN"
-var "ssh_public_key=$SSH_PUBLIC_KEY"
-var "public_domain=$PUBLIC_DOMAIN"
-var "bootstrap_repo_url=local-bootstrap"
-var "tailscale_auth_key=$TAILSCALE_AUTH_KEY"
-var "tailscale_control_plane_hostname=$TAILSCALE_CONTROL_PLANE_HOSTNAME"
)
if [[ -n "$TF_ADMIN_CIDR_BLOCKS" && "$TF_ADMIN_CIDR_BLOCKS" != '[]' ]]; then
TF_VARS+=(-var "admin_cidr_blocks=$TF_ADMIN_CIDR_BLOCKS")
fi
if [[ -n "$TAILSCALE_AUTH_KEY" ]]; then
bash "$ROOT_DIR/scripts/hetzner/print-tailscale-firewall-note.sh"
fi
terraform -chdir="$TF_DIR" init
if [[ "$SKIP_TERRAFORM_APPLY" != "1" ]]; then
terraform -chdir="$TF_DIR" apply -auto-approve "${TF_VARS[@]}"
fi
SERVER_IP=$(terraform -chdir="$TF_DIR" output -raw server_ipv4)
K3S_API_URL=$(terraform -chdir="$TF_DIR" output -raw k3s_api_url)
if [[ -n "$TAILSCALE_AUTH_KEY" ]]; then
DISCOVERED_TAILSCALE_HOST="${TAILSCALE_CONTROL_PLANE_HOSTNAME:-$(wait_for_tailscale_node "$BOOTSTRAP_NODE_NAME")}"
SSH_TARGET="root@${DISCOVERED_TAILSCALE_HOST}"
K3S_API_URL="https://${DISCOVERED_TAILSCALE_HOST}:6443"
else
SSH_TARGET="root@${SERVER_IP}"
fi
if [[ -n "${CLOUDFLARE_API_TOKEN:-}" && -n "${CLOUDFLARE_ZONE_ID:-}" ]]; then
if ! SERVER_IP="$SERVER_IP" BASE_DOMAIN="$BASE_DOMAIN" bash "$ROOT_DIR/scripts/hetzner/configure-cloudflare-dns.sh"; then
echo "warning: cloudflare DNS automation failed; continuing without automated DNS" >&2
fi
elif [[ -n "${PORKBUN_API_KEY:-}" && -n "${PORKBUN_SECRET_API_KEY:-}" ]]; then
if ! SERVER_IP="$SERVER_IP" BASE_DOMAIN="$BASE_DOMAIN" bash "$ROOT_DIR/scripts/hetzner/configure-porkbun-dns.sh"; then
echo "warning: porkbun DNS automation failed; continuing without automated DNS" >&2
fi
fi
wait_for_ssh "$SSH_TARGET"
echo "waiting for Kubernetes API on $K3S_API_URL..."
# NOTE(review): curl -f treats any HTTP status >= 400 as a failure. Confirm the
# API server answers /readyz for unauthenticated requests; if it replies 401,
# this wait can only end by timing out.
wait_for_url "${K3S_API_URL}/readyz" "k3s API readiness"

# Everything written below holds credentials (cluster-admin kubeconfig, API
# key, runner registration token). Create the files and any new directories
# readable by the current user only, then restore the caller's umask.
previous_umask=$(umask)
umask 077

# Fetch the node's kubeconfig and point it at the reachable API URL instead of
# the node-local loopback address.
ssh -i "$SSH_PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$SSH_TARGET" 'sudo cat /etc/rancher/k3s/k3s.yaml' \
  | sed "s|https://127.0.0.1:6443|${K3S_API_URL}|" > "$KUBECONFIG_PATH"
export KUBECONFIG="$KUBECONFIG_PATH"
mkdir -p "$PROJECT_OVERLAY_DIR/secrets" "$GENERATED_OVERLAY_DIR"
cat > "$PROJECT_SECRET_ENV_PATH" <<EOF
NEAR_INTENTS_API_KEY=$NEAR_INTENTS_API_KEY
EOF
cat > "$PROJECT_OVERLAY_DIR/secrets/forgejo.env" <<EOF
root_url=$FORGEJO_ROOT_URL
domain=$FORGEJO_DOMAIN
runner_registration_token=$FORGEJO_RUNNER_REGISTRATION_TOKEN
EOF

# umask only affects newly created files; tighten files left over from an
# earlier run as well.
chmod 600 "$KUBECONFIG_PATH" "$PROJECT_SECRET_ENV_PATH" "$PROJECT_OVERLAY_DIR/secrets/forgejo.env"
umask "$previous_umask"
python3 - <<PY
from pathlib import Path
root = Path("$PROJECT_OVERLAY_DIR")
generated_root = Path("$GENERATED_OVERLAY_DIR")
project_kustomize_path = "$PROJECT_KUSTOMIZE_PATH"
project_namespace = "$PROJECT_NAMESPACE"
project_secret_name = "$PROJECT_SECRET_NAME"
project_secret_env_basename = "$PROJECT_SECRET_ENV_BASENAME"
project_overlay_dir = Path("$PROJECT_OVERLAY_DIR").relative_to(Path("$ROOT_DIR"))
resources = [f"../../{project_overlay_dir}/../../platform/base"]
if project_kustomize_path:
resources.append(f"../../{project_overlay_dir}/{project_kustomize_path}")
generated_root.mkdir(parents=True, exist_ok=True)
(generated_root / "kustomization.yaml").write_text(
"""apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
"""
+ "".join(f" - {resource}\n" for resource in resources)
+ """patches:
- path: ingress-hosts.patch.yaml
- path: issuer-email.patch.yaml
- path: storage-class.patch.yaml
secretGenerator:
- name: {project_secret_name}
namespace: {project_namespace}
envs:
- ../../{project_overlay_dir}/secrets/{project_secret_env_basename}
- name: forgejo-secrets
namespace: forgejo
envs:
- ../../{project_overlay_dir}/secrets/forgejo.env
- name: registry-secrets
namespace: registry
files:
- htpasswd=../../{project_overlay_dir}/secrets/registry.htpasswd
generatorOptions:
disableNameSuffixHash: true
""".format(
project_secret_name=project_secret_name,
project_namespace=project_namespace,
project_overlay_dir=project_overlay_dir,
project_secret_env_basename=project_secret_env_basename,
)
)
(generated_root / "storage-class.patch.yaml").write_text((root / "storage-class.patch.yaml").read_text())
(generated_root / "issuer-email.patch.yaml").write_text(f'''apiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-staging\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n---\napiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-production\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n''')
(generated_root / "ingress-hosts.patch.yaml").write_text(f'''apiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: forgejo\n namespace: forgejo\nspec:\n tls:\n - hosts:\n - {"$FORGEJO_DOMAIN"}\n secretName: forgejo-tls\n rules:\n - host: {"$FORGEJO_DOMAIN"}\n---\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: registry\n namespace: registry\nspec:\n tls:\n - hosts:\n - {"$REGISTRY_DOMAIN"}\n secretName: registry-tls\n rules:\n - host: {"$REGISTRY_DOMAIN"}\n''')
PY
kubectl apply -f "$ROOT_DIR/deploy/k8s/platform/base/namespace.yaml"
kubectl create namespace "$PROJECT_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
kubectl -n registry create secret generic registry-secrets \
--from-file=htpasswd=<(docker run --rm --entrypoint htpasswd httpd:2 -Bbn "$REGISTRY_USERNAME" "$REGISTRY_PASSWORD") \
--dry-run=client -o yaml | kubectl apply -f -
kubectl -n "$PROJECT_NAMESPACE" create secret docker-registry "$PROJECT_REGISTRY_SECRET_NAME" \
--docker-server="$REGISTRY_DOMAIN" \
--docker-username="$REGISTRY_USERNAME" \
--docker-password="$REGISTRY_PASSWORD" \
--dry-run=client -o yaml | kubectl apply -f -
kubectl apply -k "$GENERATED_OVERLAY_DIR"
docker build -t "$BOOTSTRAP_IMAGE" "$ROOT_DIR"
docker save "$BOOTSTRAP_IMAGE" \
| ssh -i "$SSH_PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$SSH_TARGET" 'sudo k3s ctr images import -'
for deployment in $PROJECT_DEPLOYMENTS; do
kubectl -n "$PROJECT_NAMESPACE" set image "deployment/${deployment}" app="$BOOTSTRAP_IMAGE"
done
for deployment in $PROJECT_DEPLOYMENTS; do
kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/${deployment}" --timeout=180s
done
# Final summary of everything the bootstrap produced, printed as key=value lines.
echo "bootstrap complete"
echo "project_name=$PROJECT_NAME"
echo "project_namespace=$PROJECT_NAMESPACE"
echo "project_overlay_dir=$PROJECT_OVERLAY_DIR"
echo "server_ip=$SERVER_IP"
echo "ssh_target=$SSH_TARGET"
echo "k3s_api_url=$K3S_API_URL"
echo "kubeconfig=$KUBECONFIG_PATH"
echo "bootstrap_image=$BOOTSTRAP_IMAGE"
echo "forgejo_url=$FORGEJO_ROOT_URL"
echo "registry_url=https://$REGISTRY_DOMAIN"
# Report the provider using the same precedence and conditions as the DNS
# automation step of this script (Cloudflare first, Porkbun only as fallback).
# Concatenating both `:+` expansions printed "cloudflareporkbun" when both
# credential sets were present and ignored the second credential of each pair.
if [[ -n "${CLOUDFLARE_API_TOKEN:-}" && -n "${CLOUDFLARE_ZONE_ID:-}" ]]; then
  dns_provider="cloudflare"
elif [[ -n "${PORKBUN_API_KEY:-}" && -n "${PORKBUN_SECRET_API_KEY:-}" ]]; then
  dns_provider="porkbun"
else
  dns_provider=""
fi
echo "dns_provider=$dns_provider"

View file

@ -0,0 +1,40 @@
#!/usr/bin/env bash
set -euo pipefail
# Required inputs. `${VAR:?msg}` aborts the script with `msg` when the variable
# is unset or empty, so nothing below runs with missing credentials.
: "${CLOUDFLARE_API_TOKEN:?set CLOUDFLARE_API_TOKEN}"
: "${CLOUDFLARE_ZONE_ID:?set CLOUDFLARE_ZONE_ID}"
: "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${SERVER_IP:?set SERVER_IP}"
# api METHOD PATH [JSON_BODY]
# Sends an authenticated JSON request to the Cloudflare v4 API and writes the
# response body to stdout. PATH is appended to the API base URL. JSON_BODY is
# sent as the request body only when it is non-empty. curl runs with -f, so an
# HTTP error status makes the function return non-zero.
api() {
  local method="$1"
  local path="$2"
  local -a curl_args=(
    -fsS
    -X "$method"
    -H "Authorization: Bearer $CLOUDFLARE_API_TOKEN"
    -H 'Content-Type: application/json'
  )
  if [[ -n "${3:-}" ]]; then
    curl_args+=(--data "$3")
  fi
  curl "${curl_args[@]}" "https://api.cloudflare.com/client/v4$path"
}
# upsert_record TYPE NAME CONTENT [PROXIED]
# Creates the DNS record, or overwrites the first existing record that matches
# TYPE and NAME in the configured zone. PROXIED defaults to "false" and is
# inserted into the JSON payload as a bare boolean literal.
# All requests go through the shared `api` helper so authentication, headers
# and failure behaviour are identical for the lookup and the write.
upsert_record() {
  local type="$1"
  local name="$2"
  local content="$3"
  local proxied="${4:-false}"

  # Look up an existing record id; prints an empty string when there is none.
  local existing_id
  existing_id=$(api GET "/zones/$CLOUDFLARE_ZONE_ID/dns_records?type=$type&name=$name" \
    | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["result"][0]["id"] if d.get("result") else "")')

  local payload
  payload=$(printf '{"type":"%s","name":"%s","content":"%s","ttl":120,"proxied":%s}' "$type" "$name" "$content" "$proxied")

  if [[ -n "$existing_id" ]]; then
    api PUT "/zones/$CLOUDFLARE_ZONE_ID/dns_records/$existing_id" "$payload" >/dev/null
  else
    api POST "/zones/$CLOUDFLARE_ZONE_ID/dns_records" "$payload" >/dev/null
  fi
}
# Point the apex, git.* and registry.* hostnames at the server. The fourth
# argument (`false`) is passed as the record's `proxied` flag.
upsert_record A "$BASE_DOMAIN" "$SERVER_IP" false
upsert_record A "git.$BASE_DOMAIN" "$SERVER_IP" false
upsert_record A "registry.$BASE_DOMAIN" "$SERVER_IP" false
echo "cloudflare dns updated for $BASE_DOMAIN, git.$BASE_DOMAIN, registry.$BASE_DOMAIN"

View file

@ -0,0 +1,71 @@
#!/usr/bin/env bash
set -euo pipefail
# require COMMAND
# Exits the script with status 1 and a message on stderr when COMMAND cannot be
# resolved by the shell.
require() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "missing command: $1" >&2
    exit 1
  fi
}
# Tools this script shells out to.
require curl
require python3
# Required inputs. `${VAR:?msg}` aborts with `msg` when the variable is unset or empty.
: "${PORKBUN_API_KEY:?set PORKBUN_API_KEY}"
: "${PORKBUN_SECRET_API_KEY:?set PORKBUN_SECRET_API_KEY}"
: "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${SERVER_IP:?set SERVER_IP}"
api_base="https://api.porkbun.com/api/json/v3"
# Subdomain labels to manage; the empty label stands for the apex ($BASE_DOMAIN itself).
root_name=""
git_name="git"
registry_name="registry"
# payload CONTENT
# Prints a JSON request body that carries the API credentials, the given record
# content and a TTL of "600". Values are inserted verbatim (no JSON escaping).
payload() {
  local record_content="$1"
  local template='{"apikey":"%s","secretapikey":"%s","content":"%s","ttl":"600"}'
  printf "$template" "$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY" "$record_content"
}
# list_records
# POSTs the credentials to the dns/retrieve endpoint for $BASE_DOMAIN and
# prints the raw JSON response to stdout. curl runs with -f, so an HTTP error
# status yields a non-zero return.
list_records() {
  local credentials
  credentials="{\"apikey\":\"$PORKBUN_API_KEY\",\"secretapikey\":\"$PORKBUN_SECRET_API_KEY\"}"
  curl -sSf \
    -H 'Content-Type: application/json' \
    --data "$credentials" \
    "$api_base/dns/retrieve/$BASE_DOMAIN"
}
# upsert_a_record NAME
# Ensures an A record for NAME.$BASE_DOMAIN (or the apex when NAME is empty)
# points at $SERVER_IP: edits the existing record when one is found in the
# zone listing, otherwise creates it.
upsert_a_record() {
  local name="$1"
  local fqdn="$BASE_DOMAIN"
  if [[ -n "$name" ]]; then
    fqdn="$name.$BASE_DOMAIN"
  fi

  # Find the id of an existing A record whose name equals the FQDN; prints
  # nothing when there is no match.
  local record_id
  record_id=$(python3 - "$fqdn" "$(list_records)" <<'PY'
import json
import sys

fqdn = sys.argv[1]
data = json.loads(sys.argv[2])
for rec in data.get('records', []):
    if rec.get('type') == 'A' and rec.get('name') == fqdn:
        print(rec.get('id', ''))
        break
PY
  )

  # The same body is used for edit and create. The edit call previously sent
  # only content/ttl; the edit-by-id endpoint also expects the record type, and
  # sending the subdomain keeps the record on the intended host.
  local body
  body=$(printf '{"apikey":"%s","secretapikey":"%s","name":"%s","type":"A","content":"%s","ttl":"600"}' \
    "$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY" "$name" "$SERVER_IP")

  if [[ -n "$record_id" ]]; then
    curl -fsS "$api_base/dns/edit/$BASE_DOMAIN/$record_id" \
      -H 'Content-Type: application/json' \
      --data "$body" >/dev/null
    echo "updated A $fqdn -> $SERVER_IP"
  else
    curl -fsS "$api_base/dns/create/$BASE_DOMAIN" \
      -H 'Content-Type: application/json' \
      --data "$body" >/dev/null
    echo "created A $fqdn -> $SERVER_IP"
  fi
}
# Manage the apex, git.* and registry.* A records, then report what was touched.
upsert_a_record "$root_name"
upsert_a_record "$git_name"
upsert_a_record "$registry_name"
echo "porkbun dns updated for $BASE_DOMAIN, git.$BASE_DOMAIN, registry.$BASE_DOMAIN"

28
scripts/hetzner/destroy.sh Executable file
View file

@ -0,0 +1,28 @@
#!/usr/bin/env bash
set -euo pipefail
# Runs `terraform destroy` for the configuration under infra/terraform/hetzner,
# passing the same variable set the configuration is given at apply time.

# Repository root is two directories above this script.
ROOT_DIR=$(cd "$(dirname "$0")/../.." && pwd)
TF_DIR="$ROOT_DIR/infra/terraform/hetzner"

# Mandatory inputs: abort with a message when unset or empty.
: "${HCLOUD_TOKEN:?set HCLOUD_TOKEN}"
: "${SSH_PUBLIC_KEY_PATH:?set SSH_PUBLIC_KEY_PATH}"

# Optional inputs with their defaults.
PUBLIC_DOMAIN="${PUBLIC_DOMAIN:-bootstrap.example.com}"
TAILSCALE_AUTH_KEY="${TAILSCALE_AUTH_KEY:-}"
TAILSCALE_CONTROL_PLANE_HOSTNAME="${TAILSCALE_CONTROL_PLANE_HOSTNAME:-}"
TF_ADMIN_CIDR_BLOCKS="${TF_ADMIN_CIDR_BLOCKS:-}"

SSH_PUBLIC_KEY=$(cat "$SSH_PUBLIC_KEY_PATH")

TF_VARS=()
TF_VARS+=(-var "hcloud_token=$HCLOUD_TOKEN")
TF_VARS+=(-var "ssh_public_key=$SSH_PUBLIC_KEY")
TF_VARS+=(-var "public_domain=$PUBLIC_DOMAIN")
TF_VARS+=(-var "tailscale_auth_key=$TAILSCALE_AUTH_KEY")
TF_VARS+=(-var "tailscale_control_plane_hostname=$TAILSCALE_CONTROL_PLANE_HOSTNAME")

# Only forward the admin CIDR list when it is neither empty nor the literal "[]".
case "$TF_ADMIN_CIDR_BLOCKS" in
  '' | '[]') ;;
  *) TF_VARS+=(-var "admin_cidr_blocks=$TF_ADMIN_CIDR_BLOCKS") ;;
esac

terraform -chdir="$TF_DIR" init
terraform -chdir="$TF_DIR" destroy -auto-approve "${TF_VARS[@]}"

View file

@ -0,0 +1,8 @@
#!/usr/bin/env bash
set -euo pipefail
# Print the operator reminder for Tailscale-first mode, one line per argument.
printf '%s\n' \
  'Tailscale-first mode:' \
  '- public firewall should expose only 80/443' \
  '- SSH and Kubernetes API should be reached over Tailscale' \
  '- ensure your workstation is authenticated to the same tailnet before bootstrap continues'

13
scripts/k8s/logs.sh Executable file
View file

@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -euo pipefail
# Lists the pods in the project namespace, then follows the logs of each app
# label in the background until all followers exit.

# Use $KUBECONFIG when set, otherwise the kubeconfig under the current directory.
KUBECONFIG_PATH=${KUBECONFIG:-$(pwd)/.state/hetzner/kubeconfig.yaml}
PROJECT_NAMESPACE=${PROJECT_NAMESPACE:-unrip}
export KUBECONFIG="$KUBECONFIG_PATH"

kubectl -n "$PROJECT_NAMESPACE" get pods

for app in near-intents-ingest dummy-reactor dummy-executor dummy-consumer; do
  kubectl -n "$PROJECT_NAMESPACE" logs -l "app=$app" --tail=100 -f &
done
wait

View file

@ -2,25 +2,24 @@ import process from 'node:process';
import { createConsumer } from '../bus/kafka/consumer.mjs';
import { logStatus } from '../core/log.mjs';
import { parseEventMessage } from '../core/event-envelope.mjs';
import { assertTradeResult } from '../core/schemas.mjs';
import { loadConfig } from '../lib/config.mjs';
const config = loadConfig();
const consumer = await createConsumer({
groupId: config.kafkaConsumerGroupDummy,
groupId: `${config.kafkaConsumerGroupExecutor}-results-view`,
brokers: config.kafkaBrokers,
clientId: config.kafkaClientId,
});
await consumer.subscribe({ topic: config.kafkaTopicNormSwapDemand, fromBeginning: false });
logStatus(
`dummy consumer subscribed to ${config.kafkaTopicNormSwapDemand} as ${config.kafkaConsumerGroupDummy}`,
);
await consumer.subscribe({ topic: config.kafkaTopicExecTradeResult, fromBeginning: false });
logStatus(`result consumer subscribed to ${config.kafkaTopicExecTradeResult}`);
process.on('SIGINT', async () => {
await consumer.disconnect();
process.exit(0);
});
process.on('SIGTERM', async () => {
await consumer.disconnect();
process.exit(0);
@ -29,18 +28,15 @@ process.on('SIGTERM', async () => {
await consumer.run({
eachMessage: async ({ message }) => {
if (!message.value) return;
let event;
try {
event = JSON.parse(message.value.toString());
event = parseEventMessage(message.value.toString());
} catch {
logStatus('dummy consumer received non-JSON message; skipping');
logStatus('result consumer received non-JSON message; skipping');
return;
}
const payload = event?.payload || {};
const pair = `${payload.assetIn || '?'} -> ${payload.assetOut || '?'}`;
const quoteId = payload.quoteId || event.eventId || '?';
console.log(`[dummy-reactor] observed ${pair} quote_id=${quoteId} | would decide later`);
assertTradeResult(event);
const payload = event.payload;
console.log(`[result] command_id=${payload.command_id} quote_id=${payload.quote_id} status=${payload.status} result_code=${payload.result_code || 'n/a'}`);
},
});

View file

@ -0,0 +1,93 @@
import process from 'node:process';
import { createConsumer } from '../bus/kafka/consumer.mjs';
import { createProducer } from '../bus/kafka/producer.mjs';
import { buildEventEnvelope, parseEventMessage } from '../core/event-envelope.mjs';
import { createExecutorStateStore } from '../core/executor-state-store.mjs';
import { logStatus } from '../core/log.mjs';
import { assertExecuteTradeCommand, assertTradeResult } from '../core/schemas.mjs';
import { loadConfig } from '../lib/config.mjs';
// Module-level wiring for the dummy executor. Everything below is created once
// at startup and shared by the shutdown handler and the message loop.
const config = loadConfig();
// Consumer for execute_trade commands, joined under the executor consumer group.
const consumer = await createConsumer({
groupId: config.kafkaConsumerGroupExecutor,
brokers: config.kafkaBrokers,
clientId: config.kafkaClientId,
});
// Producer used to publish trade_result events.
const producer = await createProducer({
brokers: config.kafkaBrokers,
clientId: config.kafkaClientId,
});
// Per-command processing state keyed by command_id, stored under executorStateDir.
const stateStore = createExecutorStateStore({ stateDir: config.executorStateDir });
await consumer.subscribe({ topic: config.kafkaTopicCmdExecuteTrade, fromBeginning: false });
logStatus(`dummy executor subscribed to ${config.kafkaTopicCmdExecuteTrade} as ${config.kafkaConsumerGroupExecutor}`);
logStatus(`dummy executor will publish results to ${config.kafkaTopicExecTradeResult}; state_dir=${config.executorStateDir}`);
/**
 * Disconnects the consumer, then the producer, and exits the process.
 *
 * Each disconnect is attempted even if the previous one failed, so a rejected
 * consumer disconnect can no longer leave the producer connected and the
 * process hanging on an unhandled rejection. Exits with 0 when both
 * disconnects succeed and 1 otherwise.
 *
 * @returns {Promise<void>}
 */
async function shutdown() {
  let exitCode = 0;

  try {
    await consumer.disconnect();
  } catch (error) {
    exitCode = 1;
    logStatus(`dummy executor failed to disconnect consumer: ${error?.message ?? error}`);
  }

  try {
    await producer.disconnect();
  } catch (error) {
    exitCode = 1;
    logStatus(`dummy executor failed to disconnect producer: ${error?.message ?? error}`);
  }

  process.exit(exitCode);
}

process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);
// Message loop: validates each execute_trade command, records it in the state
// store, publishes a simulated trade_result and marks the command completed.
await consumer.run({
  eachMessage: async ({ message }) => {
    if (!message.value) return;

    let event;
    try {
      event = parseEventMessage(message.value.toString());
    } catch {
      logStatus('dummy executor received non-JSON message; skipping');
      return;
    }

    // A schema-invalid command can never become valid on redelivery. Letting the
    // assertion escape would fail the handler for the same message again and
    // again, so it is skipped in the same way as unparseable input.
    try {
      assertExecuteTradeCommand(event);
    } catch (error) {
      logStatus(`dummy executor received invalid execute_trade command; skipping: ${error?.message ?? error}`);
      return;
    }

    const payload = event.payload;
    const commandId = payload.command_id;

    // Commands already marked completed are duplicates and produce no second result.
    const existing = stateStore.get(commandId);
    if (existing?.status === 'completed') {
      logStatus(`dummy executor skipping duplicate command_id=${commandId}`);
      return;
    }

    stateStore.markProcessing(commandId, {
      idempotency_key: payload.idempotency_key,
      execution_key: payload.execution_key,
      quote_id: payload.quote_id,
    });

    const pair = `${payload.asset_in} -> ${payload.asset_out}`;
    const result = buildEventEnvelope({
      source: 'dummy-executor',
      venue: event.venue || 'near-intents',
      eventType: 'trade_result',
      eventId: `exec-${commandId}`,
      observedAt: event.observed_at,
      payload: {
        command_id: commandId,
        idempotency_key: payload.idempotency_key,
        execution_key: payload.execution_key,
        quote_id: payload.quote_id,
        status: 'simulated_sent',
        // A command found in "processing" was started earlier but never marked completed.
        result_code: existing?.status === 'processing' ? 'recovered_inflight' : 'sent',
        note: 'dummy executor placeholder result',
      },
    });

    // Guards against publishing a malformed result; a failure here is a bug in this file.
    assertTradeResult(result);
    await producer.sendJson(config.kafkaTopicExecTradeResult, result, { key: payload.execution_key });

    // Completion is recorded only after the result has been handed to the producer.
    stateStore.markCompleted(commandId, {
      idempotency_key: payload.idempotency_key,
      execution_key: payload.execution_key,
      quote_id: payload.quote_id,
      result_event_id: result.event_id,
    });

    console.log(`[dummy-executor] result emitted ${pair} quote_id=${payload.quote_id} command_id=${commandId} status=simulated_sent`);
  },
});

View file

@ -0,0 +1,75 @@
import process from 'node:process';
import { createConsumer } from '../bus/kafka/consumer.mjs';
import { createProducer } from '../bus/kafka/producer.mjs';
import { logStatus } from '../core/log.mjs';
import { loadConfig } from '../lib/config.mjs';
import { buildEventEnvelope, parseEventMessage } from '../core/event-envelope.mjs';
import { assertExecuteTradeCommand, assertNormalizedSwapDemand } from '../core/schemas.mjs';
// Module-level wiring for the dummy reactor. Everything below is created once
// at startup and shared by the shutdown handler and the message loop.
const config = loadConfig();
// Consumer for normalized swap-demand events, joined under the dummy consumer group.
const consumer = await createConsumer({
groupId: config.kafkaConsumerGroupDummy,
brokers: config.kafkaBrokers,
clientId: config.kafkaClientId,
});
// Producer used to publish execute_trade commands.
const producer = await createProducer({
brokers: config.kafkaBrokers,
clientId: config.kafkaClientId,
});
await consumer.subscribe({ topic: config.kafkaTopicNormSwapDemand, fromBeginning: false });
logStatus(`dummy reactor subscribed to ${config.kafkaTopicNormSwapDemand} as ${config.kafkaConsumerGroupDummy}`);
logStatus(`dummy reactor will publish commands to ${config.kafkaTopicCmdExecuteTrade}`);
/**
 * Disconnects the consumer, then the producer, and exits the process.
 *
 * Each disconnect is attempted even if the previous one failed, so a rejected
 * consumer disconnect can no longer leave the producer connected and the
 * process hanging on an unhandled rejection. Exits with 0 when both
 * disconnects succeed and 1 otherwise.
 *
 * @returns {Promise<void>}
 */
async function shutdown() {
  let exitCode = 0;

  try {
    await consumer.disconnect();
  } catch (error) {
    exitCode = 1;
    logStatus(`dummy reactor failed to disconnect consumer: ${error?.message ?? error}`);
  }

  try {
    await producer.disconnect();
  } catch (error) {
    exitCode = 1;
    logStatus(`dummy reactor failed to disconnect producer: ${error?.message ?? error}`);
  }

  process.exit(exitCode);
}

process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);
// Message loop: turns every valid swap_demand event into an execute_trade
// command and publishes it to the command topic.
await consumer.run({
  eachMessage: async ({ message }) => {
    if (!message.value) return;

    let event;
    try {
      event = parseEventMessage(message.value.toString());
    } catch {
      logStatus('dummy reactor received non-JSON message; skipping');
      return;
    }

    // A schema-invalid event can never become valid on redelivery. Letting the
    // assertion escape would fail the handler for the same message again and
    // again, so it is skipped in the same way as unparseable input.
    try {
      assertNormalizedSwapDemand(event);
    } catch (error) {
      logStatus(`dummy reactor received invalid swap_demand event; skipping: ${error?.message ?? error}`);
      return;
    }

    const payload = event.payload;
    const venue = event.venue || 'near-intents';
    const pair = `${payload.asset_in} -> ${payload.asset_out}`;
    const quoteId = payload.quote_id;
    // The command id is derived from the quote id, so the same quote always
    // yields the same command id.
    const commandId = `cmd-${quoteId}`;

    const command = buildEventEnvelope({
      source: 'dummy-reactor',
      venue,
      eventType: 'execute_trade',
      eventId: commandId,
      observedAt: event.observed_at,
      payload: {
        command_id: commandId,
        idempotency_key: `${venue}:${quoteId}`,
        execution_key: `${venue}:${payload.asset_in}->${payload.asset_out}`,
        quote_id: quoteId,
        asset_in: payload.asset_in,
        asset_out: payload.asset_out,
        amount_in: payload.amount_in,
        amount_out: payload.amount_out,
        reason: 'dummy reactor placeholder decision',
      },
    });

    // Guards against publishing a malformed command; a failure here is a bug in this file.
    assertExecuteTradeCommand(command);
    // Keyed by execution_key (venue plus asset pair).
    await producer.sendJson(config.kafkaTopicCmdExecuteTrade, command, { key: command.payload.execution_key });
    console.log(`[dummy-reactor] command emitted ${pair} quote_id=${quoteId} command_id=${commandId}`);
  },
});

View file

@ -18,7 +18,7 @@ const producer = await createProducer({
brokers: config.kafkaBrokers,
clientId: config.kafkaClientId,
});
logStatus(`kafka producer connected; topic=${config.kafkaTopicNormSwapDemand}`);
logStatus(`kafka producer connected; raw_topic=${config.kafkaTopicRawNearIntentsQuote}; normalized_topic=${config.kafkaTopicNormSwapDemand}`);
if (pairFilter) logStatus(`pair filter enabled: ${pairFilter[0]} <-> ${pairFilter[1]}`);
process.on('SIGINT', async () => {
@ -36,5 +36,6 @@ await startNearIntentsWs({
wsUrl: config.nearIntentsWsUrl,
pairFilter,
producer,
topic: config.kafkaTopicNormSwapDemand,
rawTopic: config.kafkaTopicRawNearIntentsQuote,
normalizedTopic: config.kafkaTopicNormSwapDemand,
});

View file

@ -1,27 +0,0 @@
import { Kafka } from 'kafkajs';
/**
 * Reads the broker list from KAFKA_BROKERS (comma separated), falling back to
 * '127.0.0.1:9092' when the variable is unset or empty.
 *
 * @returns {string[]} trimmed, non-empty broker addresses
 */
function brokersFromEnv() {
  const configured = process.env.KAFKA_BROKERS || '127.0.0.1:9092';
  const brokers = [];
  for (const entry of configured.split(',')) {
    const address = entry.trim();
    if (address) brokers.push(address);
  }
  return brokers;
}
/**
 * Builds a Kafka client from the environment: KAFKA_CLIENT_ID (default
 * 'trading-system') and the brokers returned by brokersFromEnv().
 *
 * @returns {Kafka}
 */
export function createKafka() {
  const clientId = process.env.KAFKA_CLIENT_ID || 'trading-system';
  const brokers = brokersFromEnv();
  return new Kafka({ clientId, brokers });
}
/**
 * Creates a producer from a fresh Kafka client and connects it.
 *
 * @returns {Promise<object>} the connected producer
 */
export async function createProducer() {
  const kafka = createKafka();
  const connectedProducer = kafka.producer();
  await connectedProducer.connect();
  return connectedProducer;
}
/**
 * Creates a consumer in the given group from a fresh Kafka client and connects it.
 *
 * @param {{ groupId: string }} options
 * @returns {Promise<object>} the connected consumer
 */
export async function createConsumer({ groupId }) {
  const kafka = createKafka();
  const connectedConsumer = kafka.consumer({ groupId });
  await connectedConsumer.connect();
  return connectedConsumer;
}

View file

@ -1,6 +1,6 @@
import { Kafka } from 'kafkajs';
function createKafka({ brokers = ['127.0.0.1:9092'], clientId = 'trading-system' } = {}) {
function createKafka({ brokers = ['127.0.0.1:9092'], clientId = 'unrip' } = {}) {
return new Kafka({ clientId, brokers });
}

View file

@ -1,6 +1,6 @@
import { Kafka } from 'kafkajs';
function createKafka({ brokers = ['127.0.0.1:9092'], clientId = 'trading-system' } = {}) {
function createKafka({ brokers = ['127.0.0.1:9092'], clientId = 'unrip' } = {}) {
return new Kafka({ clientId, brokers });
}
@ -8,7 +8,7 @@ export async function createProducer(options = {}) {
const producer = createKafka(options).producer();
await producer.connect();
return {
async sendJson(topic, event, { key = event?.eventId ?? event?.key ?? null } = {}) {
async sendJson(topic, event, { key = event?.event_id ?? event?.key ?? null } = {}) {
await producer.send({
topic,
messages: [{ key, value: JSON.stringify(event) }],

View file

@ -1,13 +0,0 @@
import fs from 'node:fs';
/**
 * Loads KEY=VALUE pairs from a dotenv-style file into process.env.
 *
 * Does nothing when the file does not exist. Blank lines, lines starting with
 * '#', and lines without '=' are ignored. The key is everything before the
 * first '='; one leading and one trailing quote character are stripped from
 * the value. Variables already present in process.env are never overwritten.
 *
 * @param {string} [path='.env'] file to read
 * @returns {void}
 */
export function loadDotenv(path = '.env') {
  if (!fs.existsSync(path)) return;

  const contents = fs.readFileSync(path, 'utf8');
  for (const rawLine of contents.split(/\r?\n/)) {
    const entry = rawLine.trim();
    if (entry.length === 0 || entry.startsWith('#') || !entry.includes('=')) continue;

    const separatorAt = entry.indexOf('=');
    const name = entry.slice(0, separatorAt).trim();
    if (name in process.env) continue;

    const unquoted = entry
      .slice(separatorAt + 1)
      .trim()
      .replace(/^['"]|['"]$/g, '');
    process.env[name] = unquoted;
  }
}

View file

@ -1,14 +1,41 @@
import crypto from 'node:crypto';
export function makeEventEnvelope({ venue, eventType, payload, raw = null, key = null }) {
return {
event_id: crypto.randomUUID(),
schema_version: 1,
export function buildEventEnvelope({
eventType,
venue,
event_type: eventType,
observed_at: new Date().toISOString(),
key,
payload,
source,
eventId = crypto.randomUUID(),
schemaVersion = 1,
observedAt = null,
ingestedAt = new Date(),
raw = null,
}) {
if (!eventType) throw new Error('Missing eventType');
if (!venue) throw new Error('Missing venue');
if (payload == null) throw new Error('Missing payload');
return {
event_id: String(eventId),
event_type: String(eventType),
venue: String(venue),
source: source ? String(source) : null,
schema_version: Number(schemaVersion),
observed_at: toIsoStringOrNull(observedAt),
ingested_at: toIsoStringOrNull(ingestedAt) ?? new Date().toISOString(),
payload,
raw,
};
}
/**
 * Turns a raw message value into an event object.
 *
 * Strings are parsed as JSON; any other value is used as-is. The result must
 * be a non-null, non-array object. Arrays are rejected here so that they fail
 * at the parse step rather than later in schema validation.
 *
 * @param {string|object} value JSON text or an already-parsed event
 * @returns {object} the event object
 * @throws {SyntaxError} when a string value is not valid JSON
 * @throws {Error} when the result is not a plain (non-array) object
 */
export function parseEventMessage(value) {
  const event = typeof value === 'string' ? JSON.parse(value) : value;
  if (event === null || typeof event !== 'object' || Array.isArray(event)) {
    throw new Error('Event must be an object');
  }
  return event;
}
/**
 * Converts a Date or date-like value to an ISO-8601 string.
 *
 * @param {Date|string|number|null|undefined} value
 * @returns {string|null} the ISO string, or null for null/undefined or an
 *   unparseable value
 */
function toIsoStringOrNull(value) {
  if (value === null || value === undefined) return null;

  const parsed = value instanceof Date ? value : new Date(value);
  if (Number.isNaN(parsed.getTime())) return null;

  return parsed.toISOString();
}

View file

@ -0,0 +1,49 @@
import fs from 'node:fs';
import path from 'node:path';
/**
 * Creates a file-backed store of per-command execution state.
 *
 * The state directory is created if needed, the existing state file is loaded
 * once, and every mark* call rewrites the whole file.
 *
 * @param {{ stateDir: string, fileName?: string }} options
 * @returns {{
 *   get(commandId: string): object|null,
 *   markProcessing(commandId: string, metadata: object): object,
 *   markCompleted(commandId: string, metadata: object): object,
 * }}
 */
export function createExecutorStateStore({ stateDir, fileName = 'commands.json' }) {
  fs.mkdirSync(stateDir, { recursive: true });
  const filePath = path.join(stateDir, fileName);
  const state = loadState(filePath);

  // Merges metadata over the existing entry, stamps status and time, and persists.
  const transition = (commandId, metadata, status) => {
    const previous = state[commandId] || {};
    state[commandId] = {
      ...previous,
      ...metadata,
      status,
      updated_at: new Date().toISOString(),
    };
    persistState(filePath, state);
    return state[commandId];
  };

  return {
    get: (commandId) => state[commandId] || null,
    markProcessing: (commandId, metadata) => transition(commandId, metadata, 'processing'),
    markCompleted: (commandId, metadata) => transition(commandId, metadata, 'completed'),
  };
}
/**
 * Loads the persisted command state from disk.
 *
 * Returns an empty state when the file is missing, unreadable, not valid JSON,
 * or valid JSON that is not a plain object (e.g. `null` or an array). The
 * store indexes the result by command id, so anything other than a plain
 * object would break later lookups. Discarded content is reported with a
 * warning instead of being dropped silently.
 *
 * @param {string} filePath path of the JSON state file
 * @returns {object} map of command id to state entry
 */
function loadState(filePath) {
  if (!fs.existsSync(filePath)) return {};

  try {
    const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (isPlainObject) return parsed;
    console.warn(`executor state file ${filePath} does not contain an object; starting with empty state`);
    return {};
  } catch (error) {
    console.warn(`executor state file ${filePath} could not be loaded; starting with empty state: ${error?.message ?? error}`);
    return {};
  }
}
/**
 * Writes the state as pretty-printed JSON to `<filePath>.tmp` and then renames
 * that file over `filePath`.
 *
 * @param {string} filePath destination state file
 * @param {object} state value to serialize
 * @returns {void}
 */
function persistState(filePath, state) {
  const stagingPath = [filePath, 'tmp'].join('.');
  const serialized = JSON.stringify(state, null, 2);
  fs.writeFileSync(stagingPath, serialized);
  fs.renameSync(stagingPath, filePath);
}

63
src/core/schemas.mjs Normal file
View file

@ -0,0 +1,63 @@
/**
 * Throws unless `value` is a non-empty string.
 *
 * @param {unknown} value value to check
 * @param {string} field field name used in the error message
 * @throws {Error} `Missing <field>`
 */
function requireString(value, field) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  if (isNonEmptyString) return;
  throw new Error(`Missing ${field}`);
}
/**
 * Throws unless `value` is a non-null object that is not an array.
 *
 * @param {unknown} value value to check
 * @param {string} field field name used in the error message
 * @throws {Error} `Missing <field>`
 */
function requireObject(value, field) {
  const isObject = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (isObject) return;
  throw new Error(`Missing ${field}`);
}
/**
 * Validates the fields shared by every event envelope.
 *
 * Requires non-empty string `event_id`, `event_type`, `venue` and
 * `ingested_at`, a finite numeric `schema_version`, and an object `payload`.
 * `source` is optional but must be a non-empty string when present.
 * `schema_version` is checked with Number.isFinite so that NaN, which has
 * type 'number', is rejected as well.
 *
 * @param {object} event candidate envelope
 * @returns {object} the same event, for chaining
 * @throws {Error} `Missing <field>` for the first field that fails
 */
export function assertEventEnvelope(event) {
  requireObject(event, 'event');
  requireString(event.event_id, 'event.event_id');
  requireString(event.event_type, 'event.event_type');
  requireString(event.venue, 'event.venue');
  if (event.source != null) requireString(event.source, 'event.source');
  if (!Number.isFinite(event.schema_version)) throw new Error('Missing event.schema_version');
  requireString(event.ingested_at, 'event.ingested_at');
  requireObject(event.payload, 'event.payload');
  return event;
}
/**
 * Validates a normalized `swap_demand` event.
 *
 * On top of the envelope checks, `quote_id`, `asset_in` and `asset_out` must
 * be non-empty strings; `amount_in`, `amount_out` and `ttl_ms` are optional
 * but must be non-empty strings when present.
 *
 * @param {object} event candidate event
 * @returns {object} the same event
 * @throws {Error} on the first failing check
 */
export function assertNormalizedSwapDemand(event) {
  assertEventEnvelope(event);

  const eventType = event.event_type;
  if (eventType !== 'swap_demand') throw new Error(`Unexpected event_type: ${eventType}`);

  const payload = event.payload;
  for (const field of ['quote_id', 'asset_in', 'asset_out']) {
    requireString(payload[field], `payload.${field}`);
  }
  for (const field of ['amount_in', 'amount_out', 'ttl_ms']) {
    if (payload[field] != null) requireString(payload[field], `payload.${field}`);
  }

  return event;
}
/**
 * Validates an `execute_trade` command event.
 *
 * On top of the envelope checks, `command_id`, `idempotency_key`,
 * `execution_key`, `quote_id`, `asset_in` and `asset_out` must be non-empty
 * strings; `amount_in` and `amount_out` are optional but must be non-empty
 * strings when present.
 *
 * @param {object} event candidate event
 * @returns {object} the same event
 * @throws {Error} on the first failing check
 */
export function assertExecuteTradeCommand(event) {
  assertEventEnvelope(event);

  const eventType = event.event_type;
  if (eventType !== 'execute_trade') throw new Error(`Unexpected event_type: ${eventType}`);

  const payload = event.payload;
  const requiredFields = ['command_id', 'idempotency_key', 'execution_key', 'quote_id', 'asset_in', 'asset_out'];
  for (const field of requiredFields) {
    requireString(payload[field], `payload.${field}`);
  }
  for (const field of ['amount_in', 'amount_out']) {
    if (payload[field] != null) requireString(payload[field], `payload.${field}`);
  }

  return event;
}
/**
 * Validates a `trade_result` event.
 *
 * On top of the envelope checks, `command_id`, `idempotency_key`,
 * `execution_key`, `quote_id` and `status` must be non-empty strings;
 * `result_code` is optional but must be a non-empty string when present.
 *
 * @param {object} event candidate event
 * @returns {object} the same event
 * @throws {Error} on the first failing check
 */
export function assertTradeResult(event) {
  assertEventEnvelope(event);

  const eventType = event.event_type;
  if (eventType !== 'trade_result') throw new Error(`Unexpected event_type: ${eventType}`);

  const payload = event.payload;
  const requiredFields = ['command_id', 'idempotency_key', 'execution_key', 'quote_id', 'status'];
  for (const field of requiredFields) {
    requireString(payload[field], `payload.${field}`);
  }
  if (payload.result_code != null) requireString(payload.result_code, 'payload.result_code');

  return event;
}

View file

@ -3,9 +3,14 @@ import { loadDotenv } from './env.mjs';
const DEFAULTS = {
nearIntentsWsUrl: 'wss://solver-relay-v2.chaindefuser.com/ws',
kafkaBrokers: ['127.0.0.1:9092'],
kafkaClientId: 'trading-system',
kafkaClientId: 'unrip',
kafkaTopicRawNearIntentsQuote: 'raw.near_intents.quote',
kafkaTopicNormSwapDemand: 'norm.swap_demand',
kafkaTopicCmdExecuteTrade: 'cmd.execute_trade',
kafkaTopicExecTradeResult: 'exec.trade_result',
kafkaConsumerGroupDummy: 'dummy-reactor-v1',
kafkaConsumerGroupExecutor: 'dummy-executor-v1',
executorStateDir: './var/executor-state',
};
function splitCsv(value) {
@ -16,6 +21,12 @@ function splitCsv(value) {
}
export function loadConfig({ envPath = '.env' } = {}) {
// Runtime config stays environment-first so the same app build works for:
// - local `.env` development
// - Docker/Compose
// - Kubernetes Secret/ConfigMap injection during Hetzner bootstrap
// This is what lets the local workstation bootstrap provision infra and then
// deploy the exact same image into k3s without app-level config rewrites.
loadDotenv(envPath);
return {
@ -25,9 +36,19 @@ export function loadConfig({ envPath = '.env' } = {}) {
? splitCsv(process.env.KAFKA_BROKERS)
: DEFAULTS.kafkaBrokers,
kafkaClientId: process.env.KAFKA_CLIENT_ID || DEFAULTS.kafkaClientId,
kafkaTopicRawNearIntentsQuote:
process.env.KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE || DEFAULTS.kafkaTopicRawNearIntentsQuote,
kafkaTopicNormSwapDemand:
process.env.KAFKA_TOPIC_NORM_SWAP_DEMAND || DEFAULTS.kafkaTopicNormSwapDemand,
kafkaTopicCmdExecuteTrade:
process.env.KAFKA_TOPIC_CMD_EXECUTE_TRADE || DEFAULTS.kafkaTopicCmdExecuteTrade,
kafkaTopicExecTradeResult:
process.env.KAFKA_TOPIC_EXEC_TRADE_RESULT || DEFAULTS.kafkaTopicExecTradeResult,
kafkaConsumerGroupDummy:
process.env.KAFKA_CONSUMER_GROUP_DUMMY || DEFAULTS.kafkaConsumerGroupDummy,
kafkaConsumerGroupExecutor:
process.env.KAFKA_CONSUMER_GROUP_EXECUTOR || DEFAULTS.kafkaConsumerGroupExecutor,
executorStateDir:
process.env.EXECUTOR_STATE_DIR || DEFAULTS.executorStateDir,
};
}

View file

@ -1,5 +1,9 @@
import fs from 'node:fs';
// `.env` loading is a local/dev convenience only.
// In the repo-driven Hetzner+k3s bootstrap flow, Kubernetes injects runtime
// environment variables from Secrets/ConfigMaps and already-present process.env
// values always win over anything on disk.
export function loadDotenv(path = '.env') {
if (!fs.existsSync(path)) return;
const lines = fs.readFileSync(path, 'utf8').split(/\r?\n/);

View file

@ -1,37 +0,0 @@
/**
 * Builds a camelCase event envelope.
 *
 * `source`, `venue`, `eventType` and `eventId` are mandatory and are coerced
 * to strings. `occurredAt` becomes an ISO string or null. `ingestedAt` falls
 * back to the current time when it is missing or unparseable.
 *
 * @param {object} fields envelope fields
 * @returns {object} the envelope
 * @throws {Error} `Missing <name>` for the first absent mandatory field
 */
export function buildEventEnvelope({
  source,
  venue,
  eventType,
  eventId,
  occurredAt = null,
  ingestedAt = new Date(),
  payload,
}) {
  // Checked in declaration order so the first missing field determines the error.
  const mandatory = { source, venue, eventType, eventId };
  for (const [name, value] of Object.entries(mandatory)) {
    if (!value) throw new Error(`Missing ${name}`);
  }

  const ingestedDate = parseDate(ingestedAt) ?? new Date();

  return {
    source: String(source),
    venue: String(venue),
    eventType: String(eventType),
    eventId: String(eventId),
    occurredAt: toIsoStringOrNull(occurredAt),
    ingestedAt: ingestedDate.toISOString(),
    payload,
  };
}
/**
 * Formats a date-like value as an ISO-8601 string.
 *
 * @param {Date|string|number|null|undefined} value
 * @returns {string|null} ISO string, or null when parseDate yields no date
 */
function toIsoStringOrNull(value) {
  const parsed = parseDate(value);
  if (!parsed) return null;
  return parsed.toISOString();
}
/**
 * Coerces a value to a valid Date.
 *
 * @param {Date|string|number|null|undefined} value
 * @returns {Date|null} the Date (the same instance when one was passed), or
 *   null for null/undefined or an invalid date
 */
function parseDate(value) {
  if (value === null || value === undefined) return null;

  const candidate = value instanceof Date ? value : new Date(value);
  if (Number.isNaN(candidate.getTime())) return null;

  return candidate;
}

View file

@ -1,4 +1,21 @@
import { buildEventEnvelope } from '../../lib/event-envelope.mjs';
import { buildEventEnvelope } from '../../core/event-envelope.mjs';
/**
 * Wraps an unmodified NEAR Intents websocket message in an event envelope of
 * type `near_intents_quote_raw`.
 *
 * Non-record messages are replaced by an empty object. The event id is the
 * first identifier-like field found on the message, otherwise an id derived
 * from the ingestion timestamp.
 *
 * @param {unknown} message decoded websocket message
 * @param {{ ingestedAt?: Date }} [options] ingestion time (defaults to now)
 * @returns {object} the envelope returned by buildEventEnvelope
 */
export function buildNearIntentsRawEnvelope(message, { ingestedAt = new Date() } = {}) {
  const raw = isRecord(message) ? message : {};

  const idFields = ['quote_id', 'quoteRequestId', 'request_id', 'id', 'quote_hash'];
  const timeFields = ['created_at', 'createdAt', 'timestamp', 'ts'];
  const quoteId = first(raw, idFields);
  const occurredAt = first(raw, timeFields);

  // The timestamp-based id is only computed when the message carries no identifier.
  let eventId = quoteId;
  if (!eventId) {
    eventId = `near-intents-raw-${ingestedAt.getTime()}`;
  }

  return buildEventEnvelope({
    source: 'near-intents.ws',
    venue: 'near-intents',
    eventType: 'near_intents_quote_raw',
    eventId,
    observedAt: occurredAt,
    ingestedAt,
    payload: { message: raw },
    raw,
  });
}
export function buildNearIntentsQuoteEnvelope(message, { ingestedAt = new Date() } = {}) {
const raw = isRecord(message) ? message : {};
@ -10,11 +27,12 @@ export function buildNearIntentsQuoteEnvelope(message, { ingestedAt = new Date()
return buildEventEnvelope({
source: 'near-intents.ws',
venue: 'near-intents',
eventType: 'quote',
eventId: payload.quoteId,
occurredAt,
eventType: 'swap_demand',
eventId: payload.quote_id,
observedAt: occurredAt,
ingestedAt,
payload,
raw,
});
}
@ -25,12 +43,12 @@ export function normalizeNearIntentsQuote(message) {
if (!quoteId || !assetIn || !assetOut) return null;
return {
quoteId: String(quoteId),
assetIn: String(assetIn),
assetOut: String(assetOut),
amountIn: stringify(first(message, ['exact_amount_in', 'sellAmount', 'amount_in'])),
amountOut: stringify(first(message, ['exact_amount_out', 'buyAmount', 'amount_out', 'expectedOut', 'quoted_amount_out'])),
ttlMs: stringify(first(message, ['min_deadline_ms', 'ttl_ms', 'deadline_ms'])),
quote_id: String(quoteId),
asset_in: String(assetIn),
asset_out: String(assetOut),
amount_in: stringify(first(message, ['exact_amount_in', 'sellAmount', 'amount_in'])),
amount_out: stringify(first(message, ['exact_amount_out', 'buyAmount', 'amount_out', 'expectedOut', 'quoted_amount_out'])),
ttl_ms: stringify(first(message, ['min_deadline_ms', 'ttl_ms', 'deadline_ms'])),
};
}

View file

@ -1,6 +1,7 @@
import { matchesPairFilter } from '../../core/pair-filter.mjs';
import { logStatus, startIdleHeartbeat } from '../../core/log.mjs';
import { buildNearIntentsQuoteEnvelope } from './normalize.mjs';
import { assertNormalizedSwapDemand } from '../../core/schemas.mjs';
import { buildNearIntentsQuoteEnvelope, buildNearIntentsRawEnvelope } from './normalize.mjs';
const DEFAULT_WS_URL = 'wss://solver-relay-v2.chaindefuser.com/ws';
const QUOTE_SUB_ID = 1;
@ -11,7 +12,8 @@ export async function startNearIntentsWs({
wsUrl = DEFAULT_WS_URL,
pairFilter,
producer,
topic,
rawTopic,
normalizedTopic,
onPublish = defaultOnPublish,
}) {
if (!apiKey) throw new Error('Missing NEAR_INTENTS_API_KEY');
@ -63,17 +65,20 @@ export async function startNearIntentsWs({
if (quoteSubscriptionId && subscription && subscription !== quoteSubscriptionId) return;
if (publishLocked) return;
const rawEnvelope = buildNearIntentsRawEnvelope(merged);
const envelope = buildNearIntentsQuoteEnvelope(merged);
if (!envelope) return;
assertNormalizedSwapDemand(envelope);
const assetIn = envelope.payload?.assetIn;
const assetOut = envelope.payload?.assetOut;
const assetIn = envelope.payload?.asset_in;
const assetOut = envelope.payload?.asset_out;
if (!assetIn || !assetOut) return;
if (!matchesPairFilter(assetIn, assetOut, pairFilter)) return;
publishLocked = true;
try {
await producer.sendJson(topic, envelope, { key: envelope.eventId });
await producer.sendJson(rawTopic, rawEnvelope, { key: rawEnvelope.event_id });
await producer.sendJson(normalizedTopic, envelope, { key: envelope.payload.quote_id });
publishedCount += 1;
onPublish(envelope, publishedCount);
} catch (error) {