Compare commits

..

No commits in common. "20d9cffe4230766d3d25839cf9411b4defab5fa4" and "28abf6b06caff82b5e757c2290da355b5e532426" have entirely different histories.

74 changed files with 3125 additions and 1909 deletions

6
.dockerignore Normal file
View file

@ -0,0 +1,6 @@
node_modules
npm-debug.log
.git
.gitignore
.env
var

39
.env.example Normal file
View file

@ -0,0 +1,39 @@
# Local dev / container runtime values
NEAR_INTENTS_API_KEY=replace_me
NEAR_INTENTS_WS_URL=wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS=redpanda:9092
KAFKA_CLIENT_ID=unrip
KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE=raw.near_intents.quote
KAFKA_TOPIC_NORM_SWAP_DEMAND=norm.swap_demand
KAFKA_TOPIC_CMD_EXECUTE_TRADE=cmd.execute_trade
KAFKA_TOPIC_EXEC_TRADE_RESULT=exec.trade_result
KAFKA_CONSUMER_GROUP_DUMMY=dummy-reactor-v1
KAFKA_CONSUMER_GROUP_EXECUTOR=dummy-executor-v1
EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state
# Repo-driven Hetzner bootstrap values live separately from the app .env.
# Copy scripts/hetzner/bootstrap-secrets.env.example to
# scripts/hetzner/bootstrap-secrets.env, configure non-secret values plus *_PASS
# mappings to your pass store, then:
# source scripts/hetzner/bootstrap-secrets.env
# bash scripts/hetzner/bootstrap.sh
#
# Canonical operator flow uses `pass` for sensitive values; explicit env vars still
# override pass-backed lookups for CI/testing.
#
# Expected bootstrap inputs now include:
# - HCLOUD_TOKEN_PASS or HCLOUD_TOKEN
# - SSH_PUBLIC_KEY_PATH
# - PUBLIC_DOMAIN
# - BASE_DOMAIN
# - LETSENCRYPT_EMAIL
# - REGISTRY_USERNAME
# - REGISTRY_PASSWORD_PASS or REGISTRY_PASSWORD
# - NEAR_INTENTS_API_KEY_PASS or NEAR_INTENTS_API_KEY
# - FORGEJO_ADMIN_USERNAME
# - FORGEJO_ADMIN_EMAIL
# - FORGEJO_ADMIN_PASSWORD_PASS or FORGEJO_ADMIN_PASSWORD
# - optional DNS provider creds via *_PASS or direct env vars
#
# Future k3s deployment should source the app values from Kubernetes Secret/ConfigMap.
# Hetzner bootstrap path clones the repo to /opt/unrip/repo for later deploy/k8s assets.

View file

@ -0,0 +1,115 @@
# Forgejo Actions workflow: build the app image inside the cluster with kaniko,
# push it to the private registry, then roll every listed deployment to it.
# Runs on every push to main and on manual dispatch.
name: deploy

on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  deploy:
    runs-on: linux-amd64
    env:
      IMAGE_TAG: ${{ github.sha }}
      REGISTRY_HOST: ${{ vars.REGISTRY_HOST }}
      # Project settings fall back to the `unrip` defaults when the repository
      # variables are not defined.
      PROJECT_NAME: ${{ vars.PROJECT_NAME || 'unrip' }}
      PROJECT_NAMESPACE: ${{ vars.PROJECT_NAMESPACE || vars.PROJECT_NAME || 'unrip' }}
      PROJECT_DEPLOYMENTS: ${{ vars.PROJECT_DEPLOYMENTS || 'near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer' }}
      PROJECT_REGISTRY_SECRET_NAME: ${{ vars.PROJECT_REGISTRY_SECRET_NAME || format('{0}-registry-creds', vars.PROJECT_NAME || 'unrip') }}
      REPO_CLONE_URL: ${{ github.server_url }}/${{ github.repository }}.git
    steps:
      - name: Install tooling
        run: |
          apk add --no-cache kubectl

      - name: Load kubeconfig
        run: |
          mkdir -p "$HOME/.kube"
          printf '%s' '${{ secrets.KUBECONFIG_B64 }}' | base64 -d > "$HOME/.kube/config"
          # The kubeconfig carries cluster credentials; keep it owner-readable only.
          chmod 600 "$HOME/.kube/config"
          # Fail early if the decoded kubeconfig cannot reach the cluster.
          kubectl get ns

      - name: Resolve deployment settings
        run: |
          IMAGE="$REGISTRY_HOST/$PROJECT_NAME:$IMAGE_TAG"
          BUILD_JOB="image-build-${GITHUB_SHA:0:12}"
          # Export the derived values so the following steps can read them.
          {
            echo "IMAGE=$IMAGE"
            echo "BUILD_JOB=$BUILD_JOB"
            echo "PROJECT_NAMESPACE=$PROJECT_NAMESPACE"
            echo "PROJECT_DEPLOYMENTS=$PROJECT_DEPLOYMENTS"
            echo "PROJECT_REGISTRY_SECRET_NAME=$PROJECT_REGISTRY_SECRET_NAME"
          } >> "$GITHUB_ENV"

      - name: Build and push image in-cluster
        env:
          REPO_TOKEN: ${{ github.token }}
        run: |
          # The Job name is derived from the commit SHA, so a re-run for the same
          # commit would collide with the previous Job (kept for up to an hour by
          # ttlSecondsAfterFinished). Remove any leftover before creating a new one.
          kubectl -n "$PROJECT_NAMESPACE" delete job "$BUILD_JOB" --ignore-not-found --wait=true

          # The heredoc is unquoted: unescaped variables are expanded by this step's
          # shell when the manifest is rendered. The escaped ones in the checkout
          # command are left for the container shell, which reads them from the
          # container env, so the token is not duplicated into the args.
          # NOTE(review): the token is still stored in plain text in the Job spec
          # env; consider a Secret with secretKeyRef.
          cat <<EOF | kubectl apply -f -
          apiVersion: batch/v1
          kind: Job
          metadata:
            name: ${BUILD_JOB}
            namespace: ${PROJECT_NAMESPACE}
          spec:
            backoffLimit: 0
            ttlSecondsAfterFinished: 3600
            template:
              spec:
                restartPolicy: Never
                volumes:
                  - name: workspace
                    emptyDir: {}
                  - name: registry-creds
                    secret:
                      secretName: ${PROJECT_REGISTRY_SECRET_NAME}
                      items:
                        - key: .dockerconfigjson
                          path: config.json
                initContainers:
                  - name: checkout
                    image: alpine/git:2.47.2
                    env:
                      - name: REPO_TOKEN
                        value: "${REPO_TOKEN}"
                      - name: REPO_CLONE_URL
                        value: "${REPO_CLONE_URL}"
                      - name: GITHUB_SHA
                        value: "${GITHUB_SHA}"
                    command: ["/bin/sh", "-lc"]
                    args:
                      - >-
                        git -c credential.username=oauth2 -c http.extraHeader="Authorization: Bearer \${REPO_TOKEN}" clone --depth=1 "\${REPO_CLONE_URL}" /workspace &&
                        cd /workspace &&
                        git -c credential.username=oauth2 -c http.extraHeader="Authorization: Bearer \${REPO_TOKEN}" fetch --depth=1 origin "\${GITHUB_SHA}" &&
                        git checkout --detach "\${GITHUB_SHA}"
                    volumeMounts:
                      - name: workspace
                        mountPath: /workspace
                containers:
                  - name: kaniko
                    image: gcr.io/kaniko-project/executor:v1.23.2-debug
                    args:
                      - --context=/workspace
                      - --dockerfile=/workspace/Dockerfile
                      - --destination=${IMAGE}
                      - --cache=true
                    volumeMounts:
                      - name: workspace
                        mountPath: /workspace
                      - name: registry-creds
                        mountPath: /kaniko/.docker
          EOF

          # Poll for either terminal condition. Waiting only for Complete would
          # block for the whole timeout when the build fails (backoffLimit is 0,
          # so a failure is final).
          deadline=$(( $(date +%s) + 1200 ))
          build_status=timeout
          while [ "$(date +%s)" -lt "$deadline" ]; do
            conditions="$(kubectl -n "$PROJECT_NAMESPACE" get "job/$BUILD_JOB" -o jsonpath='{.status.conditions[?(@.status=="True")].type}' 2>/dev/null || true)"
            case "$conditions" in
              *Complete*) build_status=complete; break ;;
              *Failed*) build_status=failed; break ;;
            esac
            sleep 5
          done

          # Always print the build output, including the checkout init container,
          # so a failed or timed-out build can be diagnosed from the CI log.
          kubectl -n "$PROJECT_NAMESPACE" logs "job/$BUILD_JOB" --all-containers=true || true

          if [ "$build_status" != complete ]; then
            echo "image build job $BUILD_JOB did not complete: $build_status" >&2
            exit 1
          fi

      - name: Roll deployments to new image
        run: |
          # PROJECT_DEPLOYMENTS is a comma-separated list; entries are trimmed and
          # empty entries are skipped.
          IFS=',' read -r -a DEPLOYMENTS <<< "$PROJECT_DEPLOYMENTS"
          for deployment in "${DEPLOYMENTS[@]}"; do
            deployment="$(echo "$deployment" | xargs)"
            [ -n "$deployment" ] || continue
            kubectl -n "$PROJECT_NAMESPACE" set image "deployment/$deployment" app="$IMAGE"
            kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/$deployment" --timeout=180s
          done

3
.gitignore vendored
View file

@ -4,9 +4,6 @@
__pycache__/ __pycache__/
*.pyc *.pyc
.env .env
**/.env
node_modules/
projects/**/node_modules/
deploy/k8s/overlays/hetzner-single-node/secrets/*.env deploy/k8s/overlays/hetzner-single-node/secrets/*.env
deploy/k8s/overlays/hetzner-single-node/secrets/*.htpasswd deploy/k8s/overlays/hetzner-single-node/secrets/*.htpasswd
!deploy/k8s/overlays/hetzner-single-node/secrets/*.example !deploy/k8s/overlays/hetzner-single-node/secrets/*.example

10
Dockerfile Normal file
View file

@ -0,0 +1,10 @@
# Runtime image for the Node.js services in this repo.
FROM node:22-bookworm-slim
WORKDIR /app
# Copy only the npm manifests first so the dependency layer can be reused
# until package.json or package-lock.json changes.
COPY package.json package-lock.json ./
# Install from the lockfile, leaving out devDependencies.
RUN npm ci --omit=dev
# Copy the rest of the build context (filtered by .dockerignore).
COPY . .
ENV NODE_ENV=production
# Default process; compose.yml overrides `command` per service to start the
# other apps from this same image.
CMD ["node", "src/apps/dummy-consumer.mjs"]

400
README.md
View file

@ -1,88 +1,368 @@
# near-intents-monitor platform repo # near-intents-monitor
This repository is the **shared platform/infrastructure** repo for the Hetzner + k3s cluster. Production-shaped first slice of the trading system:
The `unrip` application now lives in its own separate repository. - **venue ingest**: NEAR Intents solver-bus quote flow
- **bus**: Redpanda first, Kafka-compatible by design
- **reactor**: dummy decision engine emitting commands
- **executor**: dummy execution worker with durable idempotency state
- **result consumer**: downstream observer of execution outcomes
## Repo layout ## Canonical repo shape
```text ```text
infra/ src/
terraform/ apps/
hetzner/ near-intents-ingest.mjs
scripts/ dummy-reactor.mjs
hetzner/ dummy-executor.mjs
deploy/ dummy-consumer.mjs
hetzner/ bus/
k8s/ kafka/
platform/ producer.mjs
overlays/ consumer.mjs
hetzner-single-node/ core/
event-envelope.mjs
executor-state-store.mjs
log.mjs
pair-filter.mjs
schemas.mjs
lib/
config.mjs
env.mjs
venues/
near-intents/
ingest.mjs
normalize.mjs
ws.mjs
compose.yml
Dockerfile
docs/contracts.md
deploy/hetzner/README.md
``` ```
## Shared platform at repo root ## Event flow
Shared/root-owned parts include: ```text
- Hetzner Terraform NEAR Intents WebSocket
- cloud-init + bootstrap scripts |
- cluster/platform Kubernetes manifests +--> raw.near_intents.quote
- Forgejo |
- Forgejo runner v
- registry norm.swap_demand
- cert-manager |
- Traefik integration v
- Grafana cmd.execute_trade
- Loki |
- Promtail v
- Headlamp exec.trade_result
- shared operator docs and runbooks ```
## Application repo: `unrip` Core rule: services do not call each other directly for trading flow; they communicate through bus topics only.
The trading-system code and project-specific deployment assets were split into a separate `unrip` repository. ## Contracts
That repo now owns the app source, Docker build, local compose setup, app manifests, and project docs. See `docs/contracts.md`.
## Canonical production path Current topics:
- `raw.near_intents.quote`
- `norm.swap_demand`
- `cmd.execute_trade`
- `exec.trade_result`
The canonical production path is the repo-driven Hetzner + k3s bootstrap flow. ## Primary deployment path: repo-driven Hetzner bootstrap
The primary production path is no longer a Compose-only VM workflow.
The intended operating model is:
- Terraform provisions a Hetzner single-node environment
- cloud-init installs k3s automatically on first boot
- a local operator workstation performs the first repo-driven bootstrap
- Kubernetes manifests install Redpanda, the app workloads, Forgejo, runner, registry, and ingress-related components
- once the in-cluster Git + CI stack is alive, routine app deploys move to self-hosted CI
This is a two-phase model:
- **Phase 0:** local workstation bootstrap of a brand-new cluster
- **Phase 1:** self-hosted Forgejo + runner takes over app delivery
Compose still exists for local development and optional single-machine testing, but it is not the canonical production story.
## Prerequisites for first deployment
Install locally on the operator workstation:
- Terraform `>= 1.6`
- `kubectl`
- `docker`
- `curl`
You also need:
- a Hetzner Cloud API token
- a local SSH public key file for Terraform node provisioning
- DNS control for your chosen base domain and Forgejo hostname
- preferably a Tailscale tailnet and auth key for private admin/control-plane access
- the repo checked out locally
## Required bootstrap secrets and inputs
Create the bootstrap env file:
```bash ```bash
cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env
```
Set at least:
- `HCLOUD_TOKEN`
- `SSH_PUBLIC_KEY_PATH`
- `PUBLIC_DOMAIN`
- recommended:
- `TAILSCALE_AUTH_KEY`
- `TAILSCALE_CONTROL_PLANE_HOSTNAME`
- optional fallback:
- `TF_ADMIN_CIDR_BLOCKS`
- `BASE_DOMAIN`
- `FORGEJO_DOMAIN`
- `FORGEJO_ROOT_URL`
- `REGISTRY_DOMAIN`
- `LETSENCRYPT_EMAIL`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
- `NEAR_INTENTS_API_KEY`
- `FORGEJO_RUNNER_REGISTRATION_TOKEN`
- optional DNS automation:
- Cloudflare:
- `CLOUDFLARE_API_TOKEN`
- `CLOUDFLARE_ZONE_ID`
- Porkbun:
- `PORKBUN_API_KEY`
- `PORKBUN_SECRET_API_KEY`
Then load them:
```bash
source scripts/hetzner/bootstrap-secrets.env source scripts/hetzner/bootstrap-secrets.env
```
## First bootstrap sequence
Run the end-to-end bootstrap from repo root:
```bash
bash scripts/hetzner/bootstrap.sh bash scripts/hetzner/bootstrap.sh
``` ```
Bootstrap now: Current repo behavior of that script:
1. provisions/updates Hetzner infra with Terraform 1. runs Terraform in `infra/terraform/hetzner`
2. optionally manages DNS through Cloudflare or Porkbun 2. optionally creates DNS records for the base, Forgejo, and registry hosts via Cloudflare or Porkbun
3. fetches kubeconfig from the node into `.state/hetzner/kubeconfig.yaml` 3. if configured, joins the node to Tailscale and prefers the Tailscale control-plane hostname for Kubernetes API access
4. renders `.state/hetzner/generated-overlay/` 4. waits for SSH and the k3s API endpoint to become ready
5. applies the shared platform manifests and shared/generated secrets 5. fetches the real k3s kubeconfig from the node and writes it to `.state/hetzner/kubeconfig.yaml`
6. bootstraps Forgejo admin, runner, repo, and Actions config 6. renders the Hetzner single-node overlay from local operator inputs
7. seeds this infra repo into Forgejo 7. creates registry pull/auth secrets
8. applies the Kubernetes bootstrap manifests
9. builds the app image locally and imports it into k3s on the node
10. performs the first rollout using the imported bootstrap image
## Runtime surfaces Use the generated kubeconfig afterward:
- Forgejo: `https://git.doran.133011.xyz/` ```bash
- Registry: `https://registry.doran.133011.xyz/` export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
- Grafana: `https://grafana.doran.133011.xyz/` kubectl get nodes -o wide
- Headlamp: `https://headlamp.doran.133011.xyz/` kubectl get pods -A
kubectl -n unrip get deploy,pods
kubectl -n forgejo get deploy,pods,svc
```
## What is deployed into k3s
## Operator docs The repo-managed Kubernetes assets are under `deploy/k8s/`.
Current operator/platform docs: Current single-node target includes resources for:
- `unrip` workloads in namespace `unrip`
- Redpanda
- Forgejo
- Forgejo runner
- private registry
- ingress-nginx namespace/resources
- cert-manager namespace/resources
- ACME issuers and ingress definitions
- a bootstrap job for Redpanda topic creation
Shared platform namespaces:
- `forgejo`
- `registry`
- `ingress-nginx`
- `cert-manager`
Project-specific namespaces:
- `unrip`
- future projects should get their own namespace rather than sharing `unrip`
Important current-state nuance:
- the bootstrap script currently applies `deploy/k8s/base`
- the longer-term intended target is `deploy/k8s/overlays/hetzner-single-node`
## Executor persistence in k3s
The executor is stateful by design because it persists idempotency/execution tracking.
Current persistence boundary:
- app env uses `EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state`
- in Kubernetes, the executor deployment mounts storage at that path
- the Hetzner single-node overlay pins storage to the k3s `local-path` storage class
- cloud-init also prepares the host directory boundary for executor state on first boot
Operational meaning:
- executor state lives on node-backed storage in the single-node k3s environment
- if that PVC or underlying node storage is lost, duplicate-suppression history is lost too
- treat executor persistence as part of the minimal durable state of the cluster
## Failure recovery and operator checks
### If bootstrap fails before Terraform completes
Re-run after fixing the local input problem:
- missing token
- invalid CIDRs
- invalid SSH public key path
If the infrastructure must be torn down:
```bash
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/destroy.sh
```
### If Terraform succeeds but Kubernetes is not ready
Check the public API and cluster state from the workstation:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl get events -A --sort-by=.lastTimestamp | tail -n 50
```
Typical next checks:
- cloud-init may still be finishing
- k3s may still be starting
- a workload may be crash-looping due to missing secret values or image-delivery issues
### If workloads do not roll out
Inspect the affected namespace:
```bash
kubectl -n unrip get pods
kubectl -n unrip describe pod <pod-name>
kubectl -n unrip logs deploy/dummy-executor --tail=100
kubectl -n forgejo logs deploy/forgejo --tail=100
```
### If you need to recreate secrets
The workstation bootstrap creates these Secrets:
- `unrip/unrip-secrets`
- `forgejo/forgejo-secrets`
Verify them:
```bash
kubectl -n unrip get secret unrip-secrets
kubectl -n forgejo get secret forgejo-secrets
```
### Current known limitations
Current colony state already identified an important gap:
- bootstrap and CI are not yet fully production-hardened, even though the first deploy path now fetches the real kubeconfig and imports the bootstrap image directly into k3s
Treat the current bootstrap as a repo-driven first-deploy path suitable for testing, with hardening still pending.
## Self-hosted CI handoff
After cluster bootstrap:
- open Forgejo at `https://${FORGEJO_DOMAIN}`
- seed or push this repo into Forgejo
- create Forgejo repository secrets:
- `KUBECONFIG_B64`
- `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD`
- create Forgejo repository variables:
- `REGISTRY_HOST=${REGISTRY_DOMAIN}`
- optional: `PROJECT_NAME=unrip`
- optional: `PROJECT_NAMESPACE=unrip`
- optional: `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
- push to `main`
Routine application deploys then follow `.forgejo/workflows/deploy.yml`:
- build image as `REGISTRY_HOST/PROJECT_NAME:${GIT_SHA}`
- push to the private registry
- `kubectl set image` for each deployment listed in `PROJECT_DEPLOYMENTS` inside `PROJECT_NAMESPACE`
- wait for rollout
If project variables are omitted, the workflow defaults to the current repo project:
- `PROJECT_NAME=unrip`
- `PROJECT_NAMESPACE=unrip`
- `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
Infrastructure changes remain Terraform-driven from the operator workstation unless and until that responsibility is also automated.
For the detailed operator runbooks, see:
- `docs/hetzner-k3s-bootstrap.md` - `docs/hetzner-k3s-bootstrap.md`
- `docs/hetzner-self-hosted-ci-runbook.md` - `docs/hetzner-self-hosted-ci-runbook.md`
- `docs/k8s-observability.md` - `deploy/k8s/projects/README.md`
- `docs/hetzner-rebuild-pipeline.md` - `docs/next-session-architecture.md`
- `deploy/hetzner/README.md`
- `deploy/k8s/README.md`
- `deploy/k8s/overlays/hetzner-single-node/README.md`
## Notes ## Local development with Compose
- Ingress is Traefik-based. The old ingress-nginx path is obsolete. Compose remains available for local development and debugging.
- Grafana is for historical log search.
- Headlamp is for cluster/pod browsing and pod logs. ```bash
- Use `pass`-backed `*_PASS` values for secrets whenever possible. npm install
cp .env.example .env
# edit .env
docker compose build
docker compose up -d
```
Useful commands:
```bash
docker compose ps
docker compose logs -f
docker compose logs -f near-intents-ingest dummy-reactor dummy-executor dummy-consumer
docker compose restart dummy-executor
docker compose down
docker compose down -v
```
### Individual services
```bash
npm run near-intents:ingest
npm run dummy-reactor
npm run dummy-executor
npm run dummy-consumer
```
Optional pair filter:
```bash
npm run near-intents:ingest -- --pair 'asset_a->asset_b'
```
## Idempotent executor behavior
- every command has a `command_id`
- commands carry `idempotency_key` and `execution_key`
- executor persists state under `EXECUTOR_STATE_DIR`
- completed commands are skipped after restart or replay
## Env
```env
NEAR_INTENTS_API_KEY=your_solver_jwt
NEAR_INTENTS_WS_URL=wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS=redpanda:9092
KAFKA_CLIENT_ID=unrip
KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE=raw.near_intents.quote
KAFKA_TOPIC_NORM_SWAP_DEMAND=norm.swap_demand
KAFKA_TOPIC_CMD_EXECUTE_TRADE=cmd.execute_trade
KAFKA_TOPIC_EXEC_TRADE_RESULT=exec.trade_result
KAFKA_CONSUMER_GROUP_DUMMY=dummy-reactor-v1
KAFKA_CONSUMER_GROUP_EXECUTOR=dummy-executor-v1
EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state
```

81
compose.yml Normal file
View file

@ -0,0 +1,81 @@
# Local/dev runtime reference. Hetzner production bootstrap now starts from Terraform + cloud-init + k3s.

# Settings shared by every app service: built from the repo Dockerfile,
# configured from .env, and started only once Redpanda reports healthy.
x-app-service: &app-service
  build: .
  env_file:
    - .env
  depends_on:
    redpanda:
      condition: service_healthy
  restart: unless-stopped

services:
  # Single-node Kafka-compatible broker. The internal listener serves the
  # compose network; the external listener is published on loopback only.
  redpanda:
    image: docker.redpanda.com/redpandadata/redpanda:v24.3.9
    command:
      - redpanda
      - start
      - --overprovisioned
      - --smp
      - "1"
      - --memory
      - "1G"
      - --reserve-memory
      - "0M"
      - --node-id
      - "0"
      - --check=false
      - --kafka-addr
      - internal://0.0.0.0:9092,external://0.0.0.0:19092
      - --advertise-kafka-addr
      - internal://redpanda:9092,external://127.0.0.1:19092
      - --pandaproxy-addr
      - internal://0.0.0.0:8082
      - --advertise-pandaproxy-addr
      - internal://redpanda:8082
    ports:
      - "127.0.0.1:19092:19092"
    volumes:
      - redpanda-data:/var/lib/redpanda/data
    healthcheck:
      test:
        - CMD-SHELL
        - "rpk cluster health | grep -q 'Healthy: *true'"
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 20s

  near-intents-ingest:
    <<: *app-service
    command:
      - node
      - src/apps/near-intents-ingest.mjs

  dummy-reactor:
    <<: *app-service
    command:
      - node
      - src/apps/dummy-reactor.mjs

  # The executor additionally mounts a named volume at the state directory
  # so its state survives container restarts.
  dummy-executor:
    <<: *app-service
    command:
      - node
      - src/apps/dummy-executor.mjs
    volumes:
      - executor-state:/var/lib/unrip/executor-state

  dummy-consumer:
    <<: *app-service
    command:
      - node
      - src/apps/dummy-consumer.mjs

volumes:
  redpanda-data:
  executor-state:

View file

@ -1,105 +1,275 @@
# Hetzner single-node bootstrap # Hetzner single-node bootstrap (Terraform + cloud-init + k3s)
This repo's canonical infrastructure path is: This is the canonical first-production deployment path for the repo.
1. provision one Hetzner VM with Terraform A local operator workstation drives the first deployment end to end:
2. let cloud-init install k3s (and optionally Tailscale) - Terraform provisions Hetzner infrastructure
3. run `scripts/hetzner/bootstrap.sh` from the operator workstation - cloud-init installs k3s automatically on first boot
4. apply repo-managed platform + project manifests - the workstation waits for the public Kubernetes API
5. bootstrap Forgejo, the runner, repo secrets/variables, and the first CI-driven deploy - the workstation creates initial Kubernetes Secrets
- the workstation applies repo-managed Kubernetes manifests
- the workstation performs the first image/bootstrap delivery attempt
- once Forgejo + runner are alive, routine app deploys are intended to move to self-hosted CI
## Source of truth Compose remains available for local development, but it is not the primary production deployment model.
Use these docs first: ## Scope of this layer
- `docs/hetzner-k3s-bootstrap.md` — bootstrap + destroy + required env The foundation under `infra/terraform/hetzner` provisions:
- `docs/hetzner-self-hosted-ci-runbook.md` — Forgejo/runner/CI flow - one Hetzner Cloud server
- `docs/k8s-observability.md` — Grafana, Loki, Promtail, Headlamp - one SSH key resource based on your local public key
- `deploy/k8s/README.md` — Kubernetes layout - firewall rules for SSH, Kubernetes API, and HTTP/HTTPS ingress
- `deploy/k8s/overlays/hetzner-single-node/README.md` — overlay details - a private network attachment for future growth
- cloud-init user-data for unattended k3s installation and host preparation
## Current architecture The repo bootstrap then applies the Hetzner single-node overlay under `deploy/k8s/overlays/hetzner-single-node`, which composes Kubernetes resources under `deploy/k8s/` for:
- shared platform namespaces and services
Infrastructure under `infra/terraform/hetzner/` provisions: - Redpanda
- one Hetzner VM - unrip workloads
- one firewall
- one private network attachment
- cloud-init for unattended k3s install
Kubernetes platform services deployed from this repo:
- Forgejo - Forgejo
- Forgejo runner - Forgejo runner
- private registry - private registry
- cert-manager - ingress/TLS-related resources
- Traefik via k3s bundled ingress controller - Redpanda topic bootstrap job
- Grafana
- Loki
- Promtail
- Headlamp
Project services deployed from this repo: ## Prerequisites
- Redpanda
- `near-intents-ingest`
- `dummy-reactor`
- `dummy-executor`
- `dummy-consumer`
## Bootstrap model Install on the operator workstation:
- Terraform `>= 1.6`
- `kubectl`
- `docker`
- `curl`
The current bootstrap is workstation-driven after Terraform. You also need:
cloud-init does **not** clone this repo onto the node. - a Hetzner Cloud API token
- an SSH keypair already present locally
- access to DNS for your chosen domains
- admin CIDRs that can reach the future server on `22/tcp` and `6443/tcp`
- this repo checked out locally
`scripts/hetzner/bootstrap.sh` now: ## Required bootstrap secrets and inputs
- loads config and secrets from `scripts/hetzner/bootstrap-secrets.env`
- resolves `*_PASS` values through `pass`
- runs Terraform
- configures DNS through Cloudflare or Porkbun when credentials are present
- fetches kubeconfig from the node
- renders `.state/hetzner/generated-overlay/`
- applies platform + project manifests
- bootstraps Forgejo admin/user/repo/runner state
- seeds the repo into Forgejo
- lets Forgejo Actions perform the routine image build + deploy path by default
Legacy local-image bootstrap still exists, but the default/steady-state path is Forgejo Actions. Prepare the operator env file:
## Required operator inputs
Create and source:
```bash ```bash
cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env cp scripts/hetzner/bootstrap-secrets.env.example scripts/hetzner/bootstrap-secrets.env
${EDITOR:-vi} scripts/hetzner/bootstrap-secrets.env
```
Set at least:
- `HCLOUD_TOKEN`
- `SSH_PUBLIC_KEY_PATH`
- `TF_ADMIN_CIDR_BLOCKS`
- `BASE_DOMAIN`
- `FORGEJO_DOMAIN`
- `FORGEJO_ROOT_URL`
- `NEAR_INTENTS_API_KEY`
- `FORGEJO_RUNNER_REGISTRATION_TOKEN`
Load it into the current shell:
```bash
source scripts/hetzner/bootstrap-secrets.env source scripts/hetzner/bootstrap-secrets.env
``` ```
At minimum you need: ## Canonical bootstrap sequence
- Hetzner credentials
- SSH public key path
- public domain settings
- registry credentials
- app secret(s)
- Forgejo admin credentials
- Grafana admin credentials
Recommended: Run from repo root:
- Tailscale auth key for private admin/control-plane access
- DNS provider credentials
- `pass`-backed secret refs instead of raw env values
## Current live/public surfaces ```bash
bash scripts/hetzner/bootstrap.sh
```
- Forgejo: `https://git.doran.133011.xyz/` Current behavior of the script:
- Registry: `https://registry.doran.133011.xyz/` 1. validates local tooling
- Grafana: `https://grafana.doran.133011.xyz/` 2. runs `terraform init` and `terraform apply` in `infra/terraform/hetzner`
- Headlamp: `https://headlamp.doran.133011.xyz/` 3. reads Terraform outputs such as server IP and `k3s_api_url`
4. waits for the k3s API readiness endpoint
5. writes a local workstation kubeconfig to `.state/hetzner/kubeconfig.yaml`
6. writes overlay secret env input files and creates:
- `unrip/unrip-secrets`
- `unrip/unrip-registry-creds`
- `forgejo/forgejo-secrets`
- `registry/registry-secrets`
7. applies `deploy/k8s/platform/base/namespace.yaml` and `deploy/k8s/overlays/hetzner-single-node`
8. builds the repo bootstrap image locally
9. pushes it through the temporary local registry bridge using the active project name
10. updates and waits for rollout status in the active project namespace
## Notes After the script finishes:
- The Forgejo runner no longer reads a pre-seeded `runner_registration_token` from a secret. Bootstrap generates a one-time token in-cluster and persists the runner config on the Forgejo PVC. ```bash
- Registry auth is created imperatively during bootstrap from `REGISTRY_USERNAME` and `REGISTRY_PASSWORD`; manual overlay applies must provide `registry.htpasswd` themselves. export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
- Headlamp login uses a generated Kubernetes service-account token; bootstrap stores it in `pass` when `HEADLAMP_ADMIN_TOKEN_PASS` is configured. kubectl get nodes -o wide
- Ingress is Traefik-based. The old `ingress-nginx` path is obsolete. kubectl get pods -A
kubectl -n unrip get deploy,pods,jobs
kubectl -n forgejo get deploy,pods,svc
kubectl -n registry get pods,svc
```
## Status ## Current manifest target
This path has been rebuilt successfully and the cluster is operational, but if you want the strongest reproducibility guarantee after any new platform addition, do one more full destroy/rebuild rehearsal. Important current-state detail:
- `scripts/hetzner/bootstrap.sh` now applies `deploy/k8s/platform/base/namespace.yaml`
- it then applies `deploy/k8s/overlays/hetzner-single-node`
- bootstrap naming no longer assumes legacy `trading-system` kubeconfig contexts, image tags, or rollout namespaces
## Executor persistence in k3s
The dummy executor persists durable idempotency state.
Current persistence model:
- application path: `EXECUTOR_STATE_DIR=/var/lib/unrip/executor-state`
- cloud-init prepares the host boundary for executor storage on first boot
- Kubernetes mounts storage at that same path for the executor workload
- the Hetzner single-node overlay pins PVC-backed storage to k3s `local-path`
Operational consequence:
- executor duplicate-suppression state lives on node-backed persistent storage
- replacing the node or deleting the PVC without migration loses that history
- treat executor state as required operational data, even though the executor is still a dummy implementation
## Failure recovery runbook
### A. Bootstrap fails before infrastructure exists
Typical causes:
- invalid `HCLOUD_TOKEN`
- wrong `SSH_PUBLIC_KEY_PATH`
- malformed `TF_ADMIN_CIDR_BLOCKS`
Fix the input and rerun:
```bash
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/bootstrap.sh
```
If you need to destroy partially created infrastructure:
```bash
source scripts/hetzner/bootstrap-secrets.env
bash scripts/hetzner/destroy.sh
```
### B. Terraform succeeds but cluster access is not usable
Verify the generated kubeconfig and cluster health:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl get events -A --sort-by=.lastTimestamp | tail -n 50
```
What to suspect first:
- cloud-init still running
- k3s still starting
- bootstrap kubeconfig/auth not fully aligned yet
- public API reachable, but workloads not yet healthy
### C. Secrets were wrong or missing
The current bootstrap depends on:
- `${PROJECT_NAME:-unrip}-secrets`
- `NEAR_INTENTS_API_KEY`
- `forgejo-secrets`
- `root_url`
- `domain`
- `runner_registration_token`
Verify:
```bash
kubectl -n unrip get secret unrip-secrets
kubectl -n unrip get secret unrip-registry-creds
kubectl -n forgejo get secret forgejo-secrets
kubectl -n registry get secret registry-secrets
```
If needed, recreate them from the workstation before restarting the affected deployments.
### D. Workloads are present but not healthy
Inspect by namespace:
```bash
kubectl -n unrip get pods
kubectl -n unrip describe pod <pod-name>
kubectl -n unrip logs deploy/dummy-executor --tail=100
kubectl -n forgejo logs deploy/forgejo --tail=100
kubectl -n forgejo logs deploy/forgejo-runner --tail=100
```
Useful rollout checks:
```bash
kubectl -n unrip rollout status deployment/near-intents-ingest --timeout=300s
kubectl -n unrip rollout status deployment/dummy-reactor --timeout=300s
kubectl -n unrip rollout status deployment/dummy-executor --timeout=300s
kubectl -n unrip rollout status deployment/dummy-consumer --timeout=300s
kubectl -n forgejo rollout status deployment/forgejo --timeout=300s
kubectl -n forgejo rollout status deployment/forgejo-runner --timeout=300s
```
### E. Need to inspect Terraform outputs directly
```bash
cd infra/terraform/hetzner
terraform output
terraform output server_ipv4
terraform output server_private_ipv4
terraform output k3s_api_url
terraform output kubeconfig_strategy
```
## Self-hosted CI handoff
After the cluster is reachable and workloads are up:
1. reach Forgejo at the configured domain or by port-forward
2. perform the initial admin/bootstrap steps in Forgejo
3. create the target repository in Forgejo
4. push or mirror this repo into that Forgejo instance
5. confirm the runner is registered and healthy
6. move routine application deploys to the self-hosted pipeline, which now derives image naming and rollout targets from Forgejo repository variables instead of hard-coding the legacy project
Current repo-state caveats already known:
- first bootstrap is repo-driven from the workstation
- the bootstrap path no longer relies on SSH/scp transport in control flow
- the kubeconfig/auth result is not yet fully production-hardened
- first rollout still uses a temporary local registry bridge; routine CI deploys are intended to be registry-native and the Forgejo workflow now defaults to `unrip` while allowing per-repo overrides for image name, namespace, and deployment list
- Forgejo admin creation, repo creation, and Actions configuration still require operator action after cluster bring-up
- DNS automation is currently wired for Cloudflare when credentials are supplied during bootstrap
- TLS is expected to come from cert-manager + Let's Encrypt once ingress hostnames resolve publicly
## Terraform-only usage
If you only want the infra layer:
```bash
cd infra/terraform/hetzner
export TF_VAR_hcloud_token="<your-hetzner-token>"
export TF_VAR_ssh_public_key="$(cat ~/.ssh/id_ed25519.pub)"
export TF_VAR_admin_cidr_blocks='["203.0.113.10/32"]'
terraform init
terraform apply
```
Useful outputs:
- `server_ipv4`
- `server_private_ipv4`
- `server_name`
- `server_fqdn`
- `k3s_api_url`
- `kubeconfig_strategy`
For CI/CD details, also see:
- `docs/hetzner-k3s-bootstrap.md`
- `docs/hetzner-self-hosted-ci-runbook.md`
## Compose status
Compose is still useful for:
- local development
- fast topology debugging
- non-production single-machine testing
But it should be treated as optional/dev runtime support, not as the primary production deployment path.

View file

@ -4,6 +4,7 @@ package_upgrade: true
packages: packages:
- ca-certificates - ca-certificates
- curl - curl
- git
- gnupg - gnupg
- jq - jq
- nfs-common - nfs-common
@ -57,11 +58,17 @@ write_files:
BOOTSTRAP_PROJECT_NAME=unrip BOOTSTRAP_PROJECT_NAME=unrip
BOOTSTRAP_PROJECT_NAMESPACE=unrip BOOTSTRAP_PROJECT_NAMESPACE=unrip
K3S_KUBECONFIG=/opt/bootstrap/kubeconfig-internal.yaml K3S_KUBECONFIG=/opt/bootstrap/kubeconfig-internal.yaml
BOOTSTRAP_MANIFEST_SOURCE=operator-workstation BOOTSTRAP_REPO_DIR=/opt/unrip/repo
BOOTSTRAP_MANIFEST_DIR=/opt/unrip/repo/deploy/k8s
GITOPS_HANDOFF=seed-self-hosted-git-and-runner GITOPS_HANDOFF=seed-self-hosted-git-and-runner
EOF EOF
chmod 0644 /usr/local/share/unrip/bootstrap-metadata.env chmod 0644 /usr/local/share/unrip/bootstrap-metadata.env
install -d -m 0755 /opt/unrip
if [ ! -d /opt/unrip/repo/.git ]; then
git clone --depth 1 ${BOOTSTRAP_REPO_URL:-https://example.invalid/bootstrap-repo.git} /opt/unrip/repo || true
fi
install -d -m 0755 /opt/bootstrap install -d -m 0755 /opt/bootstrap
cp /etc/rancher/k3s/k3s.yaml /opt/bootstrap/kubeconfig-internal.yaml cp /etc/rancher/k3s/k3s.yaml /opt/bootstrap/kubeconfig-internal.yaml
chmod 0640 /opt/bootstrap/kubeconfig-internal.yaml chmod 0640 /opt/bootstrap/kubeconfig-internal.yaml
@ -72,7 +79,7 @@ write_files:
This node was provisioned by Terraform + cloud-init. This node was provisioned by Terraform + cloud-init.
Use /opt/bootstrap/kubeconfig-internal.yaml for automation. Use /opt/bootstrap/kubeconfig-internal.yaml for automation.
Bootstrap metadata lives at /usr/local/share/unrip/bootstrap-metadata.env. Bootstrap metadata lives at /usr/local/share/unrip/bootstrap-metadata.env.
Kubernetes bootstrap assets are applied from the operator workstation after provisioning. Future Kubernetes bootstrap assets should live under /opt/unrip/repo/deploy/k8s.
EOF EOF
chmod 0644 /opt/bootstrap/README.txt chmod 0644 /opt/bootstrap/README.txt

View file

@ -3,24 +3,18 @@
This directory is the repo-driven deployment target for the single-node Hetzner+k3s bootstrap. This directory is the repo-driven deployment target for the single-node Hetzner+k3s bootstrap.
## Layout ## Layout
- `base/` — platform-only compatibility kustomization - `base/` — shared bootstrap manifests plus the current `unrip` project manifests
- `platform/` — shared cluster manifests - `projects/` — conventions for hosting multiple isolated projects on the same cluster
- `projects/` — naming/layout conventions for hosted projects
- `overlays/hetzner-single-node/` — first-node overlay with concrete hostnames, local-path storage, and generated secret references - `overlays/hetzner-single-node/` — first-node overlay with concrete hostnames, local-path storage, and generated secret references
- `secrets/` — examples and instructions for supplying required secrets out-of-band - `secrets/` — examples and instructions for supplying required secrets out-of-band
The actual `unrip` project manifests now live in the separate `unrip` application repository under:
- `deploy/k8s/base/`
## Shared cluster model ## Shared cluster model
Shared platform namespaces: Shared platform namespaces:
- `forgejo` - `forgejo`
- `registry` - `registry`
- `observability` (`grafana`, `loki`, `promtail`, `headlamp`) - `ingress-nginx`
- `cert-manager` - `cert-manager`
Ingress is provided by the Traefik controller bundled with k3s. Base and overlay manifests therefore target `ingressClassName: traefik` instead of installing ingress-nginx.
Project-specific namespaces: Project-specific namespaces:
- `unrip` - `unrip`
- future projects should get their own namespace instead of sharing `unrip` - future projects should get their own namespace instead of sharing `unrip`
@ -32,9 +26,7 @@ After Terraform/cloud-init has produced a working kubeconfig, the canonical path
bash scripts/hetzner/bootstrap.sh bash scripts/hetzner/bootstrap.sh
``` ```
That script renders the Hetzner overlay inputs, creates platform and project registry auth secrets using the active project naming, and applies the generated bootstrap overlay under `.state/hetzner/generated-overlay/`. That script renders the Hetzner overlay inputs, creates platform and project registry auth secrets using the active project naming, and applies:
For a manual, fully checked-in apply path, use:
```bash ```bash
kubectl apply -k deploy/k8s/overlays/hetzner-single-node kubectl apply -k deploy/k8s/overlays/hetzner-single-node
@ -45,7 +37,6 @@ The overlay intentionally references generated or pre-created Secrets instead of
- `unrip/unrip-secrets` - `unrip/unrip-secrets`
- `unrip/unrip-registry-creds` - `unrip/unrip-registry-creds`
- `forgejo/forgejo-secrets` - `forgejo/forgejo-secrets`
- `observability/observability-secrets`
- `registry/registry-secrets` - `registry/registry-secrets`
The bootstrap script creates them from local environment variables and `pass`-resolved secrets. By default it targets the `unrip` project, but project secret env filenames, namespaces, image names, rollout targets, and registry pull-secret names are derived from `PROJECT_NAME` and `PROJECT_NAMESPACE` instead of hard-coding legacy `trading-system` values. The bootstrap script creates them from local environment variables. By default it targets the `unrip` project, but its kubeconfig context name, bootstrap image tag, project secret env filename, project namespace, and project registry secret name are derived from `PROJECT_NAME`, `PROJECT_NAMESPACE`, and `CLUSTER_NAME` instead of hard-coding legacy `trading-system` values.

View file

@ -2,3 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- ../platform/base - ../platform/base
- ../projects/unrip/base

View file

@ -2,106 +2,34 @@
This overlay turns the shared platform and `unrip` project bases into a concrete first-node bootstrap target for the Terraform-provisioned k3s VM. This overlay turns the shared platform and `unrip` project bases into a concrete first-node bootstrap target for the Terraform-provisioned k3s VM.
The checked-in overlay is the declarative template. For first-cluster bootstrap, `scripts/hetzner/bootstrap.sh` renders a generated overlay under `.state/hetzner/generated-overlay/` and applies that generated copy as the source of truth for the run. ## Before apply
Create real secret material from the examples:
## Two ways to use this overlay
### 1. Recommended: `scripts/hetzner/bootstrap.sh`
This is the intended operator workflow for a fresh Hetzner cluster. The bootstrap script renders secret and patch inputs from local env and `pass`, creates imperative registry secrets, and applies a generated Kustomize overlay.
That generated overlay now imports the platform resources from `deploy/k8s/platform/base/kustomization.yaml`, so new checked-in platform components such as observability manifests are included automatically during bootstrap instead of being silently skipped by a hard-coded file list.
Bootstrap overwrites these operator-worktree files on each run:
- `deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env`
- `deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env`
- `deploy/k8s/overlays/hetzner-single-node/secrets/observability.env`
Bootstrap also renders and applies generated copies of these patch files under `.state/hetzner/generated-overlay/` instead of modifying the checked-in overlay files directly:
- `ingress-hosts.patch.yaml`
- `issuer-email.patch.yaml`
- `storage-class.patch.yaml`
Secret/config sources when using bootstrap:
- from `pass` or direct env overrides via `scripts/hetzner/bootstrap-secrets.env`:
- `HCLOUD_TOKEN`
- `TAILSCALE_AUTH_KEY`
- `CLOUDFLARE_API_TOKEN`
- `CLOUDFLARE_ZONE_ID`
- `PORKBUN_API_KEY`
- `PORKBUN_SECRET_API_KEY`
- `REGISTRY_PASSWORD`
- `NEAR_INTENTS_API_KEY`
- `FORGEJO_ADMIN_PASSWORD`
- optional `GRAFANA_ADMIN_PASSWORD` (bootstrap generates one if omitted)
- from plain env/non-secret config in `scripts/hetzner/bootstrap-secrets.env`:
- `PUBLIC_DOMAIN`, `BASE_DOMAIN`, `FORGEJO_DOMAIN`, `FORGEJO_ROOT_URL`, `REGISTRY_DOMAIN`, `GRAFANA_DOMAIN`, `GRAFANA_ROOT_URL`, `HEADLAMP_DOMAIN`
- default hostname model under `PUBLIC_DOMAIN`: `git.${PUBLIC_DOMAIN}`, `registry.${PUBLIC_DOMAIN}`, `grafana.${PUBLIC_DOMAIN}`, `headlamp.${PUBLIC_DOMAIN}`
- `LETSENCRYPT_EMAIL`
- `REGISTRY_USERNAME`
- `FORGEJO_ADMIN_USERNAME`, `FORGEJO_ADMIN_EMAIL`
- optional `GRAFANA_ADMIN_USERNAME` (defaults to `admin`)
- optional project overrides such as `PROJECT_NAME`, `PROJECT_NAMESPACE`, and `PROJECT_SECRET_ENV_BASENAME`
Bootstrap materializes Kubernetes inputs like this:
- `secrets/unrip.env` gets `NEAR_INTENTS_API_KEY`
- `secrets/forgejo.env` gets only `root_url` and `domain`
- `secrets/observability.env` gets `grafana_admin_user`, `grafana_admin_password`, and `grafana_root_url`
- generated overlay Kustomize secret generators create `observability-secrets` in namespace `observability` alongside the project and Forgejo secrets
- `registry-secrets` in namespace `registry` is created imperatively from `REGISTRY_USERNAME` and `REGISTRY_PASSWORD`
- `<project>-registry-creds` image pull secret is created imperatively in the project namespace from the same registry credentials
Note: the Forgejo runner no longer reads `runner_registration_token` from `forgejo-secrets`. `scripts/hetzner/bootstrap.sh` generates a one-time runner token in-cluster, registers the runner, and writes `/data/forgejo-runner/.runner` on the shared Forgejo PVC before restarting the runner deployment.
### 2. Manual: `kubectl apply -k`
Use this only if you intentionally want to manage the checked-in overlay inputs yourself. In manual mode, the checked-in overlay remains the source of truth; in bootstrap mode, the generated overlay is the source of truth for what gets applied.
Before apply, create or edit real local input files:
```bash ```bash
cp deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env.example deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env cp deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env.example deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env.example deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env cp deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env.example deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/observability.env.example deploy/k8s/overlays/hetzner-single-node/secrets/observability.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd.example deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd cp deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd.example deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd
``` ```
Then update: Update:
- ingress hosts in `ingress-hosts.patch.yaml` for Forgejo, Registry, Grafana, and Headlamp - ingress hosts in `ingress-hosts.patch.yaml`
- ACME email in `issuer-email.patch.yaml` - ACME email in `issuer-email.patch.yaml`
- project secret values in `secrets/unrip.env` - project secret values in `secrets/unrip.env`
- Forgejo secret values in `secrets/forgejo.env` (`root_url` and `domain` only) - Forgejo secret values in `secrets/forgejo.env`
- observability secret values in `secrets/observability.env` (`grafana_admin_user`, `grafana_admin_password`, `grafana_root_url`) - registry htpasswd in `secrets/registry.htpasswd`
Important manual-mode caveat:
- `kubectl apply -k deploy/k8s/overlays/hetzner-single-node` creates only the Kustomize-managed secrets from the checked-in files (`unrip-secrets`, `forgejo-secrets`, `observability-secrets`, and `registry-secrets` when `secrets/registry.htpasswd` exists)
- it does **not** create the project docker-registry pull secret
- if you skip `scripts/hetzner/bootstrap.sh`, you must create that pull secret separately before expecting image pulls or CI builds to work
## Apply ## Apply
Bootstrap path:
```bash
bash scripts/hetzner/bootstrap.sh
```
Manual path:
```bash ```bash
kubectl apply -k deploy/k8s/overlays/hetzner-single-node kubectl apply -k deploy/k8s/overlays/hetzner-single-node
``` ```
## What gets installed ## What gets installed
- shared platform namespaces for registry, ingress, cert-manager, Forgejo, and observability - shared platform namespaces for registry, ingress, cert-manager, and Forgejo
- project namespace `unrip` - project namespace `unrip`
- Redpanda plus a topic bootstrap job inside `unrip` - Redpanda plus a topic bootstrap job inside `unrip`
- app worker deployments referencing `unrip-secrets` - app worker deployments referencing `unrip-secrets`
- Forgejo and Forgejo runner referencing `forgejo-secrets` - Forgejo and Forgejo runner referencing `forgejo-secrets`
- private registry workload, which still requires the imperative `registry-secrets` auth secret to be created separately unless you used `scripts/hetzner/bootstrap.sh` - private registry protected by htpasswd from `registry-secrets`
- nginx ingress and ACME issuers for TLS - nginx ingress and ACME issuers for TLS
- observability ingress for Grafana and Headlamp, plus local-path PVC overrides for Grafana and Loki
## Observability UI exposure policy
- Grafana and Headlamp are both wired into the Hetzner ingress/domain model.
- Use `grafana.${PUBLIC_DOMAIN}` / `headlamp.${PUBLIC_DOMAIN}` or explicit `GRAFANA_DOMAIN` / `HEADLAMP_DOMAIN` values.
- Grafana is the historical log search UI backed by Loki.
- Headlamp is the Kubernetes cluster UI for workloads, events, and pod logs.
- Grafana is authenticated through `observability-secrets`; Headlamp is authenticated with the generated Kubernetes service-account token that bootstrap stores in `pass` when `HEADLAMP_ADMIN_TOKEN_PASS` is configured.
For future projects, do not reuse `unrip`; create a new project namespace and matching `<project>-config`, `<project>-secrets`, and `<project>-registry-creds` resources. For future projects, do not reuse `unrip`; create a new project namespace and matching `<project>-config`, `<project>-secrets`, and `<project>-registry-creds` resources.

View file

@ -41,47 +41,3 @@ spec:
name: registry name: registry
port: port:
number: 5000 number: 5000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: grafana
namespace: observability
spec:
tls:
- hosts:
- grafana.doran.133011.xyz
secretName: grafana-tls
rules:
- host: grafana.doran.133011.xyz
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: grafana
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: headlamp
namespace: observability
spec:
tls:
- hosts:
- headlamp.doran.133011.xyz
secretName: headlamp-tls
rules:
- host: headlamp.doran.133011.xyz
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: headlamp
port:
number: 80

View file

@ -2,6 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- ../../platform/base - ../../platform/base
- ../../projects/unrip/base
patches: patches:
- path: ingress-hosts.patch.yaml - path: ingress-hosts.patch.yaml
- path: issuer-email.patch.yaml - path: issuer-email.patch.yaml
@ -15,10 +16,6 @@ secretGenerator:
namespace: forgejo namespace: forgejo
envs: envs:
- secrets/forgejo.env - secrets/forgejo.env
- name: observability-secrets
namespace: observability
envs:
- secrets/observability.env
- name: registry-secrets - name: registry-secrets
namespace: registry namespace: registry
files: files:

View file

@ -1,2 +1,3 @@
root_url=https://git.unrip-bootstrap.example.com/ root_url=https://git.unrip-bootstrap.example.com/
domain=git.unrip-bootstrap.example.com domain=git.unrip-bootstrap.example.com
runner_registration_token=replace-me

View file

@ -1,3 +0,0 @@
grafana_admin_user=admin
grafana_admin_password=replace-me
grafana_root_url=https://grafana.example.invalid/

View file

@ -29,19 +29,3 @@ metadata:
namespace: registry namespace: registry
spec: spec:
storageClassName: local-path storageClassName: local-path
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: loki-data
namespace: observability
spec:
storageClassName: local-path
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: grafana-data
namespace: observability
spec:
storageClassName: local-path

View file

@ -28,4 +28,4 @@ data:
} }
import /etc/coredns/custom/*.server import /etc/coredns/custom/*.server
NodeHosts: | NodeHosts: |
10.30.1.10 doran-1 10.30.1.10 unrip-1

View file

@ -15,20 +15,6 @@ spec:
spec: spec:
serviceAccountName: forgejo-runner serviceAccountName: forgejo-runner
restartPolicy: Always restartPolicy: Always
initContainers:
- name: wait-for-runner-config
image: busybox:1.36
command: ["/bin/sh", "-ec"]
args:
- >-
until [ -s /data/.runner ]; do
echo "waiting for bootstrap to write /data/.runner";
sleep 5;
done
volumeMounts:
- name: forgejo-data
mountPath: /data
subPath: forgejo-runner
containers: containers:
- name: runner - name: runner
image: code.forgejo.org/forgejo/runner:6.3.1 image: code.forgejo.org/forgejo/runner:6.3.1
@ -36,18 +22,26 @@ spec:
runAsUser: 0 runAsUser: 0
runAsGroup: 0 runAsGroup: 0
env: env:
- name: FORGEJO_RUNNER_CONFIG - name: FORGEJO_INSTANCE_URL
value: /data/.runner valueFrom:
secretKeyRef:
name: forgejo-secrets
key: root_url
- name: FORGEJO_RUNNER_REGISTRATION_TOKEN
valueFrom:
secretKeyRef:
name: forgejo-secrets
key: runner_registration_token
command: ["/bin/sh", "-lc"] command: ["/bin/sh", "-lc"]
args: args:
- >- - >-
test -s "$FORGEJO_RUNNER_CONFIG" && if [ ! -f /data/.runner ]; then
forgejo-runner daemon --config "$FORGEJO_RUNNER_CONFIG" forgejo-runner register --no-interactive --name k3s-runner --instance "$FORGEJO_INSTANCE_URL" --token "$FORGEJO_RUNNER_REGISTRATION_TOKEN" --labels "linux-amd64:host";
fi &&
forgejo-runner daemon --config /data/.runner
volumeMounts: volumeMounts:
- name: forgejo-data - name: runner-data
mountPath: /data mountPath: /data
subPath: forgejo-runner
volumes: volumes:
- name: forgejo-data - name: runner-data
persistentVolumeClaim: emptyDir: {}
claimName: forgejo-data

View file

@ -1,100 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: headlamp-admin
namespace: observability
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: headlamp-admin
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- kind: ServiceAccount
name: headlamp-admin
namespace: observability
---
apiVersion: v1
kind: Secret
metadata:
name: headlamp-admin-token
namespace: observability
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
annotations:
kubernetes.io/service-account.name: headlamp-admin
type: kubernetes.io/service-account-token
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: headlamp
namespace: observability
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: headlamp
template:
metadata:
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/part-of: observability
spec:
containers:
- name: headlamp
image: ghcr.io/headlamp-k8s/headlamp:v0.41.0
args:
- -in-cluster
- -plugins-dir=/headlamp/plugins
ports:
- name: http
containerPort: 4466
readinessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 20
timeoutSeconds: 10
livenessProbe:
httpGet:
path: /
port: http
initialDelaySeconds: 30
timeoutSeconds: 10
nodeSelector:
kubernetes.io/os: linux
---
apiVersion: v1
kind: Service
metadata:
name: headlamp
namespace: observability
labels:
app.kubernetes.io/name: headlamp
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
selector:
app.kubernetes.io/name: headlamp
ports:
- name: http
port: 80
targetPort: http

View file

@ -0,0 +1,73 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: ingress-nginx-controller
namespace: ingress-nginx
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
template:
metadata:
labels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
spec:
serviceAccountName: default
containers:
- name: controller
image: registry.k8s.io/ingress-nginx/controller:v1.12.1
args:
- /nginx-ingress-controller
- --ingress-class=nginx
- --controller-class=k8s.io/ingress-nginx
- --publish-service=$(POD_NAMESPACE)/ingress-nginx-controller
- --election-id=ingress-nginx-leader
- --enable-ssl-passthrough
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
ports:
- name: http
containerPort: 80
- name: https
containerPort: 443
securityContext:
allowPrivilegeEscalation: true
capabilities:
add: ["NET_BIND_SERVICE"]
drop: ["ALL"]
readinessProbe:
httpGet:
path: /healthz
port: 10254
livenessProbe:
httpGet:
path: /healthz
port: 10254
---
apiVersion: v1
kind: Service
metadata:
name: ingress-nginx-controller
namespace: ingress-nginx
spec:
type: LoadBalancer
selector:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
ports:
- name: http
port: 80
targetPort: 80
- name: https
port: 443
targetPort: 443

View file

@ -47,53 +47,3 @@ spec:
name: registry name: registry
port: port:
number: 5000 number: 5000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: grafana
namespace: observability
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
spec:
ingressClassName: traefik
tls:
- hosts:
- grafana.example.invalid
secretName: grafana-tls
rules:
- host: grafana.example.invalid
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: grafana
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: headlamp
namespace: observability
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
spec:
ingressClassName: traefik
tls:
- hosts:
- headlamp.example.invalid
secretName: headlamp-tls
rules:
- host: headlamp.example.invalid
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: headlamp
port:
number: 80

View file

@ -2,11 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- namespace.yaml - namespace.yaml
- utility-namespace.yaml
- ntfy.yaml
- traefik-config.yaml - traefik-config.yaml
- observability.yaml
- headlamp.yaml
- forgejo.yaml - forgejo.yaml
- forgejo-rbac.yaml - forgejo-rbac.yaml
- forgejo-runner.yaml - forgejo-runner.yaml

View file

@ -1,5 +1,13 @@
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata:
name: unrip
labels:
app.kubernetes.io/part-of: unrip
project.pi.io/type: project
---
apiVersion: v1
kind: Namespace
metadata: metadata:
name: forgejo name: forgejo
labels: labels:
@ -15,15 +23,13 @@ metadata:
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: cert-manager name: ingress-nginx
labels: labels:
project.pi.io/type: platform project.pi.io/type: platform
--- ---
# Ingress is provided by the Traefik controller bundled with k3s.
# No separate ingress-nginx namespace is created by this base.
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:
name: observability name: cert-manager
labels: labels:
project.pi.io/type: platform project.pi.io/type: platform

View file

@ -1,86 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: ntfy-config
namespace: utility
data:
server.yml: |
base-url: http://ntfy.utility.svc.cluster.local
cache-file: /var/cache/ntfy/cache.db
attachment-cache-dir: /var/cache/ntfy/attachments
---
apiVersion: v1
kind: Service
metadata:
name: ntfy
namespace: utility
labels:
app: ntfy
app.kubernetes.io/part-of: unrip3
spec:
type: ClusterIP
selector:
app: ntfy
ports:
- name: http
port: 80
targetPort: http
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ntfy
namespace: utility
labels:
app: ntfy
app.kubernetes.io/part-of: unrip3
spec:
replicas: 1
selector:
matchLabels:
app: ntfy
template:
metadata:
labels:
app: ntfy
app.kubernetes.io/part-of: unrip3
spec:
containers:
- name: ntfy
image: binwiederhier/ntfy:v2.21.0
imagePullPolicy: IfNotPresent
args: ["serve"]
ports:
- name: http
containerPort: 80
readinessProbe:
httpGet:
path: /v1/health
port: http
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
httpGet:
path: /v1/health
port: http
initialDelaySeconds: 20
periodSeconds: 30
resources:
requests:
cpu: 25m
memory: 64Mi
limits:
cpu: 250m
memory: 128Mi
volumeMounts:
- name: config
mountPath: /etc/ntfy
readOnly: true
- name: cache
mountPath: /var/cache/ntfy
volumes:
- name: config
configMap:
name: ntfy-config
- name: cache
emptyDir: {}

View file

@ -1,455 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: loki-config
namespace: observability
labels:
app.kubernetes.io/name: loki
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
data:
config.yaml: |
auth_enabled: false
server:
http_listen_port: 3100
common:
path_prefix: /var/loki
replication_factor: 1
ring:
kvstore:
store: inmemory
storage:
filesystem:
chunks_directory: /var/loki/chunks
rules_directory: /var/loki/rules
schema_config:
configs:
- from: 2024-01-01
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: index_
period: 24h
storage_config:
filesystem:
directory: /var/loki/chunks
limits_config:
allow_structured_metadata: false
reject_old_samples: true
reject_old_samples_max_age: 48h
retention_period: 48h
compactor:
working_directory: /var/loki/compactor
retention_enabled: true
delete_request_store: filesystem
analytics:
reporting_enabled: false
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: loki-data
namespace: observability
labels:
app.kubernetes.io/name: loki
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: loki
namespace: observability
labels:
app.kubernetes.io/name: loki
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: loki
template:
metadata:
labels:
app.kubernetes.io/name: loki
app.kubernetes.io/part-of: observability
spec:
securityContext:
fsGroup: 10001
containers:
- name: loki
image: grafana/loki:3.0.0
args:
- -config.file=/etc/loki/config.yaml
ports:
- name: http
containerPort: 3100
readinessProbe:
httpGet:
path: /ready
port: http
initialDelaySeconds: 10
periodSeconds: 10
livenessProbe:
httpGet:
path: /ready
port: http
initialDelaySeconds: 30
periodSeconds: 15
volumeMounts:
- name: config
mountPath: /etc/loki
readOnly: true
- name: data
mountPath: /var/loki
volumes:
- name: config
configMap:
name: loki-config
- name: data
persistentVolumeClaim:
claimName: loki-data
---
apiVersion: v1
kind: Service
metadata:
name: loki
namespace: observability
labels:
app.kubernetes.io/name: loki
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
selector:
app.kubernetes.io/name: loki
ports:
- name: http
port: 3100
targetPort: http
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: promtail
namespace: observability
labels:
app.kubernetes.io/name: promtail
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: promtail
labels:
app.kubernetes.io/name: promtail
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
- namespaces
verbs:
- get
- list
- watch
- apiGroups: ["discovery.k8s.io"]
resources:
- endpointslices
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: promtail
labels:
app.kubernetes.io/name: promtail
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: promtail
subjects:
- kind: ServiceAccount
name: promtail
namespace: observability
---
apiVersion: v1
kind: ConfigMap
metadata:
name: promtail-config
namespace: observability
labels:
app.kubernetes.io/name: promtail
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
data:
config.yaml: |
server:
http_listen_port: 3101
grpc_listen_port: 0
positions:
filename: /run/promtail/positions.yaml
clients:
- url: http://loki.observability.svc.cluster.local:3100/loki/api/v1/push
scrape_configs:
- job_name: kubernetes-pods
kubernetes_sd_configs:
- role: pod
pipeline_stages:
- cri: {}
relabel_configs:
- source_labels:
- __meta_kubernetes_namespace
regex: kube-system|observability
action: drop
- source_labels:
- __meta_kubernetes_pod_node_name
target_label: __host__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- action: replace
source_labels:
- __meta_kubernetes_namespace
target_label: namespace
- action: replace
source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- action: replace
source_labels:
- __meta_kubernetes_pod_container_name
target_label: container
- action: replace
source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_pod_name
separator: /
replacement: $1
target_label: job
- action: replace
source_labels:
- __meta_kubernetes_pod_uid
- __meta_kubernetes_pod_container_name
separator: /
replacement: /var/log/pods/*$1/*.log
target_label: __path__
- action: replace
source_labels:
- __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
- __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
- __meta_kubernetes_pod_container_name
regex: true/(.*)
separator: /
replacement: /var/log/pods/*$1/*.log
target_label: __path__
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: promtail
namespace: observability
labels:
app.kubernetes.io/name: promtail
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
selector:
matchLabels:
app.kubernetes.io/name: promtail
template:
metadata:
labels:
app.kubernetes.io/name: promtail
app.kubernetes.io/part-of: observability
spec:
serviceAccountName: promtail
containers:
- name: promtail
image: grafana/promtail:3.0.0
args:
- -config.file=/etc/promtail/config.yaml
env:
- name: HOSTNAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
ports:
- name: http
containerPort: 3101
securityContext:
runAsUser: 0
runAsGroup: 0
volumeMounts:
- name: config
mountPath: /etc/promtail
readOnly: true
- name: run
mountPath: /run/promtail
- name: varlog
mountPath: /var/log
readOnly: true
volumes:
- name: config
configMap:
name: promtail-config
- name: run
emptyDir: {}
- name: varlog
hostPath:
path: /var/log
type: Directory
---
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-datasources
namespace: observability
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
data:
loki.yaml: |
apiVersion: 1
datasources:
- name: Loki
type: loki
access: proxy
url: http://loki.observability.svc.cluster.local:3100
isDefault: true
editable: false
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: grafana-data
namespace: observability
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 5Gi
---
# Single-replica Grafana (grafana/grafana:11.0.0) for the observability namespace.
# Admin credentials and the public root URL come from the observability-secrets
# Secret; anonymous access and self sign-up are disabled.
# NOTE(review): no .spec.strategy is set, so the default rolling update applies
# while the data volume is a ReadWriteOnce PVC — confirm whether Recreate is wanted.
apiVersion: apps/v1
kind: Deployment
metadata:
name: grafana
namespace: observability
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: grafana
template:
metadata:
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: observability
spec:
securityContext:
# NOTE(review): 472 is presumably the grafana group id in the image, so the PVC is group-writable — confirm.
fsGroup: 472
containers:
- name: grafana
image: grafana/grafana:11.0.0
env:
- name: GF_SECURITY_ADMIN_USER
valueFrom:
secretKeyRef:
name: observability-secrets
key: grafana_admin_user
- name: GF_SECURITY_ADMIN_PASSWORD
valueFrom:
secretKeyRef:
name: observability-secrets
key: grafana_admin_password
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "false"
- name: GF_USERS_ALLOW_SIGN_UP
value: "false"
- name: GF_EXPLORE_ENABLED
value: "true"
- name: GF_SERVER_ROOT_URL
valueFrom:
secretKeyRef:
name: observability-secrets
key: grafana_root_url
ports:
- name: http
containerPort: 3000
# Both probes hit /api/health on the named http port; liveness starts later and polls less often.
readinessProbe:
httpGet:
path: /api/health
port: http
initialDelaySeconds: 10
periodSeconds: 10
livenessProbe:
httpGet:
path: /api/health
port: http
initialDelaySeconds: 30
periodSeconds: 15
volumeMounts:
- name: data
mountPath: /var/lib/grafana
- name: datasources
mountPath: /etc/grafana/provisioning/datasources
readOnly: true
volumes:
- name: data
persistentVolumeClaim:
claimName: grafana-data
- name: datasources
configMap:
name: grafana-datasources
---
# Exposes the Grafana pods (selected by app.kubernetes.io/name: grafana) on
# port 3000. No Service type is set, so this is the default in-cluster type.
apiVersion: v1
kind: Service
metadata:
name: grafana
namespace: observability
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: observability
project.pi.io/type: platform
spec:
selector:
app.kubernetes.io/name: grafana
ports:
- name: http
port: 3000
targetPort: 3000

View file

@ -1,7 +0,0 @@
# Namespace named "utility", labelled as part of unrip3 with project type "utility".
apiVersion: v1
kind: Namespace
metadata:
name: utility
labels:
app.kubernetes.io/part-of: unrip3
project.pi.io/type: utility

View file

@ -1,23 +1,19 @@
# Projects on the shared cluster # Projects on the shared cluster
This cluster is intended to host multiple independent projects. This cluster is intended to host multiple independent projects.
This directory documents project conventions; actual project code/manifests live in their own application repositories.
## Pattern ## Pattern
- shared platform namespaces: - shared platform namespaces:
- `forgejo` - `forgejo`
- `registry` - `registry`
- `observability` - `ingress-nginx`
- `cert-manager` - `cert-manager`
- shared ingress model:
- use the k3s-bundled Traefik controller
- project Ingress resources should set `ingressClassName: traefik`
- per-project namespaces: - per-project namespaces:
- `unrip` - `unrip`
- future examples: `project-foo`, `project-bar` - future examples: `project-foo`, `project-bar`
## How to add another project ## How to add another project
For each new project, create a project manifest set in that project's own repository, similar to the `unrip` app repo's `deploy/k8s/base/`: For each new project, create a project manifest set similar to `deploy/k8s/base/unrip.yaml`:
- one namespace - one namespace
- one project config map - one project config map
- one secret name unique to the project - one secret name unique to the project
@ -33,7 +29,7 @@ Recommended naming convention:
- persistent host path/app state: `/var/lib/<project>/...` - persistent host path/app state: `/var/lib/<project>/...`
- app image: `registry.<domain>/<project>:<tag>` - app image: `registry.<domain>/<project>:<tag>`
## Current deployed app ## Current project in this repo
- project name: `unrip` - project name: `unrip`
- namespace: `unrip` - namespace: `unrip`
- project manifest: in the separate `unrip` app repo under `deploy/k8s/base/` - project manifest: `deploy/k8s/base/unrip.yaml`

View file

@ -0,0 +1,34 @@
# One-shot Job that creates the project's Kafka topics in Redpanda.
# The script waits until `rpk cluster info` succeeds against the redpanda
# Service, then creates each topic in TOPICS (1 partition, 1 replica) unless
# `rpk topic describe` shows it already exists, so re-runs are harmless.
# NOTE(review): the wait loop has no timeout and the Job sets no
# activeDeadlineSeconds, so it appears to wait indefinitely if the broker never
# becomes reachable — confirm this is intended.
apiVersion: batch/v1
kind: Job
metadata:
name: redpanda-topic-bootstrap
namespace: unrip
spec:
# Retry budget for failed attempts (the script runs with `set -eu`, so any failing command fails the attempt).
backoffLimit: 6
template:
spec:
restartPolicy: OnFailure
containers:
- name: bootstrap-topics
# Same image tag as the redpanda Deployment; used here for the rpk CLI.
image: docker.redpanda.com/redpandadata/redpanda:v24.3.9
command: ["/bin/sh", "-lc"]
args:
- |
set -eu
BROKERS="redpanda.unrip.svc.cluster.local:9092"
TOPICS="raw.near_intents.quote norm.swap_demand cmd.execute_trade exec.trade_result"
echo "waiting for Redpanda at ${BROKERS}"
until rpk cluster info --brokers "$BROKERS" >/dev/null 2>&1; do
sleep 2
done
for topic in $TOPICS; do
if rpk topic describe "$topic" --brokers "$BROKERS" >/dev/null 2>&1; then
echo "topic already exists: $topic"
continue
fi
echo "creating topic: $topic"
rpk topic create --brokers "$BROKERS" --partitions 1 --replicas 1 "$topic"
done
done

View file

@ -0,0 +1,6 @@
# Kustomize entry point for this directory: bundles the Redpanda manifests,
# the unrip application manifests, and the topic bootstrap Job.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- redpanda.yaml
- unrip.yaml
- bootstrap-job.yaml

View file

@ -0,0 +1,91 @@
# 20Gi ReadWriteOnce claim holding the Redpanda data directory; the redpanda
# Deployment mounts it at /var/lib/redpanda/data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: redpanda-data
namespace: unrip
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi
---
# Single-broker Redpanda for the unrip namespace.
# The broker listens for Kafka clients on 9092 and Pandaproxy on 8082, and
# advertises the in-cluster Service DNS name. Topic auto-creation is disabled,
# so topics have to be created explicitly (see the redpanda-topic-bootstrap Job).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redpanda
  namespace: unrip
spec:
  replicas: 1
  # The broker owns a ReadWriteOnce data volume. With the default rolling
  # update, the replacement pod is started while the old broker is still
  # running on the same data directory, and the rollout can stall. Recreate
  # stops the old pod before the new one starts.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: redpanda
  template:
    metadata:
      labels:
        app: redpanda
        app.kubernetes.io/part-of: unrip
    spec:
      containers:
        - name: redpanda
          image: docker.redpanda.com/redpandadata/redpanda:v24.3.9
          args:
            - redpanda
            - start
            # Small-footprint settings: one core, 1G of memory, no reserved memory.
            - --overprovisioned
            - --smp
            - "1"
            - --memory
            - "1G"
            - --reserve-memory
            - "0M"
            - --node-id
            - "0"
            - --check=false
            # Topics are never created implicitly by producers/consumers.
            - --set
            - redpanda.auto_create_topics_enabled=false
            - --kafka-addr
            - internal://0.0.0.0:9092
            # Clients are told to reconnect via the Service DNS name.
            - --advertise-kafka-addr
            - internal://redpanda.unrip.svc.cluster.local:9092
            - --pandaproxy-addr
            - internal://0.0.0.0:8082
            - --advertise-pandaproxy-addr
            - internal://redpanda.unrip.svc.cluster.local:8082
          ports:
            - name: kafka
              containerPort: 9092
            - name: proxy
              containerPort: 8082
          # Both probes only check that the Kafka port accepts TCP connections.
          readinessProbe:
            tcpSocket:
              port: 9092
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: 9092
            initialDelaySeconds: 30
            periodSeconds: 15
          volumeMounts:
            - name: redpanda-data
              mountPath: /var/lib/redpanda/data
      volumes:
        - name: redpanda-data
          persistentVolumeClaim:
            claimName: redpanda-data
---
# In-cluster Service for the Redpanda broker (pods labelled app: redpanda).
# Exposes the Kafka API on 9092 and Pandaproxy on 8082; this is the DNS name
# (redpanda.unrip.svc.cluster.local) the broker advertises to clients.
apiVersion: v1
kind: Service
metadata:
name: redpanda
namespace: unrip
spec:
selector:
app: redpanda
ports:
- name: kafka
port: 9092
targetPort: 9092
- name: proxy
port: 8082
targetPort: 8082

View file

@ -0,0 +1,152 @@
# Non-secret runtime configuration shared by the unrip Deployments, which load
# it as environment variables via envFrom. Covers the NEAR Intents WebSocket
# URL, Kafka broker address, topic names, consumer group ids, and the executor
# state directory (which matches the dummy-executor volume mount path).
apiVersion: v1
kind: ConfigMap
metadata:
name: unrip-config
namespace: unrip
data:
NEAR_INTENTS_WS_URL: wss://solver-relay-v2.chaindefuser.com/ws
KAFKA_BROKERS: redpanda.unrip.svc.cluster.local:9092
KAFKA_CLIENT_ID: unrip
KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE: raw.near_intents.quote
KAFKA_TOPIC_NORM_SWAP_DEMAND: norm.swap_demand
KAFKA_TOPIC_CMD_EXECUTE_TRADE: cmd.execute_trade
KAFKA_TOPIC_EXEC_TRADE_RESULT: exec.trade_result
KAFKA_CONSUMER_GROUP_DUMMY: dummy-reactor-v1
KAFKA_CONSUMER_GROUP_EXECUTOR: dummy-executor-v1
EXECUTOR_STATE_DIR: /var/lib/unrip/executor-state
PROJECT_NAME: unrip
PROJECT_NAMESPACE: unrip
---
# 5Gi ReadWriteOnce claim for the dummy-executor's persisted state; mounted by
# the dummy-executor Deployment at /var/lib/unrip/executor-state.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: executor-state
namespace: unrip
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 5Gi
---
# Single-replica Deployment running src/apps/near-intents-ingest.mjs from the
# shared unrip image. Environment comes from the unrip-config ConfigMap plus
# the unrip-secrets Secret, which is not defined in this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
name: near-intents-ingest
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: near-intents-ingest
template:
metadata:
labels:
app: near-intents-ingest
app.kubernetes.io/part-of: unrip
spec:
# Pull secret is not defined in this manifest; it must exist in the namespace.
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
# NOTE(review): looks like a placeholder image reference, presumably replaced by an overlay or CI — confirm.
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/near-intents-ingest.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets
---
# Single-replica Deployment running src/apps/dummy-reactor.mjs from the shared
# unrip image. Environment comes from the unrip-config ConfigMap plus the
# unrip-secrets Secret, which is not defined in this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-reactor
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: dummy-reactor
template:
metadata:
labels:
app: dummy-reactor
app.kubernetes.io/part-of: unrip
spec:
# Pull secret is not defined in this manifest; it must exist in the namespace.
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
# NOTE(review): looks like a placeholder image reference, presumably replaced by an overlay or CI — confirm.
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/dummy-reactor.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets
---
# Single-replica Deployment running src/apps/dummy-executor.mjs from the shared
# unrip image. Unlike the other app Deployments it mounts the executor-state
# PVC at /var/lib/unrip/executor-state, the same path as EXECUTOR_STATE_DIR in
# unrip-config.
# NOTE(review): no .spec.strategy is set, so the default rolling update applies
# while the state volume is ReadWriteOnce — confirm whether two executors
# briefly sharing the state directory during a rollout is acceptable.
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-executor
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: dummy-executor
template:
metadata:
labels:
app: dummy-executor
app.kubernetes.io/part-of: unrip
spec:
# Pull secret is not defined in this manifest; it must exist in the namespace.
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
# NOTE(review): looks like a placeholder image reference, presumably replaced by an overlay or CI — confirm.
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/dummy-executor.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets
volumeMounts:
- name: executor-state
mountPath: /var/lib/unrip/executor-state
volumes:
- name: executor-state
persistentVolumeClaim:
claimName: executor-state
---
# Single-replica Deployment running src/apps/dummy-consumer.mjs from the shared
# unrip image. Environment comes from the unrip-config ConfigMap plus the
# unrip-secrets Secret, which is not defined in this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
name: dummy-consumer
namespace: unrip
spec:
replicas: 1
selector:
matchLabels:
app: dummy-consumer
template:
metadata:
labels:
app: dummy-consumer
app.kubernetes.io/part-of: unrip
spec:
# Pull secret is not defined in this manifest; it must exist in the namespace.
imagePullSecrets:
- name: unrip-registry-creds
containers:
- name: app
# NOTE(review): looks like a placeholder image reference, presumably replaced by an overlay or CI — confirm.
image: ghcr.io/example/unrip:bootstrap
imagePullPolicy: IfNotPresent
command: ["node", "src/apps/dummy-consumer.mjs"]
envFrom:
- configMapRef:
name: unrip-config
- secretRef:
name: unrip-secrets

View file

@ -1,6 +1,6 @@
# Required Kubernetes secrets # Required Kubernetes secrets
Base manifests and the Hetzner single-node overlay both expect secrets to be supplied out-of-band. The Hetzner overlay generates `unrip/unrip-secrets`, `forgejo/forgejo-secrets`, `observability/observability-secrets`, and `registry/registry-secrets` from local files. Base manifests and the Hetzner single-node overlay both expect secrets to be supplied out-of-band. The Hetzner overlay generates `unrip/unrip-secrets`, `forgejo/forgejo-secrets`, and `registry/registry-secrets` from local files.
## Required secrets ## Required secrets
- `unrip/unrip-secrets` - `unrip/unrip-secrets`
@ -8,12 +8,9 @@ Base manifests and the Hetzner single-node overlay both expect secrets to be sup
- `forgejo/forgejo-secrets` - `forgejo/forgejo-secrets`
- `root_url` - `root_url`
- `domain` - `domain`
- `runner_registration_token`
- `registry/registry-secrets` - `registry/registry-secrets`
- `htpasswd` - `htpasswd`
- `observability/observability-secrets`
- `grafana_admin_user`
- `grafana_admin_password`
- `grafana_root_url`
## Overlay-driven generation ## Overlay-driven generation
The `deploy/k8s/overlays/hetzner-single-node` overlay can generate these from local files via `secretGenerator`. The `deploy/k8s/overlays/hetzner-single-node` overlay can generate these from local files via `secretGenerator`.
@ -23,15 +20,10 @@ Example workflow:
```bash ```bash
cp deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env.example deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env cp deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env.example deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env.example deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env cp deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env.example deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/observability.env.example deploy/k8s/overlays/hetzner-single-node/secrets/observability.env
cp deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd.example deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd cp deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd.example deploy/k8s/overlays/hetzner-single-node/secrets/registry.htpasswd
kubectl apply -k deploy/k8s/overlays/hetzner-single-node kubectl apply -k deploy/k8s/overlays/hetzner-single-node
``` ```
The Forgejo runner no longer expects a pre-seeded `runner_registration_token` secret; `scripts/hetzner/bootstrap.sh` generates a one-time token in-cluster, registers the runner, stores the resulting `/data/.runner` config on the `forgejo-runner-data` PVC, and then restarts the deployment.
Headlamp login is different: its Kubernetes service-account token is generated in-cluster from `deploy/k8s/platform/base/headlamp.yaml` and bootstrap can optionally store that token in `pass` via `HEADLAMP_ADMIN_TOKEN_PASS`. It is not sourced from a checked-in env file.
For future projects, follow the same convention with project-specific secret names in project-specific namespaces. For future projects, follow the same convention with project-specific secret names in project-specific namespaces.
Do not commit populated secret files. Do not commit populated secret files.

View file

@ -0,0 +1,4 @@
raw.near_intents.quote
norm.swap_demand
cmd.execute_trade
exec.trade_result

View file

@ -1,18 +1,105 @@
# Historical bootstrap status report Status: partially successful, not fully healthy yet.
This file is retained only as an archive of an early, partially successful bootstrap attempt. What worked
It does **not** describe the current cluster state or the current canonical bootstrap flow. - Hetzner VM provisioned
- k3s installed and running
- node is `Ready`
- namespaces created
- Forgejo is up
- registry is up
- Redpanda is up
- `near-intents-ingest` is up
For current operator documentation, use: What is still broken
- `docs/hetzner-k3s-bootstrap.md` - `dummy-reactor`, `dummy-executor`, `dummy-consumer` are failing because Kafka/Redpanda topic metadata is not healthy yet:
- `docs/hetzner-self-hosted-ci-runbook.md` - `This server does not host this topic-partition`
- `docs/k8s-observability.md` - ingress-nginx is crashing
- `docs/hetzner-rebuild-pipeline.md` - cert-manager webhook/cainjector are crashing
- so public HTTPS ingress is not ready
- therefore Git/registry/CI are not yet usable via domain names
Current reality has moved past the failures described in the old report: So the honest report is:
- Traefik is the active ingress path - cluster bootstrap succeeded
- cert-manager is healthy - platform/app stack is only partially healthy
- Forgejo, registry, Grafana, and Headlamp are reachable - we still need another fix pass before calling this “working”
- Forgejo Actions is the default deployment path
If you need a historical failure log, use Git history for earlier revisions of this file. How to interact with it right now
1. Use kubectl
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl -n unrip get pods
kubectl -n forgejo get pods,svc
kubectl -n registry get pods,svc
```
2. Access Forgejo right now
Since ingress is broken, use port-forward:
```bash
kubectl -n forgejo port-forward svc/forgejo 3000:3000
```
Then open:
```text
http://127.0.0.1:3000
```
3. Access the registry right now
Also via port-forward:
```bash
kubectl -n registry port-forward svc/registry 5000:5000
```
Then from your machine:
```bash
docker login 127.0.0.1:5000 -u unrip
```
And push/pull like:
```bash
docker tag unrip:bootstrap 127.0.0.1:5000/unrip:test
docker push 127.0.0.1:5000/unrip:test
```
4. Watch logs
```bash
kubectl -n unrip logs deploy/near-intents-ingest -f
kubectl -n unrip logs deploy/dummy-reactor -f
kubectl -n unrip logs deploy/dummy-executor -f
kubectl -n unrip logs deploy/dummy-consumer -f
kubectl -n forgejo logs deploy/forgejo -f
kubectl -n registry logs deploy/registry -f
```
How Git would work once Forgejo is usable
After port-forward or later ingress:
```bash
git remote add forgejo http://127.0.0.1:3000/<owner>/<repo>.git
git push forgejo main
```
How CI/CD is supposed to work
Intended flow:
1. code lives in Forgejo
2. Forgejo runner executes `.forgejo/workflows/deploy.yml`
3. workflow builds image
4. pushes image to registry
5. updates `unrip` deployments in Kubernetes
Current reality:
- not ready yet
- because ingress/cert-manager are unhealthy
- and we haven't verified a full Forgejo runner deploy cycle
Bottom line
- Kubernetes cluster: yes
- server provisioning: yes
- basic platform pieces: partially
- usable Git/CI/CD stack: not yet
- unrip app pipeline: not yet
Most important next fixes
1. fix k3s manifest/platform issues:
- ingress-nginx RBAC/crash
- cert-manager install/CRDs/RBAC
2. fix Redpanda/topic metadata issue so reactor/executor/consumer run
3. only then wire Forgejo + registry + CI as usable

View file

@ -1,5 +1,85 @@
# Moved # Event contracts
This project-specific document moved to: ## Envelope
All bus messages use this envelope:
- `projects/unrip/docs/contracts.md` ```json
{
"event_id": "string",
"event_type": "string",
"venue": "string",
"source": "string|null",
"schema_version": 1,
"observed_at": "ISO-8601|null",
"ingested_at": "ISO-8601",
"payload": {},
"raw": {}
}
```
## Topics
Current canonical topic set:
- `raw.near_intents.quote`
- `norm.swap_demand`
- `cmd.execute_trade`
- `exec.trade_result`
In Kubernetes bootstrap, Redpanda topic creation is currently handled by the repo-managed bootstrap job applied with the manifest set.
## `raw.near_intents.quote`
- `event_type`: `near_intents_quote_raw`
- `payload.message`: original venue-native payload
- `raw`: original venue-native payload
## `norm.swap_demand`
- `event_type`: `swap_demand`
- payload:
- `quote_id`
- `asset_in`
- `asset_out`
- `amount_in`
- `amount_out`
- `ttl_ms`
## `cmd.execute_trade`
- `event_type`: `execute_trade`
- payload:
- `command_id`
- `idempotency_key`
- `execution_key`
- `quote_id`
- `asset_in`
- `asset_out`
- `amount_in`
- `amount_out`
- `reason`
## `exec.trade_result`
- `event_type`: `trade_result`
- payload:
- `command_id`
- `idempotency_key`
- `execution_key`
- `quote_id`
- `status`
- `result_code`
- `note`
## Executor idempotency model
- `command_id` is unique per trade command and currently deterministic as `cmd-${quote_id}`
- `idempotency_key` is stable for semantic duplicate detection and currently `${venue}:${quote_id}`
- `execution_key` is the stable partition key and currently `${venue}:${asset_in}->${asset_out}`
- executor persists command state on durable storage before publishing a result
- already-completed `command_id`s are skipped on replay or restart
- if a command is seen again after a persisted `processing` state, the executor emits a recovered result path instead of blindly duplicating work
## Deployment and persistence implications
These contracts are tied to deployment behavior:
- executor duplicate suppression depends on durable persistence at `EXECUTOR_STATE_DIR`
- local Compose mounts that path for development/runtime testing
- the Hetzner single-node k3s path mounts persistent storage for the executor at `/var/lib/unrip/executor-state`
- in the current single-node target, that persistence is node-backed and should be treated as required operational state
Operational consequence:
- deleting the executor PVC or losing the node without migration discards idempotency history
- that can allow already-seen commands to be treated as new after recovery

View file

@ -11,7 +11,6 @@ Goal: provision and deploy everything from this repo to a single Hetzner machine
- trading system services - trading system services
- private registry - private registry
- Forgejo - Forgejo
- Loki + Promtail + Grafana + Headlamp observability
- k3s-bundled Traefik ingress resources - k3s-bundled Traefik ingress resources
- cert-manager - cert-manager
- ACME issuers - ACME issuers
@ -20,17 +19,14 @@ Goal: provision and deploy everything from this repo to a single Hetzner machine
- optionally creates DNS records via Cloudflare or Porkbun - optionally creates DNS records via Cloudflare or Porkbun
- fetches the real kubeconfig from the node - fetches the real kubeconfig from the node
- writes overlay secrets/host patches from local env - writes overlay secrets/host patches from local env
- renders `.state/hetzner/generated-overlay/` from the checked-in Hetzner overlay template plus `deploy/k8s/platform/base/kustomization.yaml` - applies the Hetzner single-node k8s overlay from the operator workstation checkout
- applies that generated overlay from the operator workstation checkout
- builds the current app image locally - builds the current app image locally
- imports the bootstrap image into k3s for the first rollout - imports the bootstrap image into k3s for the first rollout
## Files ## Files
- `infra/terraform/hetzner/` - `infra/terraform/hetzner/`
- `deploy/k8s/platform/` - `deploy/k8s/base/`
- `deploy/k8s/overlays/hetzner-single-node/` - `deploy/k8s/overlays/hetzner-single-node/`
- `projects/unrip/deploy/k8s/base/`
- `projects/unrip/`
- `scripts/hetzner/bootstrap.sh` - `scripts/hetzner/bootstrap.sh`
- `scripts/hetzner/configure-cloudflare-dns.sh` - `scripts/hetzner/configure-cloudflare-dns.sh`
- `scripts/hetzner/destroy.sh` - `scripts/hetzner/destroy.sh`
@ -71,14 +67,11 @@ The mapping file should contain non-secret config plus `pass` entry references f
When you run `scripts/hetzner/bootstrap.sh`, it uses this file to materialize local Kubernetes inputs before apply: When you run `scripts/hetzner/bootstrap.sh`, it uses this file to materialize local Kubernetes inputs before apply:
- overwrites `deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env` with `NEAR_INTENTS_API_KEY` - overwrites `deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env` with `NEAR_INTENTS_API_KEY`
- overwrites `deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env` with Forgejo `root_url` and `domain` - overwrites `deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env` with Forgejo `root_url` and `domain`
- overwrites `deploy/k8s/overlays/hetzner-single-node/secrets/observability.env` with Grafana bootstrap credentials and root URL - renders generated ingress and issuer patch files under `.state/hetzner/generated-overlay/`
- renders `.state/hetzner/generated-overlay/` as the bootstrap-time source of truth
- copies the checked-in overlay patch behavior into that generated overlay
- imports platform resources from `deploy/k8s/platform/base/kustomization.yaml`, so newly added platform modules such as observability manifests are included automatically
- creates `registry-secrets` in namespace `registry` from `REGISTRY_USERNAME` and `REGISTRY_PASSWORD` - creates `registry-secrets` in namespace `registry` from `REGISTRY_USERNAME` and `REGISTRY_PASSWORD`
- creates the project docker-registry pull secret in `PROJECT_NAMESPACE` from the same registry credentials - creates the project docker-registry pull secret in `PROJECT_NAMESPACE` from the same registry credentials
This is different from running `kubectl apply -k deploy/k8s/overlays/hetzner-single-node` manually: plain Kustomize apply only consumes the checked-in overlay files, while bootstrap applies the generated overlay copy. Manual apply still only reads the checked-in files and does not read `scripts/hetzner/bootstrap-secrets.env` or create the imperative registry auth secrets on its own. This is different from running `kubectl apply -k deploy/k8s/overlays/hetzner-single-node` manually: plain Kustomize apply only consumes the checked-in overlay files and only generates `unrip-secrets` and `forgejo-secrets`. It does not create registry auth secrets and does not read `scripts/hetzner/bootstrap-secrets.env` on its own.
Required values: Required values:
- `HCLOUD_TOKEN_PASS` or `HCLOUD_TOKEN` - `HCLOUD_TOKEN_PASS` or `HCLOUD_TOKEN`
@ -92,9 +85,6 @@ Required values:
- `FORGEJO_DOMAIN` - `FORGEJO_DOMAIN`
- `FORGEJO_ROOT_URL` - `FORGEJO_ROOT_URL`
- `REGISTRY_DOMAIN` - `REGISTRY_DOMAIN`
- `GRAFANA_DOMAIN`
- `GRAFANA_ROOT_URL`
- `HEADLAMP_DOMAIN`
- `LETSENCRYPT_EMAIL` - `LETSENCRYPT_EMAIL`
- `REGISTRY_USERNAME` - `REGISTRY_USERNAME`
- `REGISTRY_PASSWORD_PASS` or `REGISTRY_PASSWORD` - `REGISTRY_PASSWORD_PASS` or `REGISTRY_PASSWORD`
@ -102,11 +92,7 @@ Required values:
- `FORGEJO_ADMIN_USERNAME` - `FORGEJO_ADMIN_USERNAME`
- `FORGEJO_ADMIN_EMAIL` - `FORGEJO_ADMIN_EMAIL`
- `FORGEJO_ADMIN_PASSWORD_PASS` or `FORGEJO_ADMIN_PASSWORD` - `FORGEJO_ADMIN_PASSWORD_PASS` or `FORGEJO_ADMIN_PASSWORD`
- `GRAFANA_ADMIN_USERNAME` (defaults to `admin`)
- `GRAFANA_ADMIN_PASSWORD_PASS` or `GRAFANA_ADMIN_PASSWORD`
- optional `HEADLAMP_ADMIN_TOKEN_PASS` for storing the generated Headlamp login token back into `pass`
- optional repo settings: `FORGEJO_REPO_OWNER`, `FORGEJO_REPO_NAME`, `FORGEJO_REPO_PRIVATE` - optional repo settings: `FORGEJO_REPO_OWNER`, `FORGEJO_REPO_NAME`, `FORGEJO_REPO_PRIVATE`
- optional project path settings: `PROJECT_DIR`, `PROJECT_KUSTOMIZE_PATH`
Optional for automatic DNS: Optional for automatic DNS:
- Cloudflare: - Cloudflare:
@ -129,10 +115,7 @@ Outputs:
- kubeconfig written to `.state/hetzner/kubeconfig.yaml` - kubeconfig written to `.state/hetzner/kubeconfig.yaml`
- CI kubeconfig written to `.state/hetzner/kubeconfig.incluster.yaml` - CI kubeconfig written to `.state/hetzner/kubeconfig.incluster.yaml`
- overlay secrets and ingress host patches rendered from local env / `pass` - overlay secrets and ingress host patches rendered from local env / `pass`
- `.state/hetzner/generated-overlay/` rendered and applied as the canonical bootstrap manifest set for that run - namespaces, Redpanda, app deployments, Forgejo, registry, Traefik-targeted ingress resources, cert-manager, and issuers applied
- namespaces, Redpanda, app deployments, Forgejo, registry, Traefik-targeted ingress resources, cert-manager, issuers, and any additional platform resources referenced by `deploy/k8s/platform/base/kustomization.yaml` applied
- Headlamp is deployed and wired to the configured public hostname model
- bootstrap stores the generated Headlamp service-account token in `pass` when `HEADLAMP_ADMIN_TOKEN_PASS` is configured
- Forgejo admin account created automatically if missing - Forgejo admin account created automatically if missing
- Forgejo runner registration is generated automatically from inside the Forgejo pod and the resulting `/data/.runner` config is stored under the shared `forgejo-data` persistent volume used by the runner deployment - Forgejo runner registration is generated automatically from inside the Forgejo pod and the resulting `/data/.runner` config is stored under the shared `forgejo-data` persistent volume used by the runner deployment
- Forgejo repository created automatically in either the admin user's namespace or a pre-existing organization named by `FORGEJO_REPO_OWNER` - Forgejo repository created automatically in either the admin user's namespace or a pre-existing organization named by `FORGEJO_REPO_OWNER`
@ -150,18 +133,15 @@ Recommended mode:
## DNS and TLS ## DNS and TLS
If DNS provider credentials are present, bootstrap updates: If DNS provider credentials are present, bootstrap updates:
- `${PUBLIC_DOMAIN}` - `${BASE_DOMAIN}`
- `git.${PUBLIC_DOMAIN}` - `git.${BASE_DOMAIN}`
- `registry.${PUBLIC_DOMAIN}` - `registry.${BASE_DOMAIN}`
- `grafana.${PUBLIC_DOMAIN}`
- `headlamp.${PUBLIC_DOMAIN}`
Supported scripted providers: Supported scripted providers:
- Cloudflare - Cloudflare
- Porkbun - Porkbun
TLS is handled in-cluster by cert-manager using Let's Encrypt issuers and the rendered ingress hosts. TLS is handled in-cluster by cert-manager using Let's Encrypt issuers and the rendered ingress hosts.
Grafana and Headlamp are both wired into the public hostname model by default. Keep Grafana authenticated, and treat the Headlamp token as an operator credential.
The platform base assumes the default k3s Traefik ingress controller is present; it does not install ingress-nginx. The platform base assumes the default k3s Traefik ingress controller is present; it does not install ingress-nginx.
For clean-cluster applies, the base kustomization now includes cert-manager before the `ClusterIssuer` resources so the issuer CRs can be created in the same bootstrap flow. For clean-cluster applies, the base kustomization now includes cert-manager before the `ClusterIssuer` resources so the issuer CRs can be created in the same bootstrap flow.
@ -171,8 +151,6 @@ KUBECONFIG=.state/hetzner/kubeconfig.yaml kubectl get pods -A
bash scripts/k8s/logs.sh bash scripts/k8s/logs.sh
``` ```
For the web log UI and observability stack, see `docs/k8s-observability.md`.
## Self-hosted CI/CD handoff ## Self-hosted CI/CD handoff
Default bootstrap now automates the Forgejo handoff: Default bootstrap now automates the Forgejo handoff:
1. create the Forgejo repo in the admin namespace or in a pre-existing organization named by `FORGEJO_REPO_OWNER` 1. create the Forgejo repo in the admin namespace or in a pre-existing organization named by `FORGEJO_REPO_OWNER`
@ -190,7 +168,7 @@ Default bootstrap now automates the Forgejo handoff:
The workflow then: The workflow then:
- starts a Kubernetes Job in the target namespace - starts a Kubernetes Job in the target namespace
- checks out the repo inside that Job using the Forgejo job token via `Authorization: Bearer ...` HTTP auth - checks out the repo inside that Job using the Forgejo job token via `Authorization: Bearer ...` HTTP auth
- uses Kaniko plus the Kubernetes registry auth secret to build and push `${REGISTRY_DOMAIN}/${PROJECT_NAME}:${GIT_SHA}` from `PROJECT_PATH` inside the repo checkout - uses Kaniko plus the Kubernetes registry auth secret to build and push `${REGISTRY_DOMAIN}/${PROJECT_NAME}:${GIT_SHA}`
- updates the app deployments in `PROJECT_NAMESPACE` - updates the app deployments in `PROJECT_NAMESPACE`
- waits for rollout - waits for rollout
@ -220,7 +198,7 @@ bash scripts/hetzner/destroy.sh
`destroy.sh` reads `HCLOUD_TOKEN`, optional `TAILSCALE_AUTH_KEY`, optional DNS provider credentials, and optional Forgejo admin credentials via the same `*_PASS` mapping mechanism as bootstrap. `destroy.sh` reads `HCLOUD_TOKEN`, optional `TAILSCALE_AUTH_KEY`, optional DNS provider credentials, and optional Forgejo admin credentials via the same `*_PASS` mapping mechanism as bootstrap.
It uses the same Terraform inputs as bootstrap for the infrastructure resources, then can optionally: It uses the same Terraform inputs as bootstrap for the infrastructure resources, then can optionally:
- delete the scripted DNS records for `${PUBLIC_DOMAIN}`, `git.${PUBLIC_DOMAIN}`, `registry.${PUBLIC_DOMAIN}`, `grafana.${PUBLIC_DOMAIN}`, and `headlamp.${PUBLIC_DOMAIN}` - delete the scripted DNS records for `${BASE_DOMAIN}`, `git.${BASE_DOMAIN}`, and `registry.${BASE_DOMAIN}`
- remove local bootstrap artifacts under `.state/hetzner/`, `deploy/k8s/overlays/hetzner-single-node/generated/`, and the local Terraform working/state files in `infra/terraform/hetzner/` - remove local bootstrap artifacts under `.state/hetzner/`, `deploy/k8s/overlays/hetzner-single-node/generated/`, and the local Terraform working/state files in `infra/terraform/hetzner/`
- delete the bootstrap-managed Forgejo repository via the Forgejo API - delete the bootstrap-managed Forgejo repository via the Forgejo API

View file

@ -1,117 +0,0 @@
# Hetzner rebuild pipeline map
This document summarizes the currently intended rebuild flow for the repo-driven Hetzner single-node cluster.
It is a companion to the operator runbooks, not a competing source of truth.
Use these first for exact commands and required env:
- `docs/hetzner-k3s-bootstrap.md`
- `docs/hetzner-self-hosted-ci-runbook.md`
- `docs/k8s-observability.md`
## High-level rebuild sequence
1. prepare `scripts/hetzner/bootstrap-secrets.env`
2. source it so `*_PASS` mappings resolve through `pass`
3. optionally run `scripts/hetzner/destroy.sh`
4. run `scripts/hetzner/bootstrap.sh`
5. let bootstrap:
- provision/update Hetzner infra with Terraform
- configure DNS when provider credentials are present
- fetch the real kubeconfig from the node
- render `.state/hetzner/generated-overlay/`
- apply platform + project manifests
- bootstrap Forgejo admin, runner, repo, and Actions configuration
- seed the repo into Forgejo
- trigger the normal Forgejo Actions build/push/deploy path
6. verify public/operator surfaces:
- Forgejo
- registry
- Grafana
- Headlamp
7. verify workload health and CI success
## Ownership boundaries
### Terraform owns
- Hetzner VM
- network
- firewall
- cloud-init user data
### Cloud-init owns
- OS package prep
- optional Tailscale join
- k3s installation
- a marker file under `/opt/unrip/bootstrap/README.txt`
Cloud-init does **not** clone this repo or apply Kubernetes manifests.
### Bootstrap script owns
- `pass`-resolved secret loading
- DNS automation
- kubeconfig retrieval/rendering
- generated overlay rendering under `.state/hetzner/generated-overlay/`
- imperative registry auth secret creation
- Forgejo bootstrap API calls
- repo seeding
- Headlamp token export to `pass`
### Kubernetes manifests own
- platform services
- project services
- ingress/TLS resources
- observability stack
- persistent volume claims and workload specs
## Current default runtime model
Platform services:
- Forgejo
- Forgejo runner
- registry
- cert-manager
- Grafana
- Loki
- Promtail
- Headlamp
Project services:
- Redpanda
- `near-intents-ingest`
- `dummy-reactor`
- `dummy-executor`
- `dummy-consumer`
Ingress/controller model:
- Traefik bundled with k3s
- no ingress-nginx in the active path
## Rebuild verification checklist
After bootstrap, verify:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl get nodes -o wide
kubectl get pods -A
kubectl -n observability get deploy,ds,pods,svc,ingress,secrets
kubectl -n forgejo get deploy,pods,svc,ingress
kubectl -n registry get deploy,pods,svc,ingress
kubectl -n unrip get deploy,pods
```
Public/operator surfaces should respond:
- `https://git.<public-domain>/`
- `https://registry.<public-domain>/v2/`
- `https://grafana.<public-domain>/`
- `https://headlamp.<public-domain>/`
CI should show a successful deploy workflow in Forgejo Actions.
## Current caveat
The core Hetzner/k3s/Forgejo path has been rebuilt successfully before.
Headlamp was added afterward and validated live on the rebuilt cluster, but a brand-new destroy/rebuild rehearsal with Headlamp included has not yet been re-run from zero.
So the rebuild story is repo-driven and operationally close to fully reproducible, with one validation step still outstanding: a final clean-room destroy/rebuild after the latest Headlamp/docs cleanup.

View file

@ -22,8 +22,6 @@ After that you should have:
- repository Actions secrets/variables populated for CI - repository Actions secrets/variables populated for CI
- the current repo pushed to Forgejo automatically in default mode - the current repo pushed to Forgejo automatically in default mode
- Registry reachable at `https://${REGISTRY_DOMAIN}` - Registry reachable at `https://${REGISTRY_DOMAIN}`
- Grafana reachable at `https://${GRAFANA_DOMAIN}`
- Headlamp reachable at `https://${HEADLAMP_DOMAIN}`
- private admin/control-plane access over Tailscale if configured - private admin/control-plane access over Tailscale if configured
Bootstrap repo automation requires `FORGEJO_ADMIN_USERNAME`, `FORGEJO_ADMIN_PASSWORD`, Python `PyYAML` locally for kubeconfig rendering, and Python `PyNaCl` locally in the default `forgejo-actions` mode so the script can encrypt Forgejo Actions secrets before upload. Bootstrap now fails fast with an explicit preflight error if those Python modules are missing. The same bootstrap flow now also creates the initial Forgejo admin account and writes a durable `/data/.runner` config into the shared Forgejo PVC before the runner deployment is allowed to start. Bootstrap repo automation requires `FORGEJO_ADMIN_USERNAME`, `FORGEJO_ADMIN_PASSWORD`, Python `PyYAML` locally for kubeconfig rendering, and Python `PyNaCl` locally in the default `forgejo-actions` mode so the script can encrypt Forgejo Actions secrets before upload. Bootstrap now fails fast with an explicit preflight error if those Python modules are missing. The same bootstrap flow now also creates the initial Forgejo admin account and writes a durable `/data/.runner` config into the shared Forgejo PVC before the runner deployment is allowed to start.
@ -40,7 +38,6 @@ kubectl get nodes -o wide
kubectl get pods -A kubectl get pods -A
kubectl -n forgejo get deploy,pods,svc,ingress kubectl -n forgejo get deploy,pods,svc,ingress
kubectl -n registry get deploy,pods,svc,ingress kubectl -n registry get deploy,pods,svc,ingress
kubectl -n observability get deploy,ds,pods,svc,ingress,secrets
kubectl -n unrip get deploy,pods kubectl -n unrip get deploy,pods
``` ```
@ -66,7 +63,6 @@ Bootstrap upserts these repository variables automatically:
- `PROJECT_NAME=${PROJECT_NAME}` - `PROJECT_NAME=${PROJECT_NAME}`
- `PROJECT_NAMESPACE=${PROJECT_NAMESPACE}` - `PROJECT_NAMESPACE=${PROJECT_NAMESPACE}`
- `PROJECT_DEPLOYMENTS` as a comma-separated version of the bootstrap deployment list - `PROJECT_DEPLOYMENTS` as a comma-separated version of the bootstrap deployment list
- `PROJECT_PATH` as the repo-relative app directory used for Docker/Kaniko builds
The Forgejo repo configuration step is idempotent, so rerunning bootstrap updates the same repo secrets/variables in place. The Forgejo repo configuration step is idempotent, so rerunning bootstrap updates the same repo secrets/variables in place.
@ -77,7 +73,7 @@ The workflow in `.forgejo/workflows/deploy.yml` now:
3. computes `IMAGE=${REGISTRY_HOST}/${PROJECT_NAME}:${GIT_SHA}` 3. computes `IMAGE=${REGISTRY_HOST}/${PROJECT_NAME}:${GIT_SHA}`
4. creates an in-cluster Kubernetes Job in `PROJECT_NAMESPACE` 4. creates an in-cluster Kubernetes Job in `PROJECT_NAMESPACE`
5. that Job checks out the repo with the Forgejo job token in an init container using an `Authorization: Bearer ...` header instead of embedding the token in the clone URL 5. that Job checks out the repo with the Forgejo job token in an init container using an `Authorization: Bearer ...` header instead of embedding the token in the clone URL
6. Kaniko builds and pushes the image from `PROJECT_PATH` using the Kubernetes registry auth secret 6. Kaniko builds and pushes the image using the Kubernetes registry auth secret
7. the workflow updates each deployment listed in `PROJECT_DEPLOYMENTS` inside `PROJECT_NAMESPACE` 7. the workflow updates each deployment listed in `PROJECT_DEPLOYMENTS` inside `PROJECT_NAMESPACE`
8. the workflow waits for rollout after each image update 8. the workflow waits for rollout after each image update
@ -85,7 +81,6 @@ Default behavior if you do not set project variables:
- `PROJECT_NAME=unrip` - `PROJECT_NAME=unrip`
- `PROJECT_NAMESPACE=unrip` - `PROJECT_NAMESPACE=unrip`
- `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer` - `PROJECT_DEPLOYMENTS=near-intents-ingest,dummy-reactor,dummy-executor,dummy-consumer`
- `PROJECT_PATH=projects/unrip`
- `PROJECT_REGISTRY_SECRET_NAME=unrip-registry-creds` - `PROJECT_REGISTRY_SECRET_NAME=unrip-registry-creds`
For a future project, reuse the same workflow by changing only the Forgejo repository variables instead of copying the workflow. For a future project, reuse the same workflow by changing only the Forgejo repository variables instead of copying the workflow.
@ -135,8 +130,6 @@ Likewise, generated local kubeconfigs/manifests remain on disk unless you set `D
TLS is issued by cert-manager using the rendered Let's Encrypt email and ingress hosts. TLS is issued by cert-manager using the rendered Let's Encrypt email and ingress hosts.
For browser-based cluster inspection and pod logs, use Headlamp. For historical log search, use Grafana/Loki. Both are documented in `docs/k8s-observability.md`.
## Current limitations ## Current limitations
- the bootstrap path now creates the initial admin account and runner config automatically from inside the Forgejo pod, but it still depends on the operator supplying the intended admin credentials up front - the bootstrap path now creates the initial admin account and runner config automatically from inside the Forgejo pod, but it still depends on the operator supplying the intended admin credentials up front
- runner startup is now manifest-gated on a durable `/data/.runner` file stored under the shared `forgejo-data` PVC, so fresh applies no longer depend on a broken intermediate secret or a race against a crashing runner pod; deleting that Forgejo PVC still requires rerunning bootstrap to re-register the runner - runner startup is now manifest-gated on a durable `/data/.runner` file stored under the shared `forgejo-data` PVC, so fresh applies no longer depend on a broken intermediate secret or a race against a crashing runner pod; deleting that Forgejo PVC still requires rerunning bootstrap to re-register the runner

View file

@ -1,172 +0,0 @@
# Kubernetes observability on the Hetzner single-node cluster
This cluster now includes a reproducible ops/observability stack in the `observability` namespace:
- `loki` for log storage and querying
- `promtail` as a DaemonSet that ships pod stdout/stderr logs from every node
- `grafana` for log search and historical exploration
- `headlamp` for a Kubernetes web UI with pods, workloads, events, and pod logs
## What gets collected
Promtail tails Kubernetes container log files under `/var/log/pods` on each node.
That means any container writing logs to stdout/stderr automatically shows up in Loki/Grafana.
This fits the current app setup in this repo because the services already log to stdout/stderr.
What is **not** collected automatically:
- arbitrary log files written somewhere else inside a container filesystem
- logs from external services that are not running as Kubernetes pods on this cluster
## Access
Grafana is exposed through Traefik + cert-manager at:
- `https://${GRAFANA_DOMAIN}` when bootstrapped from `scripts/hetzner/bootstrap-secrets.env`
- in the current live environment: `https://grafana.doran.133011.xyz/`
Grafana credentials come from:
- `GRAFANA_ADMIN_USERNAME`
- `GRAFANA_ADMIN_PASSWORD_PASS` or `GRAFANA_ADMIN_PASSWORD`
The recommended path is `pass`.
In the current live setup the password is stored at:
- `api/hetznerk3s/grafana-admin-password`
Headlamp is exposed at:
- `https://${HEADLAMP_DOMAIN}` when bootstrapped from `scripts/hetzner/bootstrap-secrets.env`
- in the current live environment: `https://headlamp.doran.133011.xyz/`
Headlamp uses a Kubernetes service-account token for login. Bootstrap stores the generated token in `pass` when `HEADLAMP_ADMIN_TOKEN_PASS` is set.
In the current live setup it is stored at:
- `api/hetznerk3s/headlamp-admin-token`
## Reproducible bootstrap path
The observability stack is part of the repo-managed platform layer:
- `deploy/k8s/platform/base/observability.yaml`
- `deploy/k8s/platform/base/headlamp.yaml`
- `deploy/k8s/platform/base/kustomization.yaml`
- `deploy/k8s/platform/base/namespace.yaml`
- `deploy/k8s/overlays/hetzner-single-node/storage-class.patch.yaml`
- `deploy/k8s/overlays/hetzner-single-node/kustomization.yaml`
- `deploy/k8s/overlays/hetzner-single-node/ingress-hosts.patch.yaml`
- `deploy/k8s/overlays/hetzner-single-node/secrets/observability.env.example`
Bootstrap materializes the Grafana secret from local env / `pass` and also stores the generated Headlamp login token back into `pass` when configured:
- writes `deploy/k8s/overlays/hetzner-single-node/secrets/observability.env`
- copies it into `.state/hetzner/generated-overlay/`
- applies the generated overlay
- waits for `headlamp-admin-token`
- stores that token via `HEADLAMP_ADMIN_TOKEN_PASS`
## Verify the stack
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl -n observability get pods
kubectl -n observability get pvc
kubectl -n observability get ingress
kubectl -n observability rollout status deployment/loki --timeout=300s
kubectl -n observability rollout status deployment/grafana --timeout=300s
kubectl -n observability rollout status deployment/headlamp --timeout=300s
kubectl -n observability rollout status daemonset/promtail --timeout=300s
```
## Verify logs are arriving
Generate some app logs, then query Loki directly:
```bash
export KUBECONFIG=$PWD/.state/hetzner/kubeconfig.yaml
kubectl -n observability port-forward svc/loki 3100:3100
```
In another shell:
```bash
curl -sS 'http://127.0.0.1:3100/loki/api/v1/labels' | jq
curl -G -sS 'http://127.0.0.1:3100/loki/api/v1/query' \
--data-urlencode 'query={namespace="unrip"}' | jq
```
If those queries return labels/streams, pod logs are reaching Loki.
## Use Headlamp
1. open `https://headlamp.doran.133011.xyz/`
2. fetch the login token with:
```bash
pass show api/hetznerk3s/headlamp-admin-token
```
3. paste that token into the Headlamp login form
4. browse namespaces, workloads, pods, and use the built-in pod log view
For this disposable cluster the generated Headlamp token is bound to `cluster-admin` so the UI can show everything. For a production setup, replace that with narrower RBAC.
## Use Grafana
After logging into Grafana:
1. open **Explore**
2. choose the default **Loki** datasource
3. run queries like:
- `{namespace="unrip"}`
- `{namespace="forgejo"}`
- `{namespace="registry"}`
- `{pod=~"near-intents-ingest.*"}`
- `{container="app"}`
Useful labels added by promtail:
- `namespace`
- `pod`
- `container`
- `app`
- selected `app.kubernetes.io/*` labels
## Day-to-day ops
CLI remains useful for fast debugging:
```bash
kubectl get pods -A
kubectl -n unrip logs deploy/near-intents-ingest -f
kubectl -n forgejo logs deploy/forgejo -f
bash scripts/k8s/logs.sh
```
Use Headlamp when you want:
- a web UI listing workloads and pods
- click-through pod inspection
- built-in pod log viewing
- events and resource browsing
Use Grafana when you want:
- historical log search
- cross-pod filtering
- LogQL queries
- easier multi-namespace log exploration
## Security notes
Grafana and Headlamp are admin/operator surfaces.
For this cluster both are publicly reachable, protected only by the Grafana login and the Headlamp service-account token.
That is acceptable for this disposable single-node setup, but for a hardened production posture prefer one of:
- Tailscale-only access
- ingress auth in front of Grafana and Headlamp
- SSO/OIDC
## Add a new app and have logs show up there
Nothing special is required as long as the new pod logs to stdout/stderr.
If you deploy a new app under Kubernetes and expose it through the usual manifests/Ingress flow, promtail will scrape its pod logs automatically.

View file

@ -1,5 +1,198 @@
# Moved # Minimal product: NEAR Intents demand monitor
This project-specific document moved to: ## Goal
Build the smallest useful event-driven product for crypto trading research:
- `projects/unrip/docs/minimal-product.md` - read **live user demand** from NEAR Intents
- publish demand into a **central Kafka/Redpanda-compatible bus**
- prove downstream consumption with a **dummy reactor**
- avoid dashboards, execution, wallets, storage, auth workflows beyond the required API key, strategy code, and generic infra beyond the message bus itself
## Why this is the right first slice
From the NEAR Intents docs, there are several possible data surfaces:
1. **Message Bus WebSocket `quote` subscription**
- Endpoint: `wss://solver-relay-v2.chaindefuser.com/ws`
- Real-time stream for quote requests
- Subscription request shape:
```json
{
"jsonrpc": "2.0",
"id": 1,
"method": "subscribe",
"params": ["quote"]
}
```
- Expected live frame shape is JSON-RPC-like but should be treated as flexible. The adapter should accept quote payloads when the useful fields appear in any of these places:
- directly under `params`
- directly under `result`
- or at the top level of the message body
- Fields of interest include:
- `quote_id` (or equivalent request identifier)
- `defuse_asset_identifier_in`
- `defuse_asset_identifier_out`
- `exact_amount_in` or `exact_amount_out`
- `min_deadline_ms`
- Subscription acknowledgements may also vary. They may arrive as an `id`-matched JSON-RPC response with a simple or structured `result`, or as some other non-quote control frame, before the first quote event.
- This is the closest public signal to **current demand**.
2. **Message Bus JSON-RPC `publish_intent` / `get_status`**
- Endpoint: `https://solver-relay-v2.chaindefuser.com/rpc`
- Useful for posting intents or checking a known `intent_hash`
- Not a public firehose of all intents.
3. **Explorer API `/api/v0/transactions`**
- Historical and analytics friendly
- Requires JWT auth
- Better for history, not best for a minimal live monitor
4. **Verifier contract intent payloads**
- The on-chain swap expression is usually `token_diff`
- Important for understanding settlement semantics
- Not the easiest first live intake path for a lean bus-first system
## Product decision
The minimal product should monitor **WebSocket `quote` events** and route them through a bus-first runtime.
### Why
- closest live signal to user demand
- directly reflects what users are requesting from solvers
- enough to answer the first trading question: **what assets are being requested right now?**
- decouples venue intake from downstream analysis through Kafka-compatible topics
### Important implementation note
Current docs for the market-maker quickstart and live endpoint behavior indicate the Message Bus requires a **partner API key / JWT** in the `Authorization: Bearer ...` header.
That means the best path is still the quote stream, but live operation is partner-gated.
### Important caveat
A `quote` event is **pre-trade demand**, not guaranteed execution.
That is fine for v0. The purpose is demand sensing, not settlement accounting.
## Runtime shape
```text
NEAR Intents websocket
        |
        v
src/apps/near-intents-ingest.mjs
        |
        +--> raw.near_intents.quote
        |
        +--> norm.swap_demand
                |
                v
        src/apps/dummy-consumer.mjs
```
### Runtime contracts
#### Ingest app
`src/apps/near-intents-ingest.mjs`:
- loads env
- parses optional `--pair 'asset_a->asset_b'`
- starts the NEAR Intents websocket adapter
- writes raw and normalized events to the configured broker
#### Dummy consumer
`src/apps/dummy-consumer.mjs`:
- subscribes to `norm.swap_demand`
- logs observed pair and quote id
- exists only to prove a downstream consumer contract
#### Bus config
Default env-driven topics and group ids:
- `KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE=raw.near_intents.quote`
- `KAFKA_TOPIC_NORM_SWAP_DEMAND=norm.swap_demand`
- `KAFKA_CONSUMER_GROUP_DUMMY=dummy-reactor-v1`
Redpanda is a valid runtime target because the transport is Kafka-compatible.
## Internal model
Normalize each quote event into a thin bus envelope:
Top-level envelope fields:
- `venue`
- `source`
- `type`
- `eventId`
- `occurredAt`
- `ingestedAt`
- `assetIn`
- `assetOut`
- `raw`
- `quote`
Nested `quote` fields:
- `quoteId`
- `assetIn`
- `assetOut`
- `amountIn`
- `amountOut`
- `ttlMs`
Field extraction must remain tolerant to known upstream aliases, and normalization should continue to operate on the merged `metadata + data` payload shape from the Message Bus event.
The live adapter now intentionally accepts quote-like payloads from `params`, `result`, or the top-level message body, but only processes frames that actually look like quote data. Subscription acknowledgements and unrelated control frames should still be ignored.
## Filtering
The ingest runtime supports an optional exact-pair filter:
```bash
npm run near-intents:ingest -- --pair 'asset_a->asset_b'
```
The filter is direction-agnostic, so the reversed asset order is also accepted.
## Scope boundaries
### Must do
- connect to the websocket
- subscribe to `quote` and tolerate control frames
- normalize quote events into one compact model
- publish raw and normalized events to Kafka/Redpanda-compatible topics
- allow a downstream consumer to react to normalized events
- reconnect automatically on disconnect
- document `npm` and `node` entrypoints
### Must not do
- Python packaging or CLI guidance
- TUI-specific product requirements
- charts
- account details
- pnl
- routing internals
- market making controls
- execution buttons
- config panels
- speculative infra beyond the current bus and dummy consumer
## Path to success
1. Connect to WebSocket
2. Subscribe to `quote`
3. Normalize incoming events into one compact model
4. Publish raw envelopes to `raw.near_intents.quote`
5. Publish normalized envelopes to `norm.swap_demand`
6. Start a dummy consumer on the normalized topic
7. Reconnect automatically on disconnect
8. Only after this works, consider:
- `quote_status`-specific downstream handling
- historical replay via Explorer API
- token metadata enrichment
- filtering and alerts beyond `--pair`
## Packaging alignment
Current repository packaging and usage should stay aligned around the JavaScript runtime entrypoints:
- package scripts:
- `npm run near-intents:ingest`
- `npm run dummy-consumer`
- `npm start` as a compatibility wrapper
- direct app entrypoints:
- `node src/apps/near-intents-ingest.mjs`
- `node src/apps/dummy-consumer.mjs`
Documentation should treat the npm scripts and `src/apps/*` node entrypoints as canonical. Older single-file and Python/TUI instructions should remain removed to avoid runtime confusion.
## Sources
- NEAR Intents Message Bus WebSocket docs: `subscribe` with `quote` / `quote_status`
- NEAR Intents Message Bus RPC docs: `quote`, `publish_intent`, `get_status`
- Verifier contract docs: `token_diff` intent type
- Explorer API OpenAPI: authenticated historical transactions

View file

@ -1,5 +1,383 @@
# Moved # Trading System Architecture Notes for Next Session
This project-specific document moved to: ## Objective
Build the first real version of the trading system as an event-driven, multi-service architecture.
- `projects/unrip/docs/next-session-architecture.md` Current implemented seed:
- NEAR Intents ingest in Node.js
- Kafka-compatible bus usage via `kafkajs`
- dummy reactor / executor / result consumer loop
Next session should continue from this architecture, not revert to a monolith, local-only script, or TUI.
---
## Core Architecture
All components are independent services.
They communicate only through a central Kafka-compatible bus (Redpanda first, Kafka-compatible by design).
### Service classes
- venue ingestors
- normalizers
- reactors / decision engines
- executors
- downstream consumers / monitors / archivers / replay tools
### Service communication rule
No direct service-to-service calls for core trading flow.
Use bus topics only.
---
## Venue-Oriented Structure
The system should be organized by venue.
Each venue can have different:
- ingest/feed mechanics
- normalization logic
- execution mechanics
### Per-venue responsibilities
- `ingest` = venue-native intake
- `normalize` = convert venue-native payload into canonical internal event
- `execute` = venue-specific action logic
Planned shape:
```text
src/
  apps/
  bus/
  core/
  venues/
    near-intents/
      ingest
      normalize
      execute
```
---
## Bus Choice
Use **Redpanda** first, but stay fully **Kafka-compatible**.
### Reason
Requirements:
- high throughput
- low latency
- retention
- replay
- multiple producers/consumers
- independent services
- future scale-out
- multi-language compatibility
### Constraint
Do not use broker-specific features that make migration to Kafka difficult.
Use standard Kafka clients and semantics.
---
## Data Model Principles
Kafka/Redpanda is the operational event backbone.
### Event model rules
- append-only
- immutable events
- versioned schemas
- raw and normalized events both preserved
### Every event should include
- `event_id`
- `event_type`
- `venue`
- `observed_at` / `ingested_at`
- `schema_version`
- `payload`
- optionally raw/original payload where appropriate
### Raw vs normalized
Keep both.
- raw topics = exact venue-native source truth
- normalized topics = canonical research/trading inputs
This is required for:
- replay
- debugging
- future backtesting
- future Spark/batch processing
---
## Current/Planned Topic Flow
Minimal 3-stage pipeline:
1. ingest publishes normalized demand
2. reactor publishes trade command
3. executor publishes trade result
### Topic classes
- `raw.*` = raw venue-native events
- `norm.*` = canonical normalized market events
- `cmd.*` = execution commands
- `exec.*` = execution outcomes
- later `signal.*` if needed for reactor outputs before command stage
### Current minimal topics
- `norm.swap_demand`
- `cmd.execute_trade`
- `exec.trade_result`
### NEAR Intents
NEAR Intents source currently feeds quote-demand style events from solver-bus websocket.
This is a venue ingest source, not the whole trading system.
---
## Execution Safety / Zero Downtime Requirements
This is critical.
### Constraint
Multiple executors must never duplicate the same trade/action during deploys, restarts, or rebalances.
### Must-have rules
1. Every execution command must carry a unique `command_id`
2. Commands must include deterministic idempotency information
3. Executors must be idempotent
4. Executors of the same role must share a single consumer group (one consumer group per executor role)
5. Commands should be partitioned by a stable execution key where ordering matters
6. Executor state must be persisted durably enough to detect duplicate command execution
### Kafka consumer groups are not sufficient alone
They help assign work, but on their own they do not prevent the same command from being processed more than once under restart/rebalance conditions.
Idempotency is still required.
### Rolling updates / zero downtime
Executors must support:
- graceful shutdown
- stop taking new work before exit
- finish or safely recover in-flight work
- commit offsets only after safe execution state transition
### Persistence implication
Executor idempotency state is not optional metadata.
It is operational state that must survive pod restarts.
Current single-node k3s direction:
- executor state lives at `/var/lib/unrip/executor-state`
- Kubernetes mounts that path through persistent storage
- the Hetzner single-node overlay currently targets k3s `local-path` storage
- node loss without storage migration means duplicate-suppression history is lost
---
## Deployment Target
### First deployment phase
- single machine on Hetzner
- but still multiple independent services
- no architecture shortcuts that prevent future clustering
### Future target
- split across multiple machines
- cluster capable
- fault tolerant
- multi-node
- zero-downtime deploys
### Deployment rules from day 1
- every component is a separate container/service
- all config via env/config files
- communication over network/bus only
- persistent components use mounted volumes/PVCs
- no manual SSH-based operational workflow
---
## Infrastructure / Ops Direction
Target environment:
- Hetzner
- self-hosted CI/CD
- provisioning by code
- no GitHub dependency
### Desired stack direction
- Terraform for Hetzner provisioning
- Kubernetes-oriented target from the start
- self-hosted Git + CI/CD
- Kafka-compatible broker
- object storage later for long-term archived event history
### Single-node first, future cluster later
The first version may run on one machine, but deployment structure should already match a future distributed system.
### Current canonical operator path
The repo now documents and partially implements this path as the primary deployment workflow:
#### Phase 0: workstation bootstrap
1. A local operator workstation prepares bootstrap secrets in `scripts/hetzner/bootstrap-secrets.env`.
2. The operator runs `bash scripts/hetzner/bootstrap.sh`.
3. Terraform provisions the server, firewall, network, and cloud-init user-data.
4. cloud-init installs k3s automatically and prepares persistence directories plus bootstrap artifacts.
5. The workstation waits for the public k3s API endpoint to report ready.
6. The workstation writes `.state/hetzner/kubeconfig.yaml`.
7. The workstation injects initial Kubernetes Secrets for app and Forgejo bootstrap.
8. The workstation applies repo-managed Kubernetes manifests under `deploy/k8s/`.
9. The workstation performs the first image/bootstrap delivery attempt for the app workloads.
10. The workstation verifies rollout status.
#### Phase 1: self-hosted handoff
1. Forgejo becomes reachable in-cluster.
2. The operator completes initial Forgejo admin/repo setup.
3. This repo is pushed or mirrored into Forgejo.
4. The Forgejo runner becomes the routine app deployment mechanism.
5. Terraform remains the infra mutation entrypoint unless further automated later.
### Failure-recovery expectation
The bootstrap path must be rerunnable from the workstation.
Docs should keep treating recovery as:
- fix local secrets/inputs
- rerun the bootstrap script
- inspect the cluster with the generated kubeconfig
- destroy/recreate infra with `scripts/hetzner/destroy.sh` only when required
### Current repo-state caveats
The direction is clear, but the implementation is still mid-transition:
- the bootstrap script currently applies `deploy/k8s/base` directly rather than the Hetzner overlay
- kubeconfig/auth handling is not yet fully production-hardened
- first image delivery is still a bootstrap workaround rather than a final registry-native CI path
- Forgejo admin bootstrap, repo creation, and Actions configuration still require operator steps
- local Compose remains in the repo for development/testing, not as the canonical production path
### Minimal repo layout target
```text
deploy/
  hetzner/
    README.md
  k8s/
    base/
    overlays/
      hetzner-single-node/
infra/
  terraform/
    hetzner/
```
Guidelines:
- `infra/terraform/hetzner/` owns VM, firewall, networking, and cloud-init rendering
- `deploy/k8s/` owns Kubernetes-native manifests and overlays
- app runtime manifests should remain Kubernetes-native so they can later move from single-node k3s to a larger cluster with minimal rewrite
- secret material must not live in git in plaintext; bootstrap docs should describe workstation-driven injection or generated secret references
---
## Local Development / Testing Direction
Do not assume manual multi-terminal operation long term.
### Requirement
Need an orchestrated local/dev runtime.
### Local dev should preserve real boundaries
- separate services
- broker present
- env/config driven
- same event flow as production
### Current local/dev answer
Compose is still acceptable for:
- developer laptops
- fast local iteration
- debugging event flow
- validating container boundaries before Kubernetes rollout
But Compose should remain explicitly secondary to the repo-driven Hetzner + k3s path for production operations.
### Testing layers
1. unit tests for normalizers / schema logic / helpers
2. integration tests against Kafka-compatible broker
3. replay/simulation tests using retained event streams
---
## Spark Readiness
Do not add Spark now.
But keep the system Spark-compatible later by:
- preserving raw events
- preserving normalized events
- using immutable append-only event streams
- versioning schemas
- separating operational event log from future analytical processing
Spark later would be for:
- large-scale backtesting
- feature generation
- archive processing
- multi-venue analytics
---
## Immediate Next Engineering Tasks
Next session should focus on the following.
### 1. Clean current repo structure
Remove duplicate/legacy paths and keep one canonical structure only.
### 2. Keep/complete the 3-stage loop
- NEAR Intents ingest -> `norm.swap_demand`
- dummy reactor -> `cmd.execute_trade`
- dummy executor -> `exec.trade_result`
- downstream result consumer
### 3. Define canonical schemas
Define concrete event schemas for:
- normalized swap demand
- execute trade command
- trade result
### 4. Define executor idempotency model
Specify:
- `command_id`
- idempotency key rules
- execution state transition rules
- duplicate handling rules
### 5. Move toward production-shaped deployment
Design for:
- one service per container
- single-node deployment first
- future multi-node split without app rewrite
### 6. Harden provisioning/deployment path
Next infra work should continue improving:
- Hetzner provisioning by code
- workstation bootstrap rerunnability
- self-hosted CI/CD handoff
- registry-native image delivery
- overlay convergence for the Hetzner single-node target
Status update:
- minimal Terraform exists under `infra/terraform/hetzner`
- first boot is cloud-init driven and installs k3s automatically
- bootstrap now starts from a local operator workstation rather than manual host login
- Kubernetes assets exist under `deploy/k8s`
- executor persistence boundaries are explicit for single-node k3s
- self-hosted CI handoff is documented, but still requires follow-up hardening
---
## Non-Goals for Next Session
- no dashboards
- no UI/TUI
- no monolith convenience architecture
- no SQLite-first system of record
- no direct coupling between ingest, decision, and execution
- no temporary local-only shortcuts that block future cluster deployment
---
## Guiding Principle
Build the single-node first version as if it is already a distributed system:
- separate services
- durable event bus
- replayable events
- explicit contracts
- idempotent execution
- production-compatible deployment boundaries
- bootstrapable from scratch without manual SSH-based host setup

View file

@ -1,5 +1,144 @@
# Moved # NEAR Intents demand monitor: bus-first source plan
This project-specific document moved to: ## Why websocket quote requests are still the MVP demand signal
- `projects/unrip/docs/spec.md` Public solver quote requests remain the closest thing to live demand because they appear when a user or integration asks the network for executable pricing. They are still the right upstream source, but the runtime architecture is now bus-first rather than terminal-first.
Why this source wins for a first monitor:
- **Most real-time:** quote requests arrive before settlement and usually before a completed trade is visible anywhere else.
- **Closer to intent formation:** they reflect active user demand, not just historical outcomes.
- **Operationally simple:** a single websocket feed can drive the ingest side without indexing chains, scraping dashboards, or correlating multiple APIs.
- **Good enough for ranking demand:** even if quotes do not always become fills, repeated quote flow is still a strong indicator of what users are currently trying to do.
## Tradeoffs vs other sources
### Solver websocket quote requests
Pros:
- lowest-latency view of current demand
- directly tied to solver workflow
- suitable for a streaming ingest adapter
- can be normalized into pair, size, and frequency metrics immediately
Cons:
- quote requests are **interest**, not guaranteed executed volume
- public access may still be rate-limited, undocumented, or require credentials depending on environment
- schema and availability may change faster than user-facing products
### Explorer
Explorer (`https://explorer.near-intents.org/`) is useful for validation and historical inspection, but it is usually a worse primary source for an MVP demand monitor.
Tradeoffs:
- better for human inspection than low-latency streaming
- likely shows processed/published activity instead of raw quote demand
- may lag the actual request path
- less convenient as a machine-first demand feed
### Status dashboard / published status
Status (`https://status.near-intents.org/posts/dashboard`) is useful for system health, not demand discovery.
Tradeoffs:
- tells us whether the platform is up, degraded, or incident-affected
- does **not** represent per-request user demand
- coarse and aggregated by design
### Published intents / settled outcomes
Published or completed intents are higher-confidence signals, but lower-fidelity for immediate demand sensing.
Tradeoffs:
- stronger evidence of actual execution
- misses abandoned demand and pre-trade discovery
- arrives later than quote traffic
- may require more indexing and entity correlation work
## Runtime architecture
```text
solver websocket quote stream
|
v
src/apps/near-intents-ingest.mjs
|
+--> raw.near_intents.quote
|
+--> norm.swap_demand
|
v
src/apps/dummy-consumer.mjs
```
### Responsibilities
#### `src/apps/near-intents-ingest.mjs`
- loads env from `.env`
- parses optional `--pair 'asset_a->asset_b'`
- connects to the NEAR Intents websocket
- subscribes to `quote` and `quote_status`
- publishes raw venue envelopes to `raw.near_intents.quote`
- publishes normalized swap-demand envelopes to `norm.swap_demand`
#### `src/apps/dummy-consumer.mjs`
- consumes normalized events from `norm.swap_demand`
- logs observed demand as a placeholder for later strategy logic
#### Kafka / Redpanda layer
- broker endpoint comes from `KAFKA_BROKERS`
- Redpanda is supported through Kafka protocol compatibility
- topics are configurable via env and default to:
- `raw.near_intents.quote`
- `norm.swap_demand`
## Assumptions and limitations
- The websocket is the best available **MVP** source, not a perfect truth source.
- Demand is approximated by quote requests, not by settled intents.
- Live endpoints require auth in practice; `NEAR_INTENTS_API_KEY` must be provided.
- Request schemas may evolve; the parser should tolerate missing fields.
- The current product is intentionally minimal: no database, no backfill, no reconciliation against chain state.
- The dummy consumer proves the decoupled flow but is not a strategy engine.
## Run instructions
Install:
```bash
npm install
```
Start ingest:
```bash
npm run near-intents:ingest
```
Direct node entrypoint (ingest):
```bash
node src/apps/near-intents-ingest.mjs
```
Run with exact-pair filtering:
```bash
npm run near-intents:ingest -- --pair 'asset_a->asset_b'
```
Start dummy consumer:
```bash
npm run dummy-consumer
```
Direct node entrypoint (dummy consumer):
```bash
node src/apps/dummy-consumer.mjs
```
## Decision summary
For an MVP whose job is to answer "what are users asking for right now?", solver websocket quote requests are still the best first source because they are the most direct, timely, and stream-friendly signal. The implementation now routes that signal through Kafka/Redpanda topics so ingestion and downstream reaction can evolve independently.

1
index.mjs Normal file
View file

@ -0,0 +1 @@
import './src/apps/near-intents-ingest.mjs';

View file

@ -19,17 +19,20 @@ write_files:
#!/usr/bin/env bash #!/usr/bin/env bash
set -euo pipefail set -euo pipefail
install -d -m 0755 /opt/unrip
if [ ! -d /opt/unrip/repo/.git ]; then
git clone --branch ${bootstrap_repo_branch} ${bootstrap_repo_url} /opt/unrip/repo
else
git -C /opt/unrip/repo fetch --all --prune
git -C /opt/unrip/repo checkout ${bootstrap_repo_branch}
git -C /opt/unrip/repo pull --ff-only origin ${bootstrap_repo_branch}
fi
install -d -m 0755 /opt/unrip/bootstrap install -d -m 0755 /opt/unrip/bootstrap
cat >/opt/unrip/bootstrap/README.txt <<'EOF' cat >/opt/unrip/bootstrap/README.txt <<'EOF'
This node was provisioned by Terraform + cloud-init. This node was provisioned by Terraform + cloud-init.
This cloud-init step no longer clones a bootstrap repository. Future Kubernetes bootstrap assets should live in:
The current Hetzner flow remains workstation-driven after Terraform: /opt/unrip/repo/${bootstrap_repo_path}
- scripts/hetzner/bootstrap.sh fetches kubeconfig from the node
- scripts/hetzner/bootstrap.sh renders secrets/overlays locally
- scripts/hetzner/bootstrap.sh applies Kubernetes manifests from the operator workstation
Reserved for future node-local bootstrap/GitOps assets:
/opt/unrip/bootstrap/${bootstrap_repo_path}
EOF EOF
- path: /etc/rancher/k3s/config.yaml - path: /etc/rancher/k3s/config.yaml
permissions: '0644' permissions: '0644'

View file

@ -38,6 +38,8 @@ resource "hcloud_server" "trading_system" {
node_name = var.name node_name = var.name
private_ipv4_address = var.private_ipv4_address private_ipv4_address = var.private_ipv4_address
public_domain = var.public_domain public_domain = var.public_domain
bootstrap_repo_url = var.bootstrap_repo_url
bootstrap_repo_branch = var.bootstrap_repo_branch
bootstrap_repo_path = var.bootstrap_repo_path bootstrap_repo_path = var.bootstrap_repo_path
tailscale_enabled = var.tailscale_enabled tailscale_enabled = var.tailscale_enabled
tailscale_auth_key = var.tailscale_auth_key tailscale_auth_key = var.tailscale_auth_key

View file

@ -26,6 +26,10 @@ output "kubeconfig_strategy" {
value = var.tailscale_enabled ? "Use Tailscale for private Kubernetes API access; avoid public SSH/Kubernetes exposure in the canonical flow." : "Use the public Kubernetes API endpoint with an operator-supplied bootstrap credential; avoid SSH/scp kubeconfig retrieval in the canonical flow." value = var.tailscale_enabled ? "Use Tailscale for private Kubernetes API access; avoid public SSH/Kubernetes exposure in the canonical flow." : "Use the public Kubernetes API endpoint with an operator-supplied bootstrap credential; avoid SSH/scp kubeconfig retrieval in the canonical flow."
} }
# Fixed on-node path of the bootstrap repository checkout. This is a hard-coded
# literal, not derived from any variable; it matches the /opt/unrip/repo target
# used by the git clone in the cloud-init bootstrap script, so keep the two in sync.
output "bootstrap_repo_checkout" {
value = "/opt/unrip/repo"
}
output "bootstrap_marker_file" { output "bootstrap_marker_file" {
value = "/opt/unrip/bootstrap/README.txt" value = "/opt/unrip/bootstrap/README.txt"
} }

View file

@ -7,7 +7,7 @@ variable "hcloud_token" {
variable "name" { variable "name" {
description = "Server name" description = "Server name"
type = string type = string
default = "doran-1" default = "unrip-1"
} }
variable "location" { variable "location" {
@ -93,8 +93,19 @@ variable "public_domain" {
type = string type = string
} }
# Git URL of the repository to clone onto the node. No default is declared, so
# callers must supply a value. It is forwarded to the server's bootstrap template
# as `bootstrap_repo_url` and used there as the source of `git clone`.
# NOTE(review): the clone runs unauthenticated as written — presumably the URL
# must be reachable from the node without credentials; confirm.
variable "bootstrap_repo_url" {
description = "Git repository URL cloned onto the node for GitOps/bootstrap assets"
type = string
}
# Branch of the bootstrap repository to check out on the node; defaults to "main".
# The bootstrap script uses it for the initial `git clone --branch` and, when the
# checkout already exists, for `git checkout` followed by `git pull --ff-only`.
variable "bootstrap_repo_branch" {
description = "Branch checked out for the bootstrap repository"
type = string
default = "main"
}
variable "bootstrap_repo_path" { variable "bootstrap_repo_path" {
description = "Reserved repository subdirectory name for a future node-local bootstrap/GitOps flow; current provisioning still applies manifests from the operator workstation" description = "Repository subdirectory expected to contain future Kubernetes bootstrap manifests/scripts"
type = string type = string
default = "deploy/k8s" default = "deploy/k8s"
} }

24
package-lock.json generated Normal file
View file

@ -0,0 +1,24 @@
{
"name": "near-intents-monitor-poc",
"version": "0.1.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "near-intents-monitor-poc",
"version": "0.1.0",
"dependencies": {
"kafkajs": "^2.2.4"
}
},
"node_modules/kafkajs": {
"version": "2.2.4",
"resolved": "https://registry.npmjs.org/kafkajs/-/kafkajs-2.2.4.tgz",
"integrity": "sha512-j/YeapB1vfPT2iOIUn/vxdyKEuhuY2PxMBvf5JWux6iSaukAccrMtXEY/Lb7OvavDhOWME589bpLrEdnVHjfjA==",
"license": "MIT",
"engines": {
"node": ">=14.0.0"
}
}
}
}

16
package.json Normal file
View file

@ -0,0 +1,16 @@
{
"name": "near-intents-monitor-poc",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"near-intents:ingest": "node src/apps/near-intents-ingest.mjs",
"dummy-reactor": "node src/apps/dummy-reactor.mjs",
"dummy-executor": "node src/apps/dummy-executor.mjs",
"dummy-consumer": "node src/apps/dummy-consumer.mjs",
"start": "node index.mjs"
},
"dependencies": {
"kafkajs": "^2.2.4"
}
}

View file

@ -11,7 +11,6 @@
# What bootstrap materializes from this file: # What bootstrap materializes from this file:
# - overwrites deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env # - overwrites deploy/k8s/overlays/hetzner-single-node/secrets/unrip.env
# - overwrites deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env # - overwrites deploy/k8s/overlays/hetzner-single-node/secrets/forgejo.env
# - overwrites deploy/k8s/overlays/hetzner-single-node/secrets/observability.env
# - renders generated ingress/issuer patches under .state/hetzner/generated-overlay/ # - renders generated ingress/issuer patches under .state/hetzner/generated-overlay/
# - creates registry-secrets and the project docker-registry pull secret imperatively # - creates registry-secrets and the project docker-registry pull secret imperatively
# #
@ -30,20 +29,16 @@ pass_ref() {
export HCLOUD_TOKEN_PASS="${HCLOUD_TOKEN_PASS:-$(pass_ref hetzner/hcloud-token)}" export HCLOUD_TOKEN_PASS="${HCLOUD_TOKEN_PASS:-$(pass_ref hetzner/hcloud-token)}"
export SSH_PUBLIC_KEY_PATH="${SSH_PUBLIC_KEY_PATH:-$HOME/.ssh/id_ed25519.pub}" export SSH_PUBLIC_KEY_PATH="${SSH_PUBLIC_KEY_PATH:-$HOME/.ssh/id_ed25519.pub}"
# Optional project defaults. The infra repo prepares the shared unrip namespace, # Optional project override. Defaults target the built-in unrip project overlay.
# secrets, and registry auth. The app code/manifests are expected in a separate repo.
export PROJECT_NAME="${PROJECT_NAME:-unrip}" export PROJECT_NAME="${PROJECT_NAME:-unrip}"
export PROJECT_NAMESPACE="${PROJECT_NAMESPACE:-$PROJECT_NAME}" export PROJECT_NAMESPACE="${PROJECT_NAMESPACE:-$PROJECT_NAME}"
export APP_REPO_DIR="${APP_REPO_DIR:-$PWD/../unrip-project}"
# export PROJECT_OVERLAY_DIR="$PWD/deploy/k8s/overlays/hetzner-single-node" # export PROJECT_OVERLAY_DIR="$PWD/deploy/k8s/overlays/hetzner-single-node"
# export PROJECT_KUSTOMIZE_PATH="../../projects/unrip/base"
# export PROJECT_SECRET_NAME="unrip-secrets" # export PROJECT_SECRET_NAME="unrip-secrets"
# export PROJECT_SECRET_ENV_BASENAME="unrip.env" # export PROJECT_SECRET_ENV_BASENAME="unrip.env"
# export PROJECT_REGISTRY_SECRET_NAME="unrip-registry-creds" # export PROJECT_REGISTRY_SECRET_NAME="unrip-registry-creds"
# export PROJECT_IMAGE_REPOSITORY="unrip" # export PROJECT_IMAGE_REPOSITORY="unrip"
# export PROJECT_DEPLOYMENTS="near-intents-ingest dummy-reactor dummy-executor dummy-consumer" # export PROJECT_DEPLOYMENTS="near-intents-ingest dummy-reactor dummy-executor dummy-consumer"
# export APP_REPO_KUSTOMIZE_PATH="deploy/k8s/base"
# export APP_FORGEJO_REPO_OWNER="$FORGEJO_ADMIN_USERNAME"
# export APP_FORGEJO_REPO_NAME="$PROJECT_NAME"
# Tailscale-first admin access (recommended) # Tailscale-first admin access (recommended)
export TAILSCALE_AUTH_KEY_PASS="${TAILSCALE_AUTH_KEY_PASS:-$(pass_ref tailscale/auth-key)}" export TAILSCALE_AUTH_KEY_PASS="${TAILSCALE_AUTH_KEY_PASS:-$(pass_ref tailscale/auth-key)}"
@ -56,11 +51,9 @@ export TF_ADMIN_CIDR_BLOCKS="${TF_ADMIN_CIDR_BLOCKS:-[]}"
# Public naming for ingress/TLS # Public naming for ingress/TLS
export PUBLIC_DOMAIN="${PUBLIC_DOMAIN:-doran.133011.xyz}" export PUBLIC_DOMAIN="${PUBLIC_DOMAIN:-doran.133011.xyz}"
export BASE_DOMAIN="${BASE_DOMAIN:-133011.xyz}" export BASE_DOMAIN="${BASE_DOMAIN:-133011.xyz}"
export FORGEJO_DOMAIN="${FORGEJO_DOMAIN:-git.${PUBLIC_DOMAIN}}" export FORGEJO_DOMAIN="${FORGEJO_DOMAIN:-git.${BASE_DOMAIN}}"
export FORGEJO_ROOT_URL="${FORGEJO_ROOT_URL:-https://${FORGEJO_DOMAIN}/}" export FORGEJO_ROOT_URL="${FORGEJO_ROOT_URL:-https://${FORGEJO_DOMAIN}/}"
export REGISTRY_DOMAIN="${REGISTRY_DOMAIN:-registry.${PUBLIC_DOMAIN}}" export REGISTRY_DOMAIN="${REGISTRY_DOMAIN:-registry.${BASE_DOMAIN}}"
export GRAFANA_DOMAIN="${GRAFANA_DOMAIN:-grafana.${PUBLIC_DOMAIN}}"
export GRAFANA_ROOT_URL="${GRAFANA_ROOT_URL:-https://${GRAFANA_DOMAIN}/}"
export LETSENCRYPT_EMAIL="${LETSENCRYPT_EMAIL:-ops@example.com}" export LETSENCRYPT_EMAIL="${LETSENCRYPT_EMAIL:-ops@example.com}"
# Optional DNS automation: choose one provider # Optional DNS automation: choose one provider
@ -84,23 +77,11 @@ export FORGEJO_ADMIN_USERNAME="${FORGEJO_ADMIN_USERNAME:-forgejo-admin}"
export FORGEJO_ADMIN_EMAIL="${FORGEJO_ADMIN_EMAIL:-${FORGEJO_ADMIN_USERNAME}@${BASE_DOMAIN}}" export FORGEJO_ADMIN_EMAIL="${FORGEJO_ADMIN_EMAIL:-${FORGEJO_ADMIN_USERNAME}@${BASE_DOMAIN}}"
export FORGEJO_ADMIN_PASSWORD_PASS="${FORGEJO_ADMIN_PASSWORD_PASS:-$(pass_ref forgejo/admin-password)}" export FORGEJO_ADMIN_PASSWORD_PASS="${FORGEJO_ADMIN_PASSWORD_PASS:-$(pass_ref forgejo/admin-password)}"
# Grafana bootstrap auth for the public observability UI
export GRAFANA_ADMIN_USERNAME="${GRAFANA_ADMIN_USERNAME:-admin}"
export GRAFANA_ADMIN_PASSWORD_PASS="${GRAFANA_ADMIN_PASSWORD_PASS:-$(pass_ref grafana/admin-password)}"
export HEADLAMP_ADMIN_TOKEN_PASS="${HEADLAMP_ADMIN_TOKEN_PASS:-$(pass_ref headlamp/admin-token)}"
# Headlamp bootstrap token handling:
# - bootstrap stores the generated token in HEADLAMP_ADMIN_TOKEN_PASS when set
# - the current default public hostname is HEADLAMP_DOMAIN
# - for a stricter posture, you can still keep Headlamp private behind Tailscale or another admin path
# Optional explicit overrides for CI/testing: # Optional explicit overrides for CI/testing:
# export HCLOUD_TOKEN="..." # export HCLOUD_TOKEN="..."
# export REGISTRY_PASSWORD="..." # export REGISTRY_PASSWORD="..."
# export NEAR_INTENTS_API_KEY="..." # export NEAR_INTENTS_API_KEY="..."
# export FORGEJO_ADMIN_PASSWORD="..." # export FORGEJO_ADMIN_PASSWORD="..."
# export GRAFANA_ADMIN_PASSWORD="..."
# export CLOUDFLARE_API_TOKEN="..." # export CLOUDFLARE_API_TOKEN="..."
# export CLOUDFLARE_ZONE_ID="..." # export CLOUDFLARE_ZONE_ID="..."
# export PORKBUN_API_KEY="..." # export PORKBUN_API_KEY="..."

View file

@ -31,7 +31,6 @@ resolve_secret_var TAILSCALE_AUTH_KEY optional
resolve_secret_var NEAR_INTENTS_API_KEY required resolve_secret_var NEAR_INTENTS_API_KEY required
resolve_secret_var REGISTRY_PASSWORD required resolve_secret_var REGISTRY_PASSWORD required
resolve_secret_var FORGEJO_ADMIN_PASSWORD required resolve_secret_var FORGEJO_ADMIN_PASSWORD required
resolve_secret_var GRAFANA_ADMIN_PASSWORD optional
resolve_secret_var CLOUDFLARE_API_TOKEN optional resolve_secret_var CLOUDFLARE_API_TOKEN optional
resolve_secret_var CLOUDFLARE_ZONE_ID optional resolve_secret_var CLOUDFLARE_ZONE_ID optional
resolve_secret_var PORKBUN_API_KEY optional resolve_secret_var PORKBUN_API_KEY optional
@ -41,14 +40,10 @@ resolve_secret_var PORKBUN_SECRET_API_KEY optional
: "${PUBLIC_DOMAIN:?set PUBLIC_DOMAIN}" : "${PUBLIC_DOMAIN:?set PUBLIC_DOMAIN}"
: "${LETSENCRYPT_EMAIL:?set LETSENCRYPT_EMAIL}" : "${LETSENCRYPT_EMAIL:?set LETSENCRYPT_EMAIL}"
: "${BASE_DOMAIN:?set BASE_DOMAIN}" : "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${FORGEJO_DOMAIN:=git.${PUBLIC_DOMAIN}}" : "${FORGEJO_DOMAIN:=git.${BASE_DOMAIN}}"
: "${FORGEJO_ROOT_URL:=https://${FORGEJO_DOMAIN}/}" : "${FORGEJO_ROOT_URL:=https://${FORGEJO_DOMAIN}/}"
: "${FORGEJO_INTERNAL_URL:=http://forgejo.forgejo.svc.cluster.local:3000/}" : "${FORGEJO_INTERNAL_URL:=http://forgejo.forgejo.svc.cluster.local:3000/}"
: "${REGISTRY_DOMAIN:=registry.${PUBLIC_DOMAIN}}" : "${REGISTRY_DOMAIN:=registry.${BASE_DOMAIN}}"
: "${GRAFANA_DOMAIN:=grafana.${PUBLIC_DOMAIN}}"
: "${GRAFANA_ROOT_URL:=https://${GRAFANA_DOMAIN}/}"
: "${HEADLAMP_DOMAIN:=headlamp.${PUBLIC_DOMAIN}}"
: "${GRAFANA_ADMIN_USERNAME:=admin}"
: "${REGISTRY_USERNAME:?set REGISTRY_USERNAME}" : "${REGISTRY_USERNAME:?set REGISTRY_USERNAME}"
: "${TAILSCALE_CONTROL_PLANE_HOSTNAME:=}" : "${TAILSCALE_CONTROL_PLANE_HOSTNAME:=}"
: "${TF_ADMIN_CIDR_BLOCKS:=}" : "${TF_ADMIN_CIDR_BLOCKS:=}"
@ -56,11 +51,9 @@ resolve_secret_var PORKBUN_SECRET_API_KEY optional
: "${PROJECT_NAME:=$DEFAULT_PROJECT_NAME}" : "${PROJECT_NAME:=$DEFAULT_PROJECT_NAME}"
: "${PROJECT_NAMESPACE:=$DEFAULT_PROJECT_NAMESPACE}" : "${PROJECT_NAMESPACE:=$DEFAULT_PROJECT_NAMESPACE}"
: "${PROJECT_OVERLAY_DIR:=$OVERLAY_DIR}" : "${PROJECT_OVERLAY_DIR:=$OVERLAY_DIR}"
: "${PROJECT_DIR:=$ROOT_DIR}" : "${BOOTSTRAP_NODE_NAME:=unrip-1}"
: "${PROJECT_REPO_PATH:=.}"
: "${BOOTSTRAP_NODE_NAME:=doran-1}"
: "${SKIP_TERRAFORM_APPLY:=0}" : "${SKIP_TERRAFORM_APPLY:=0}"
: "${PROJECT_KUSTOMIZE_PATH:=}" : "${PROJECT_KUSTOMIZE_PATH:=../../projects/${PROJECT_NAME}/base}"
: "${PROJECT_SECRET_NAME:=${PROJECT_NAME}-secrets}" : "${PROJECT_SECRET_NAME:=${PROJECT_NAME}-secrets}"
: "${PROJECT_SECRET_ENV_BASENAME:=${PROJECT_NAME}.env}" : "${PROJECT_SECRET_ENV_BASENAME:=${PROJECT_NAME}.env}"
: "${PROJECT_REGISTRY_SECRET_NAME:=${PROJECT_NAME}-registry-creds}" : "${PROJECT_REGISTRY_SECRET_NAME:=${PROJECT_NAME}-registry-creds}"
@ -73,30 +66,15 @@ resolve_secret_var PORKBUN_SECRET_API_KEY optional
: "${FORGEJO_REPO_OWNER:=$FORGEJO_ADMIN_USERNAME}" : "${FORGEJO_REPO_OWNER:=$FORGEJO_ADMIN_USERNAME}"
: "${FORGEJO_REPO_NAME:=$(basename "$ROOT_DIR")}" : "${FORGEJO_REPO_NAME:=$(basename "$ROOT_DIR")}"
: "${FORGEJO_REPO_PRIVATE:=true}" : "${FORGEJO_REPO_PRIVATE:=true}"
: "${APP_REPO_DIR:=$(realpath "$ROOT_DIR/../unrip-project")}"
: "${APP_REPO_KUSTOMIZE_PATH:=deploy/k8s/base}"
: "${APP_FORGEJO_REPO_OWNER:=$FORGEJO_REPO_OWNER}"
: "${APP_FORGEJO_REPO_NAME:=$PROJECT_NAME}"
: "${APP_FORGEJO_REPO_PRIVATE:=true}"
: "${BOOTSTRAP_DELIVERY_MODE:=forgejo-actions}" : "${BOOTSTRAP_DELIVERY_MODE:=forgejo-actions}"
BOOTSTRAP_IMAGE="${PROJECT_IMAGE_REPOSITORY}:bootstrap" BOOTSTRAP_IMAGE="${PROJECT_IMAGE_REPOSITORY}:bootstrap"
PROJECT_SECRET_ENV_PATH="$PROJECT_OVERLAY_DIR/secrets/$PROJECT_SECRET_ENV_BASENAME" PROJECT_SECRET_ENV_PATH="$PROJECT_OVERLAY_DIR/secrets/$PROJECT_SECRET_ENV_BASENAME"
GENERATED_OVERLAY_DIR="$STATE_DIR/generated-overlay" GENERATED_OVERLAY_DIR="$STATE_DIR/generated-overlay"
APP_REPO_DIR="$(realpath "$APP_REPO_DIR")"
APP_KUSTOMIZE_DIR="$APP_REPO_DIR/$APP_REPO_KUSTOMIZE_PATH"
if [[ "$BOOTSTRAP_DELIVERY_MODE" != "forgejo-actions" ]]; then if [[ "$BOOTSTRAP_DELIVERY_MODE" != "forgejo-actions" ]]; then
require docker require docker
fi fi
if [[ ! -d "$APP_REPO_DIR/.git" ]]; then
echo "missing app repository at $APP_REPO_DIR" >&2
exit 1
fi
if [[ ! -f "$APP_KUSTOMIZE_DIR/kustomization.yaml" ]]; then
echo "missing app kustomization at $APP_KUSTOMIZE_DIR/kustomization.yaml" >&2
exit 1
fi
if [[ -n "${TAILSCALE_AUTH_KEY:-}" && "$TF_ADMIN_CIDR_BLOCKS" == '[]' && "$BOOTSTRAP_ALLOW_PUBLIC_ADMIN_FALLBACK" == "1" ]]; then if [[ -n "${TAILSCALE_AUTH_KEY:-}" && "$TF_ADMIN_CIDR_BLOCKS" == '[]' && "$BOOTSTRAP_ALLOW_PUBLIC_ADMIN_FALLBACK" == "1" ]]; then
OPERATOR_PUBLIC_IP="$(curl -fsS https://api.ipify.org || true)" OPERATOR_PUBLIC_IP="$(curl -fsS https://api.ipify.org || true)"
if [[ "$OPERATOR_PUBLIC_IP" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then if [[ "$OPERATOR_PUBLIC_IP" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
@ -198,15 +176,6 @@ yaml.safe_dump(config, open(dst, 'w'), sort_keys=False)
PY PY
mkdir -p "$PROJECT_OVERLAY_DIR/secrets" "$GENERATED_OVERLAY_DIR" mkdir -p "$PROJECT_OVERLAY_DIR/secrets" "$GENERATED_OVERLAY_DIR"
OBSERVABILITY_SECRET_ENV_PATH="$PROJECT_OVERLAY_DIR/secrets/observability.env"
if [[ -z "${GRAFANA_ADMIN_PASSWORD:-}" ]]; then
GRAFANA_ADMIN_PASSWORD="$(python3 - <<'PY'
import secrets
print(secrets.token_urlsafe(24))
PY
)"
echo "GRAFANA_ADMIN_PASSWORD not provided; generated a random bootstrap password for Grafana admin user '$GRAFANA_ADMIN_USERNAME'" >&2
fi
cat > "$PROJECT_SECRET_ENV_PATH" <<EOF cat > "$PROJECT_SECRET_ENV_PATH" <<EOF
NEAR_INTENTS_API_KEY=$NEAR_INTENTS_API_KEY NEAR_INTENTS_API_KEY=$NEAR_INTENTS_API_KEY
EOF EOF
@ -214,11 +183,6 @@ cat > "$PROJECT_OVERLAY_DIR/secrets/forgejo.env" <<EOF
root_url=$FORGEJO_ROOT_URL root_url=$FORGEJO_ROOT_URL
domain=$FORGEJO_DOMAIN domain=$FORGEJO_DOMAIN
EOF EOF
cat > "$OBSERVABILITY_SECRET_ENV_PATH" <<EOF
grafana_admin_user=$GRAFANA_ADMIN_USERNAME
grafana_admin_password=$GRAFANA_ADMIN_PASSWORD
grafana_root_url=$GRAFANA_ROOT_URL
EOF
python3 - <<PY python3 - <<PY
import os import os
from pathlib import Path from pathlib import Path
@ -233,39 +197,26 @@ platform_base = (root / "../../platform/base").resolve()
project_base = (root / project_kustomize_path).resolve() if project_kustomize_path else None project_base = (root / project_kustomize_path).resolve() if project_kustomize_path else None
project_secret_env = (root / "secrets" / project_secret_env_basename).resolve() project_secret_env = (root / "secrets" / project_secret_env_basename).resolve()
forgejo_secret_env = (root / "secrets" / "forgejo.env").resolve() forgejo_secret_env = (root / "secrets" / "forgejo.env").resolve()
observability_secret_env = (root / "secrets" / "observability.env").resolve() platform_resources = [
platform_base / "namespace.yaml",
platform_base / "forgejo.yaml",
platform_base / "forgejo-rbac.yaml",
platform_base / "forgejo-runner.yaml",
platform_base / "registry.yaml",
platform_base / "ingress.yaml",
platform_base / "cluster-issuers.yaml",
platform_base / "coredns.yaml",
]
resources = [os.path.relpath(platform_base, generated_root)] resources = [os.path.relpath(path, generated_root) for path in platform_resources]
if project_base: if project_base:
resources.append(os.path.relpath(project_base, generated_root)) resources.append(os.path.relpath(project_base, generated_root))
generated_root.mkdir(parents=True, exist_ok=True) generated_root.mkdir(parents=True, exist_ok=True)
project_secret_env_rel = Path(project_secret_env.name) project_secret_env_rel = Path(project_secret_env.name)
forgejo_secret_env_rel = Path(forgejo_secret_env.name) forgejo_secret_env_rel = Path(forgejo_secret_env.name)
observability_secret_env_rel = Path(observability_secret_env.name)
(generated_root / project_secret_env_rel).write_text(project_secret_env.read_text()) (generated_root / project_secret_env_rel).write_text(project_secret_env.read_text())
(generated_root / forgejo_secret_env_rel).write_text(forgejo_secret_env.read_text()) (generated_root / forgejo_secret_env_rel).write_text(forgejo_secret_env.read_text())
if observability_secret_env.exists():
(generated_root / observability_secret_env_rel).write_text(
observability_secret_env.read_text()
)
secret_generator_entries = [
f" - name: {project_secret_name}\n"
f" namespace: {project_namespace}\n"
f" envs:\n"
f" - {project_secret_env_rel}\n",
" - name: forgejo-secrets\n"
" namespace: forgejo\n"
" envs:\n"
f" - {forgejo_secret_env_rel}\n",
]
if observability_secret_env.exists():
secret_generator_entries.append(
" - name: observability-secrets\n"
" namespace: observability\n"
" envs:\n"
f" - {observability_secret_env_rel}\n"
)
(generated_root / "kustomization.yaml").write_text( (generated_root / "kustomization.yaml").write_text(
"""apiVersion: kustomize.config.k8s.io/v1beta1 """apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
@ -277,102 +228,26 @@ resources:
- path: issuer-email.patch.yaml - path: issuer-email.patch.yaml
- path: storage-class.patch.yaml - path: storage-class.patch.yaml
secretGenerator: secretGenerator:
""" - name: {project_secret_name}
+ "".join(secret_generator_entries) namespace: {project_namespace}
+ """generatorOptions: envs:
- {project_secret_env_rel}
- name: forgejo-secrets
namespace: forgejo
envs:
- {forgejo_secret_env_rel}
generatorOptions:
disableNameSuffixHash: true disableNameSuffixHash: true
""" """.format(
project_secret_name=project_secret_name,
project_namespace=project_namespace,
project_secret_env_rel=project_secret_env_rel,
forgejo_secret_env_rel=forgejo_secret_env_rel,
)
) )
(generated_root / "storage-class.patch.yaml").write_text((root / "storage-class.patch.yaml").read_text()) (generated_root / "storage-class.patch.yaml").write_text((root / "storage-class.patch.yaml").read_text())
(generated_root / "issuer-email.patch.yaml").write_text(f'''apiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-staging\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n---\napiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-production\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n''') (generated_root / "issuer-email.patch.yaml").write_text(f'''apiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-staging\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n---\napiVersion: cert-manager.io/v1\nkind: ClusterIssuer\nmetadata:\n name: letsencrypt-production\nspec:\n acme:\n email: {"$LETSENCRYPT_EMAIL"}\n''')
(generated_root / "ingress-hosts.patch.yaml").write_text(f'''apiVersion: networking.k8s.io/v1 (generated_root / "ingress-hosts.patch.yaml").write_text(f'''apiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: forgejo\n namespace: forgejo\nspec:\n tls:\n - hosts:\n - {"$FORGEJO_DOMAIN"}\n secretName: forgejo-tls\n rules:\n - host: {"$FORGEJO_DOMAIN"}\n---\napiVersion: networking.k8s.io/v1\nkind: Ingress\nmetadata:\n name: registry\n namespace: registry\nspec:\n tls:\n - hosts:\n - {"$REGISTRY_DOMAIN"}\n secretName: registry-tls\n rules:\n - host: {"$REGISTRY_DOMAIN"}\n''')
kind: Ingress
metadata:
name: forgejo
namespace: forgejo
spec:
tls:
- hosts:
- {"$FORGEJO_DOMAIN"}
secretName: forgejo-tls
rules:
- host: {"$FORGEJO_DOMAIN"}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: forgejo
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: registry
namespace: registry
spec:
tls:
- hosts:
- {"$REGISTRY_DOMAIN"}
secretName: registry-tls
rules:
- host: {"$REGISTRY_DOMAIN"}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: registry
port:
number: 5000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: grafana
namespace: observability
spec:
tls:
- hosts:
- {"$GRAFANA_DOMAIN"}
secretName: grafana-tls
rules:
- host: {"$GRAFANA_DOMAIN"}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: grafana
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: headlamp
namespace: observability
spec:
tls:
- hosts:
- {"$HEADLAMP_DOMAIN"}
secretName: headlamp-tls
rules:
- host: {"$HEADLAMP_DOMAIN"}
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: headlamp
port:
number: 80
''')
PY PY
kubectl apply -f "$ROOT_DIR/deploy/k8s/platform/base/namespace.yaml" kubectl apply -f "$ROOT_DIR/deploy/k8s/platform/base/namespace.yaml"
@ -390,36 +265,11 @@ kubectl -n cert-manager delete deployment cert-manager cert-manager-webhook cert
kubectl apply -f "$ROOT_DIR/deploy/k8s/platform/base/cert-manager.yaml" kubectl apply -f "$ROOT_DIR/deploy/k8s/platform/base/cert-manager.yaml"
kubectl wait --for=condition=Established --timeout=180s crd/certificates.cert-manager.io kubectl wait --for=condition=Established --timeout=180s crd/certificates.cert-manager.io
kubectl wait --for=condition=Established --timeout=180s crd/clusterissuers.cert-manager.io kubectl wait --for=condition=Established --timeout=180s crd/clusterissuers.cert-manager.io
kubectl apply -k "$GENERATED_OVERLAY_DIR" kubectl apply -k "$PROJECT_OVERLAY_DIR"
kubectl -n forgejo rollout status deployment/forgejo --timeout=300s kubectl -n forgejo rollout status deployment/forgejo --timeout=300s
kubectl -n registry rollout status deployment/registry --timeout=300s kubectl -n registry rollout status deployment/registry --timeout=300s
kubectl -n observability rollout status deployment/loki --timeout=300s
kubectl -n observability rollout status deployment/grafana --timeout=300s
kubectl -n observability rollout status deployment/headlamp --timeout=300s
kubectl -n observability rollout status daemonset/promtail --timeout=300s
kubectl apply -k "$APP_KUSTOMIZE_DIR"
kubectl -n "$PROJECT_NAMESPACE" rollout status deployment/redpanda --timeout=300s kubectl -n "$PROJECT_NAMESPACE" rollout status deployment/redpanda --timeout=300s
kubectl -n "$PROJECT_NAMESPACE" wait --for=condition=Complete --timeout=300s job/redpanda-topic-bootstrap
HEADLAMP_ADMIN_TOKEN=""
for attempt in $(seq 1 60); do
HEADLAMP_ADMIN_TOKEN="$(kubectl -n observability get secret headlamp-admin-token -o jsonpath='{.data.token}' 2>/dev/null | base64 -d 2>/dev/null || true)"
if [[ -n "$HEADLAMP_ADMIN_TOKEN" ]]; then
break
fi
if (( attempt == 1 || attempt % 6 == 0 )); then
echo "waiting for headlamp admin token (${attempt}/60)..."
fi
sleep 2
done
if [[ -z "$HEADLAMP_ADMIN_TOKEN" ]]; then
echo "warning: headlamp admin token not available yet; read secret headlamp-admin-token manually if needed" >&2
elif [[ -n "${HEADLAMP_ADMIN_TOKEN_PASS:-}" ]]; then
store_secret_to_pass "$HEADLAMP_ADMIN_TOKEN_PASS" "$HEADLAMP_ADMIN_TOKEN"
echo "stored headlamp admin token in pass: $HEADLAMP_ADMIN_TOKEN_PASS"
fi
forgejo_admin_user_b64=$(printf '%s' "$FORGEJO_ADMIN_USERNAME" | base64 | tr -d '\n') forgejo_admin_user_b64=$(printf '%s' "$FORGEJO_ADMIN_USERNAME" | base64 | tr -d '\n')
forgejo_admin_pass_b64=$(printf '%s' "$FORGEJO_ADMIN_PASSWORD" | base64 | tr -d '\n') forgejo_admin_pass_b64=$(printf '%s' "$FORGEJO_ADMIN_PASSWORD" | base64 | tr -d '\n')
@ -499,8 +349,7 @@ wait_for_url "$FORGEJO_BOOTSTRAP_URL" "Forgejo bootstrap URL" 60 2
if [[ "$BOOTSTRAP_DELIVERY_MODE" == "forgejo-actions" ]]; then if [[ "$BOOTSTRAP_DELIVERY_MODE" == "forgejo-actions" ]]; then
FORGEJO_ADMIN_API_TOKEN="$(kubectl -n forgejo exec deploy/forgejo -- /bin/bash --noprofile --norc -lc "su-exec git /usr/local/bin/forgejo admin user generate-access-token --config /data/gitea/conf/app.ini --username '$FORGEJO_ADMIN_USERNAME' --token-name bootstrap-$(date +%s) --scopes read:user,read:repository,write:repository,write:user --raw" | tr -d '\r\n')" FORGEJO_ADMIN_API_TOKEN="$(kubectl -n forgejo exec deploy/forgejo -- /bin/bash --noprofile --norc -lc "su-exec git /usr/local/bin/forgejo admin user generate-access-token --config /data/gitea/conf/app.ini --username '$FORGEJO_ADMIN_USERNAME' --token-name bootstrap-$(date +%s) --scopes read:user,read:repository,write:repository,write:user --raw" | tr -d '\r\n')"
forgejo_bootstrap_args=(
infra_bootstrap_args=(
--forgejo-url "$FORGEJO_BOOTSTRAP_URL" --forgejo-url "$FORGEJO_BOOTSTRAP_URL"
--token "$FORGEJO_ADMIN_API_TOKEN" --token "$FORGEJO_ADMIN_API_TOKEN"
--admin-username "$FORGEJO_ADMIN_USERNAME" --admin-username "$FORGEJO_ADMIN_USERNAME"
@ -513,102 +362,29 @@ if [[ "$BOOTSTRAP_DELIVERY_MODE" == "forgejo-actions" ]]; then
--project-name "$PROJECT_NAME" --project-name "$PROJECT_NAME"
--project-namespace "$PROJECT_NAMESPACE" --project-namespace "$PROJECT_NAMESPACE"
--project-deployments "${PROJECT_DEPLOYMENTS// /,}" --project-deployments "${PROJECT_DEPLOYMENTS// /,}"
--project-path "$PROJECT_REPO_PATH"
) )
if [[ "$FORGEJO_REPO_PRIVATE" == "true" ]]; then if [[ "$FORGEJO_REPO_PRIVATE" == "true" ]]; then
infra_bootstrap_args+=(--repo-private) forgejo_bootstrap_args+=(--repo-private)
fi fi
python3 "$ROOT_DIR/scripts/hetzner/forgejo-bootstrap.py" "${infra_bootstrap_args[@]}" python3 "$ROOT_DIR/scripts/hetzner/forgejo-bootstrap.py" "${forgejo_bootstrap_args[@]}"
FORGEJO_PUSH_URL_BASE="$FORGEJO_BOOTSTRAP_URL" bash "$ROOT_DIR/scripts/hetzner/seed-forgejo-repo.sh" FORGEJO_PUSH_URL_BASE="$FORGEJO_BOOTSTRAP_URL" bash "$ROOT_DIR/scripts/hetzner/seed-forgejo-repo.sh"
app_bootstrap_args=(
--forgejo-url "$FORGEJO_BOOTSTRAP_URL"
--token "$FORGEJO_ADMIN_API_TOKEN"
--admin-username "$FORGEJO_ADMIN_USERNAME"
--repo-owner "$APP_FORGEJO_REPO_OWNER"
--repo-name "$APP_FORGEJO_REPO_NAME"
--kubeconfig "$KUBECONFIG_PATH"
--registry-username "$REGISTRY_USERNAME"
--registry-password "$REGISTRY_PASSWORD"
--registry-host "$REGISTRY_DOMAIN"
--project-name "$PROJECT_NAME"
--project-namespace "$PROJECT_NAMESPACE"
--project-deployments "${PROJECT_DEPLOYMENTS// /,}"
--project-path .
)
if [[ "$APP_FORGEJO_REPO_PRIVATE" == "true" ]]; then
app_bootstrap_args+=(--repo-private)
fi
python3 "$ROOT_DIR/scripts/hetzner/forgejo-bootstrap.py" "${app_bootstrap_args[@]}"
wait_for_url "$FORGEJO_ROOT_URL" "Forgejo public URL" 180 5 wait_for_url "$FORGEJO_ROOT_URL" "Forgejo public URL" 180 5
wait_for_http_status "https://$REGISTRY_DOMAIN/v2/" "registry public URL" '200|401' 180 5 wait_for_http_status "https://$REGISTRY_DOMAIN/v2/" "registry public URL" '200|401' 180 5
APP_COMMIT_SHA="$(git -C "$APP_REPO_DIR" rev-parse HEAD)"
APP_BUILD_JOB="image-build-${APP_COMMIT_SHA:0:12}"
FORGEJO_PUSH_URL_BASE="$FORGEJO_BOOTSTRAP_URL" \
SOURCE_REPO_DIR="$APP_REPO_DIR" \
FORGEJO_REPO_OWNER="$APP_FORGEJO_REPO_OWNER" \
FORGEJO_REPO_NAME="$APP_FORGEJO_REPO_NAME" \
bash "$ROOT_DIR/scripts/hetzner/seed-forgejo-repo.sh"
for attempt in $(seq 1 120); do
if kubectl -n "$PROJECT_NAMESPACE" get job "$APP_BUILD_JOB" >/dev/null 2>&1; then
break
fi
if (( attempt == 1 || attempt % 6 == 0 )); then
echo "waiting for app build job $APP_BUILD_JOB (${attempt}/120)..."
fi
sleep 5
done
if ! kubectl -n "$PROJECT_NAMESPACE" get job "$APP_BUILD_JOB" >/dev/null 2>&1; then
echo "app build job did not appear: $APP_BUILD_JOB" >&2
exit 1
fi
kubectl -n "$PROJECT_NAMESPACE" wait --for=condition=Complete --timeout=1200s "job/$APP_BUILD_JOB"
kubectl -n "$PROJECT_NAMESPACE" logs "job/$APP_BUILD_JOB"
else else
docker build -t "$BOOTSTRAP_IMAGE" "$APP_REPO_DIR" docker build -t "$BOOTSTRAP_IMAGE" "$ROOT_DIR"
docker save "$BOOTSTRAP_IMAGE" \ docker save "$BOOTSTRAP_IMAGE" \
| ssh -i "$SSH_PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$SSH_TARGET" 'sudo k3s ctr images import -' | ssh -i "$SSH_PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$SSH_TARGET" 'sudo k3s ctr images import -'
for deployment in $PROJECT_DEPLOYMENTS; do for deployment in $PROJECT_DEPLOYMENTS; do
kubectl -n "$PROJECT_NAMESPACE" set image "deployment/${deployment}" app="$BOOTSTRAP_IMAGE" kubectl -n "$PROJECT_NAMESPACE" set image "deployment/${deployment}" app="$BOOTSTRAP_IMAGE"
done done
for deployment in $PROJECT_DEPLOYMENTS; do
kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/${deployment}" --timeout=180s
done
fi fi
for deployment in $PROJECT_DEPLOYMENTS; do
kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/${deployment}" --timeout=300s
done
DURABLE_K3S_API_URL="$K3S_API_URL"
DURABLE_INSECURE_SKIP_TLS_VERIFY=0
if [[ "$USE_SSH_TUNNEL_FOR_K3S" == "1" ]]; then
if [[ -n "${TAILSCALE_CONTROL_PLANE_HOSTNAME:-}" ]]; then
DURABLE_K3S_API_URL="https://${TAILSCALE_CONTROL_PLANE_HOSTNAME}:6443"
elif [[ "$TF_ADMIN_CIDR_BLOCKS" != '[]' ]]; then
DURABLE_K3S_API_URL="https://${SERVER_IP}:6443"
DURABLE_INSECURE_SKIP_TLS_VERIFY=1
fi
fi
python3 - "$KUBECONFIG_PATH" "$DURABLE_K3S_API_URL" "$DURABLE_INSECURE_SKIP_TLS_VERIFY" <<'PY'
import sys
import yaml
path, server, insecure = sys.argv[1], sys.argv[2], sys.argv[3] == '1'
config = yaml.safe_load(open(path))
cluster = config['clusters'][0]['cluster']
cluster['server'] = server
if insecure:
cluster.pop('certificate-authority-data', None)
cluster['insecure-skip-tls-verify'] = True
else:
cluster.pop('insecure-skip-tls-verify', None)
yaml.safe_dump(config, open(path, 'w'), sort_keys=False)
PY
K3S_API_URL="$DURABLE_K3S_API_URL"
echo "bootstrap complete" echo "bootstrap complete"
echo "project_name=$PROJECT_NAME" echo "project_name=$PROJECT_NAME"
echo "project_namespace=$PROJECT_NAMESPACE" echo "project_namespace=$PROJECT_NAMESPACE"
@ -621,9 +397,5 @@ echo "ci_kubeconfig=$CI_KUBECONFIG_PATH"
echo "bootstrap_delivery_mode=$BOOTSTRAP_DELIVERY_MODE" echo "bootstrap_delivery_mode=$BOOTSTRAP_DELIVERY_MODE"
echo "forgejo_url=$FORGEJO_ROOT_URL" echo "forgejo_url=$FORGEJO_ROOT_URL"
echo "forgejo_repo=${FORGEJO_ROOT_URL%/}/$FORGEJO_REPO_OWNER/$FORGEJO_REPO_NAME" echo "forgejo_repo=${FORGEJO_ROOT_URL%/}/$FORGEJO_REPO_OWNER/$FORGEJO_REPO_NAME"
echo "app_repo=${FORGEJO_ROOT_URL%/}/$APP_FORGEJO_REPO_OWNER/$APP_FORGEJO_REPO_NAME"
echo "registry_url=https://$REGISTRY_DOMAIN" echo "registry_url=https://$REGISTRY_DOMAIN"
echo "grafana_url=$GRAFANA_ROOT_URL"
echo "headlamp_url=https://$HEADLAMP_DOMAIN/"
echo "headlamp_token_pass=${HEADLAMP_ADMIN_TOKEN_PASS:-}"
echo "dns_provider=${CLOUDFLARE_API_TOKEN:+cloudflare}${PORKBUN_API_KEY:+porkbun}" echo "dns_provider=${CLOUDFLARE_API_TOKEN:+cloudflare}${PORKBUN_API_KEY:+porkbun}"

View file

@ -4,8 +4,7 @@ set -euo pipefail
: "${CLOUDFLARE_API_TOKEN:?set CLOUDFLARE_API_TOKEN}" : "${CLOUDFLARE_API_TOKEN:?set CLOUDFLARE_API_TOKEN}"
: "${CLOUDFLARE_ZONE_ID:?set CLOUDFLARE_ZONE_ID}" : "${CLOUDFLARE_ZONE_ID:?set CLOUDFLARE_ZONE_ID}"
: "${BASE_DOMAIN:?set BASE_DOMAIN}" : "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${PUBLIC_DOMAIN:=$BASE_DOMAIN}" : "${SERVER_IP:?set SERVER_IP}"
: "${DNS_MODE:=upsert}"
api() { api() {
curl -fsS -X "$1" "https://api.cloudflare.com/client/v4$2" \ curl -fsS -X "$1" "https://api.cloudflare.com/client/v4$2" \
@ -14,21 +13,15 @@ api() {
${3:+--data "$3"} ${3:+--data "$3"}
} }
lookup_record_id() {
local type="$1"
local name="$2"
curl -fsS "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records?type=$type&name=$name" \
-H "Authorization: Bearer $CLOUDFLARE_API_TOKEN" \
-H 'Content-Type: application/json' | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["result"][0]["id"] if d.get("result") else "")'
}
upsert_record() { upsert_record() {
local type="$1" local type="$1"
local name="$2" local name="$2"
local content="$3" local content="$3"
local proxied="${4:-false}" local proxied="${4:-false}"
local existing_id local existing_id
existing_id=$(lookup_record_id "$type" "$name") existing_id=$(curl -fsS "https://api.cloudflare.com/client/v4/zones/$CLOUDFLARE_ZONE_ID/dns_records?type=$type&name=$name" \
-H "Authorization: Bearer $CLOUDFLARE_API_TOKEN" \
-H 'Content-Type: application/json' | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["result"][0]["id"] if d.get("result") else "")')
local payload local payload
payload=$(printf '{"type":"%s","name":"%s","content":"%s","ttl":120,"proxied":%s}' "$type" "$name" "$content" "$proxied") payload=$(printf '{"type":"%s","name":"%s","content":"%s","ttl":120,"proxied":%s}' "$type" "$name" "$content" "$proxied")
@ -40,54 +33,8 @@ upsert_record() {
fi fi
} }
delete_record() { upsert_record A "$BASE_DOMAIN" "$SERVER_IP" false
local type="$1" upsert_record A "git.$BASE_DOMAIN" "$SERVER_IP" false
local name="$2" upsert_record A "registry.$BASE_DOMAIN" "$SERVER_IP" false
local existing_id
existing_id=$(lookup_record_id "$type" "$name")
if [[ -n "$existing_id" ]]; then echo "cloudflare dns updated for $BASE_DOMAIN, git.$BASE_DOMAIN, registry.$BASE_DOMAIN"
api DELETE "/zones/$CLOUDFLARE_ZONE_ID/dns_records/$existing_id" >/dev/null
echo "deleted $type $name"
else
echo "skipped missing $type $name"
fi
}
records=(
"$PUBLIC_DOMAIN"
"git.$PUBLIC_DOMAIN"
"registry.$PUBLIC_DOMAIN"
"grafana.$PUBLIC_DOMAIN"
"headlamp.$PUBLIC_DOMAIN"
)
ROOT_RECORD="${records[0]}"
GIT_RECORD="${records[1]}"
REGISTRY_RECORD="${records[2]}"
GRAFANA_RECORD="${records[3]}"
HEADLAMP_RECORD="${records[4]}"
case "$DNS_MODE" in
upsert)
: "${SERVER_IP:?set SERVER_IP}"
upsert_record A "$ROOT_RECORD" "$SERVER_IP" false
upsert_record A "$GIT_RECORD" "$SERVER_IP" false
upsert_record A "$REGISTRY_RECORD" "$SERVER_IP" false
upsert_record A "$GRAFANA_RECORD" "$SERVER_IP" false
upsert_record A "$HEADLAMP_RECORD" "$SERVER_IP" false
echo "cloudflare dns updated for ${records[*]}"
;;
delete)
delete_record A "$ROOT_RECORD"
delete_record A "$GIT_RECORD"
delete_record A "$REGISTRY_RECORD"
delete_record A "$GRAFANA_RECORD"
delete_record A "$HEADLAMP_RECORD"
echo "cloudflare dns cleanup finished for ${records[*]}"
;;
*)
echo "unsupported DNS_MODE: $DNS_MODE" >&2
exit 1
;;
esac

View file

@ -11,36 +11,18 @@ require python3
: "${PORKBUN_API_KEY:?set PORKBUN_API_KEY}" : "${PORKBUN_API_KEY:?set PORKBUN_API_KEY}"
: "${PORKBUN_SECRET_API_KEY:?set PORKBUN_SECRET_API_KEY}" : "${PORKBUN_SECRET_API_KEY:?set PORKBUN_SECRET_API_KEY}"
: "${BASE_DOMAIN:?set BASE_DOMAIN}" : "${BASE_DOMAIN:?set BASE_DOMAIN}"
: "${PUBLIC_DOMAIN:=$BASE_DOMAIN}" : "${SERVER_IP:?set SERVER_IP}"
: "${DNS_MODE:=upsert}"
api_base="https://api.porkbun.com/api/json/v3" api_base="https://api.porkbun.com/api/json/v3"
if [[ "$PUBLIC_DOMAIN" == "$BASE_DOMAIN" ]]; then root_name=""
root_name="" git_name="git"
elif [[ "$PUBLIC_DOMAIN" == *".$BASE_DOMAIN" ]]; then registry_name="registry"
root_name="${PUBLIC_DOMAIN%.${BASE_DOMAIN}}"
else
echo "PUBLIC_DOMAIN must equal BASE_DOMAIN or be a subdomain of BASE_DOMAIN" >&2
exit 1
fi
if [[ -n "$root_name" ]]; then
git_name="git.$root_name"
registry_name="registry.$root_name"
grafana_name="grafana.$root_name"
headlamp_name="headlamp.$root_name"
else
git_name="git"
registry_name="registry"
grafana_name="grafana"
headlamp_name="headlamp"
fi
payload() { payload() {
local name="$1" local content="$1"
local content="$2" printf '{"apikey":"%s","secretapikey":"%s","content":"%s","ttl":"600"}' \
printf '{"apikey":"%s","secretapikey":"%s","name":"%s","type":"A","content":"%s","ttl":"600"}' \ "$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY" "$content"
"$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY" "$name" "$content"
} }
list_records() { list_records() {
@ -49,9 +31,13 @@ list_records() {
--data "{\"apikey\":\"$PORKBUN_API_KEY\",\"secretapikey\":\"$PORKBUN_SECRET_API_KEY\"}" --data "{\"apikey\":\"$PORKBUN_API_KEY\",\"secretapikey\":\"$PORKBUN_SECRET_API_KEY\"}"
} }
lookup_record_id() { upsert_a_record() {
local fqdn="$1" local name="$1"
python3 - "$fqdn" "$(list_records)" <<'PY' local fqdn="$BASE_DOMAIN"
[[ -n "$name" ]] && fqdn="$name.$BASE_DOMAIN"
local record_id
record_id=$(python3 - "$fqdn" "$(list_records)" <<'PY'
import json,sys import json,sys
fqdn=sys.argv[1] fqdn=sys.argv[1]
data=json.loads(sys.argv[2]) data=json.loads(sys.argv[2])
@ -60,31 +46,17 @@ for rec in data.get('records', []):
print(rec.get('id','')) print(rec.get('id',''))
break break
PY PY
} )
upsert_a_record() {
local name="$1"
local fqdn="$BASE_DOMAIN"
[[ -n "$name" ]] && fqdn="$name.$BASE_DOMAIN"
local record_id
record_id=$(lookup_record_id "$fqdn")
local body
body=$(printf '{"apikey":"%s","secretapikey":"%s","name":"%s","type":"A","content":"%s","ttl":"600"}' \
"$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY" "$name" "$SERVER_IP")
if [[ -n "$record_id" ]]; then if [[ -n "$record_id" ]]; then
local delete_body curl -fsS "$api_base/dns/edit/$BASE_DOMAIN/$record_id" \
delete_body=$(printf '{"apikey":"%s","secretapikey":"%s"}' "$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY")
curl -fsS "$api_base/dns/delete/$BASE_DOMAIN/$record_id" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
--data "$delete_body" >/dev/null --data "$(payload "$SERVER_IP")" >/dev/null
curl -fsS "$api_base/dns/create/$BASE_DOMAIN" \
-H 'Content-Type: application/json' \
--data "$body" >/dev/null
echo "updated A $fqdn -> $SERVER_IP" echo "updated A $fqdn -> $SERVER_IP"
else else
local body
body=$(printf '{"apikey":"%s","secretapikey":"%s","name":"%s","type":"A","content":"%s","ttl":"600"}' \
"$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY" "$name" "$SERVER_IP")
curl -fsS "$api_base/dns/create/$BASE_DOMAIN" \ curl -fsS "$api_base/dns/create/$BASE_DOMAIN" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
--data "$body" >/dev/null --data "$body" >/dev/null
@ -92,46 +64,8 @@ upsert_a_record() {
fi fi
} }
delete_a_record() { upsert_a_record "$root_name"
local name="$1" upsert_a_record "$git_name"
local fqdn="$BASE_DOMAIN" upsert_a_record "$registry_name"
[[ -n "$name" ]] && fqdn="$name.$BASE_DOMAIN"
local record_id echo "porkbun dns updated for $BASE_DOMAIN, git.$BASE_DOMAIN, registry.$BASE_DOMAIN"
record_id=$(lookup_record_id "$fqdn")
if [[ -n "$record_id" ]]; then
local body
body=$(printf '{"apikey":"%s","secretapikey":"%s"}' "$PORKBUN_API_KEY" "$PORKBUN_SECRET_API_KEY")
curl -fsS "$api_base/dns/delete/$BASE_DOMAIN/$record_id" \
-H 'Content-Type: application/json' \
--data "$body" >/dev/null
echo "deleted A $fqdn"
else
echo "skipped missing A $fqdn"
fi
}
case "$DNS_MODE" in
upsert)
: "${SERVER_IP:?set SERVER_IP}"
upsert_a_record "$root_name"
upsert_a_record "$git_name"
upsert_a_record "$registry_name"
upsert_a_record "$grafana_name"
upsert_a_record "$headlamp_name"
echo "porkbun dns updated for $PUBLIC_DOMAIN, git.$PUBLIC_DOMAIN, registry.$PUBLIC_DOMAIN, grafana.$PUBLIC_DOMAIN, headlamp.$PUBLIC_DOMAIN"
;;
delete)
delete_a_record "$root_name"
delete_a_record "$git_name"
delete_a_record "$registry_name"
delete_a_record "$grafana_name"
delete_a_record "$headlamp_name"
echo "porkbun dns cleanup finished for $PUBLIC_DOMAIN, git.$PUBLIC_DOMAIN, registry.$PUBLIC_DOMAIN, grafana.$PUBLIC_DOMAIN, headlamp.$PUBLIC_DOMAIN"
;;
*)
echo "unsupported DNS_MODE: $DNS_MODE" >&2
exit 1
;;
esac

View file

@ -38,6 +38,7 @@ TF_VARS=(
-var "hcloud_token=$HCLOUD_TOKEN" -var "hcloud_token=$HCLOUD_TOKEN"
-var "ssh_public_key=$SSH_PUBLIC_KEY" -var "ssh_public_key=$SSH_PUBLIC_KEY"
-var "public_domain=$PUBLIC_DOMAIN" -var "public_domain=$PUBLIC_DOMAIN"
-var "bootstrap_repo_url=local-bootstrap"
-var "tailscale_auth_key=${TAILSCALE_AUTH_KEY:-}" -var "tailscale_auth_key=${TAILSCALE_AUTH_KEY:-}"
-var "tailscale_control_plane_hostname=$TAILSCALE_CONTROL_PLANE_HOSTNAME" -var "tailscale_control_plane_hostname=$TAILSCALE_CONTROL_PLANE_HOSTNAME"
) )

View file

@ -113,7 +113,6 @@ def main():
parser.add_argument('--project-name', required=True) parser.add_argument('--project-name', required=True)
parser.add_argument('--project-namespace', required=True) parser.add_argument('--project-namespace', required=True)
parser.add_argument('--project-deployments', required=True) parser.add_argument('--project-deployments', required=True)
parser.add_argument('--project-path', required=True)
args = parser.parse_args() args = parser.parse_args()
client = ForgejoClient(args.forgejo_url, args.admin_username, args.admin_password, args.token) client = ForgejoClient(args.forgejo_url, args.admin_username, args.admin_password, args.token)
@ -134,7 +133,6 @@ def main():
client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_NAME', args.project_name) client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_NAME', args.project_name)
client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_NAMESPACE', args.project_namespace) client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_NAMESPACE', args.project_namespace)
client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_DEPLOYMENTS', args.project_deployments) client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_DEPLOYMENTS', args.project_deployments)
client.upsert_variable(args.repo_owner, args.repo_name, 'PROJECT_PATH', args.project_path)
print('upserted repo action variables') print('upserted repo action variables')

View file

@ -11,20 +11,14 @@ resolve_secret_var FORGEJO_ADMIN_PASSWORD required
: "${FORGEJO_ROOT_URL:?set FORGEJO_ROOT_URL}" : "${FORGEJO_ROOT_URL:?set FORGEJO_ROOT_URL}"
: "${FORGEJO_PUSH_URL_BASE:=$FORGEJO_ROOT_URL}" : "${FORGEJO_PUSH_URL_BASE:=$FORGEJO_ROOT_URL}"
: "${FORGEJO_ADMIN_USERNAME:?set FORGEJO_ADMIN_USERNAME}" : "${FORGEJO_ADMIN_USERNAME:?set FORGEJO_ADMIN_USERNAME}"
: "${SOURCE_REPO_DIR:=$ROOT_DIR}"
: "${FORGEJO_REPO_OWNER:=$FORGEJO_ADMIN_USERNAME}" : "${FORGEJO_REPO_OWNER:=$FORGEJO_ADMIN_USERNAME}"
: "${FORGEJO_REPO_NAME:=$(basename "$SOURCE_REPO_DIR")}" : "${FORGEJO_REPO_NAME:=$(basename "$ROOT_DIR")}"
: "${FORGEJO_PUSH_REMOTE_NAME:=forgejo}" : "${FORGEJO_PUSH_REMOTE_NAME:=forgejo}"
: "${FORGEJO_PUSH_REF:=HEAD:refs/heads/main}" : "${FORGEJO_PUSH_REF:=HEAD:refs/heads/main}"
: "${FORGEJO_REPO_HTTP_USERNAME:=$FORGEJO_ADMIN_USERNAME}" : "${FORGEJO_REPO_HTTP_USERNAME:=$FORGEJO_ADMIN_USERNAME}"
require git require git
if [[ ! -d "$SOURCE_REPO_DIR/.git" ]]; then
echo "SOURCE_REPO_DIR is not a git repository: $SOURCE_REPO_DIR" >&2
exit 1
fi
urlencode() { urlencode() {
python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.argv[1], safe=""))' "$1" python3 -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.argv[1], safe=""))' "$1"
} }
@ -39,11 +33,11 @@ if [[ -n "${FORGEJO_ADMIN_PASSWORD:-}" ]]; then
auth_remote_url="${auth_remote_url/http:\/\//http://${encoded_username}:${encoded_password}@}" auth_remote_url="${auth_remote_url/http:\/\//http://${encoded_username}:${encoded_password}@}"
auth_remote_url+="/${FORGEJO_REPO_OWNER}/${FORGEJO_REPO_NAME}.git" auth_remote_url+="/${FORGEJO_REPO_OWNER}/${FORGEJO_REPO_NAME}.git"
fi fi
current_remote_url="$(git -C "$SOURCE_REPO_DIR" remote get-url "$FORGEJO_PUSH_REMOTE_NAME" 2>/dev/null || true)" current_remote_url="$(git remote get-url "$FORGEJO_PUSH_REMOTE_NAME" 2>/dev/null || true)"
if [[ -z "$current_remote_url" ]]; then if [[ -z "$current_remote_url" ]]; then
git -C "$SOURCE_REPO_DIR" remote add "$FORGEJO_PUSH_REMOTE_NAME" "$auth_remote_url" git remote add "$FORGEJO_PUSH_REMOTE_NAME" "$auth_remote_url"
elif [[ "$current_remote_url" != "$auth_remote_url" ]]; then elif [[ "$current_remote_url" != "$auth_remote_url" ]]; then
git -C "$SOURCE_REPO_DIR" remote set-url "$FORGEJO_PUSH_REMOTE_NAME" "$auth_remote_url" git remote set-url "$FORGEJO_PUSH_REMOTE_NAME" "$auth_remote_url"
fi fi
askpass_script="$(mktemp)" askpass_script="$(mktemp)"
@ -62,6 +56,6 @@ GIT_TERMINAL_PROMPT=0 \
GIT_ASKPASS="$askpass_script" \ GIT_ASKPASS="$askpass_script" \
FORGEJO_ADMIN_USERNAME="$FORGEJO_ADMIN_USERNAME" \ FORGEJO_ADMIN_USERNAME="$FORGEJO_ADMIN_USERNAME" \
FORGEJO_ADMIN_PASSWORD="$FORGEJO_ADMIN_PASSWORD" \ FORGEJO_ADMIN_PASSWORD="$FORGEJO_ADMIN_PASSWORD" \
git -C "$SOURCE_REPO_DIR" push "$FORGEJO_PUSH_REMOTE_NAME" "$FORGEJO_PUSH_REF" git push "$FORGEJO_PUSH_REMOTE_NAME" "$FORGEJO_PUSH_REF"
echo "seeded ${remote_url} from ${SOURCE_REPO_DIR}" echo "seeded ${remote_url}"

View file

@ -0,0 +1,42 @@
import process from 'node:process';
import { createConsumer } from '../bus/kafka/consumer.mjs';
import { logStatus } from '../core/log.mjs';
import { parseEventMessage } from '../core/event-envelope.mjs';
import { assertTradeResult } from '../core/schemas.mjs';
import { loadConfig } from '../lib/config.mjs';
// Result viewer: subscribes to the trade-result topic and prints one summary
// line per valid trade_result event.
const config = loadConfig();

// The group id is derived from the executor group name with a "-results-view" suffix.
const consumer = await createConsumer({
  groupId: `${config.kafkaConsumerGroupExecutor}-results-view`,
  brokers: config.kafkaBrokers,
  clientId: config.kafkaClientId,
});

await consumer.subscribe({ topic: config.kafkaTopicExecTradeResult, fromBeginning: false });
logStatus(`result consumer subscribed to ${config.kafkaTopicExecTradeResult}`);

/** Disconnects the consumer and exits with status 0. */
async function shutdown() {
  await consumer.disconnect();
  process.exit(0);
}

process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);

await consumer.run({
  eachMessage: async ({ message }) => {
    if (!message.value) return;

    let event;
    try {
      event = parseEventMessage(message.value.toString());
    } catch {
      logStatus('result consumer received non-JSON message; skipping');
      return;
    }

    // Treat a schema-invalid event like an unparseable one: log it and move on.
    // Letting the error escape eachMessage would hand the same message back to
    // the KafkaJS retry machinery instead of skipping it.
    try {
      assertTradeResult(event);
    } catch (err) {
      logStatus(`result consumer received invalid trade_result event; skipping: ${err.message}`);
      return;
    }

    const { payload } = event;
    console.log(`[result] command_id=${payload.command_id} quote_id=${payload.quote_id} status=${payload.status} result_code=${payload.result_code || 'n/a'}`);
  },
});

View file

@ -0,0 +1,93 @@
import process from 'node:process';
import { createConsumer } from '../bus/kafka/consumer.mjs';
import { createProducer } from '../bus/kafka/producer.mjs';
import { buildEventEnvelope, parseEventMessage } from '../core/event-envelope.mjs';
import { createExecutorStateStore } from '../core/executor-state-store.mjs';
import { logStatus } from '../core/log.mjs';
import { assertExecuteTradeCommand, assertTradeResult } from '../core/schemas.mjs';
import { loadConfig } from '../lib/config.mjs';
// Dummy executor: consumes execute_trade commands, publishes a simulated
// trade_result for each one, and tracks per-command progress in the executor
// state store so that commands already marked completed are not re-emitted.
const config = loadConfig();

const consumer = await createConsumer({
  groupId: config.kafkaConsumerGroupExecutor,
  brokers: config.kafkaBrokers,
  clientId: config.kafkaClientId,
});

const producer = await createProducer({
  brokers: config.kafkaBrokers,
  clientId: config.kafkaClientId,
});

const stateStore = createExecutorStateStore({ stateDir: config.executorStateDir });

await consumer.subscribe({ topic: config.kafkaTopicCmdExecuteTrade, fromBeginning: false });
logStatus(`dummy executor subscribed to ${config.kafkaTopicCmdExecuteTrade} as ${config.kafkaConsumerGroupExecutor}`);
logStatus(`dummy executor will publish results to ${config.kafkaTopicExecTradeResult}; state_dir=${config.executorStateDir}`);

/** Disconnects the consumer, then the producer, and exits with status 0. */
async function shutdown() {
  await consumer.disconnect();
  await producer.disconnect();
  process.exit(0);
}

process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);

await consumer.run({
  eachMessage: async ({ message }) => {
    if (!message.value) return;

    let event;
    try {
      event = parseEventMessage(message.value.toString());
    } catch {
      logStatus('dummy executor received non-JSON message; skipping');
      return;
    }

    // A command that parses but fails schema validation can never succeed on a
    // retry, so it is logged and skipped exactly like a non-JSON message rather
    // than thrown out of eachMessage (which would make KafkaJS retry it).
    try {
      assertExecuteTradeCommand(event);
    } catch (err) {
      logStatus(`dummy executor received invalid execute_trade command; skipping: ${err.message}`);
      return;
    }

    const { payload } = event;
    const commandId = payload.command_id;

    // Snapshot taken before markProcessing so we can tell a fresh command from
    // one that an earlier attempt left in the 'processing' state.
    const existing = stateStore.get(commandId);
    if (existing?.status === 'completed') {
      logStatus(`dummy executor skipping duplicate command_id=${commandId}`);
      return;
    }

    const commandKeys = {
      idempotency_key: payload.idempotency_key,
      execution_key: payload.execution_key,
      quote_id: payload.quote_id,
    };

    stateStore.markProcessing(commandId, { ...commandKeys });

    const pair = `${payload.asset_in} -> ${payload.asset_out}`;
    const result = buildEventEnvelope({
      source: 'dummy-executor',
      venue: event.venue || 'near-intents',
      eventType: 'trade_result',
      eventId: `exec-${commandId}`,
      observedAt: event.observed_at,
      payload: {
        command_id: commandId,
        ...commandKeys,
        status: 'simulated_sent',
        result_code: existing?.status === 'processing' ? 'recovered_inflight' : 'sent',
        note: 'dummy executor placeholder result',
      },
    });

    // These may still throw: a failure to build or publish the result is left
    // to propagate, and the command stays in 'processing' in the state store.
    assertTradeResult(result);
    await producer.sendJson(config.kafkaTopicExecTradeResult, result, { key: payload.execution_key });

    stateStore.markCompleted(commandId, {
      ...commandKeys,
      result_event_id: result.event_id,
    });

    console.log(`[dummy-executor] result emitted ${pair} quote_id=${payload.quote_id} command_id=${commandId} status=simulated_sent`);
  },
});

View file

@ -0,0 +1,75 @@
import process from 'node:process';
import { createConsumer } from '../bus/kafka/consumer.mjs';
import { createProducer } from '../bus/kafka/producer.mjs';
import { logStatus } from '../core/log.mjs';
import { loadConfig } from '../lib/config.mjs';
import { buildEventEnvelope, parseEventMessage } from '../core/event-envelope.mjs';
import { assertExecuteTradeCommand, assertNormalizedSwapDemand } from '../core/schemas.mjs';
// Dummy reactor: consumes normalized swap_demand events and publishes one
// execute_trade command per event, keyed by execution_key.
const config = loadConfig();

const consumer = await createConsumer({
  groupId: config.kafkaConsumerGroupDummy,
  brokers: config.kafkaBrokers,
  clientId: config.kafkaClientId,
});

const producer = await createProducer({
  brokers: config.kafkaBrokers,
  clientId: config.kafkaClientId,
});

await consumer.subscribe({ topic: config.kafkaTopicNormSwapDemand, fromBeginning: false });
logStatus(`dummy reactor subscribed to ${config.kafkaTopicNormSwapDemand} as ${config.kafkaConsumerGroupDummy}`);
logStatus(`dummy reactor will publish commands to ${config.kafkaTopicCmdExecuteTrade}`);

/** Disconnects the consumer, then the producer, and exits with status 0. */
async function shutdown() {
  await consumer.disconnect();
  await producer.disconnect();
  process.exit(0);
}

process.on('SIGINT', shutdown);
process.on('SIGTERM', shutdown);

await consumer.run({
  eachMessage: async ({ message }) => {
    if (!message.value) return;

    let event;
    try {
      event = parseEventMessage(message.value.toString());
    } catch {
      logStatus('dummy reactor received non-JSON message; skipping');
      return;
    }

    // A schema-invalid event cannot become valid on retry; log and skip it the
    // same way non-JSON input is skipped instead of throwing out of eachMessage.
    try {
      assertNormalizedSwapDemand(event);
    } catch (err) {
      logStatus(`dummy reactor received invalid swap_demand event; skipping: ${err.message}`);
      return;
    }

    const { payload } = event;
    const venue = event.venue || 'near-intents';
    const pair = `${payload.asset_in} -> ${payload.asset_out}`;
    const quoteId = payload.quote_id;
    // The command id is derived from the quote id, so the same quote always
    // yields the same command id.
    const commandId = `cmd-${quoteId}`;

    const command = buildEventEnvelope({
      source: 'dummy-reactor',
      venue,
      eventType: 'execute_trade',
      eventId: commandId,
      observedAt: event.observed_at,
      payload: {
        command_id: commandId,
        idempotency_key: `${venue}:${quoteId}`,
        execution_key: `${venue}:${payload.asset_in}->${payload.asset_out}`,
        quote_id: quoteId,
        asset_in: payload.asset_in,
        asset_out: payload.asset_out,
        amount_in: payload.amount_in,
        amount_out: payload.amount_out,
        reason: 'dummy reactor placeholder decision',
      },
    });

    assertExecuteTradeCommand(command);
    await producer.sendJson(config.kafkaTopicCmdExecuteTrade, command, { key: command.payload.execution_key });

    console.log(`[dummy-reactor] command emitted ${pair} quote_id=${quoteId} command_id=${commandId}`);
  },
});

View file

@ -0,0 +1,41 @@
import process from 'node:process';
import { createProducer } from '../bus/kafka/producer.mjs';
import { logStatus } from '../core/log.mjs';
import { parsePairFilter } from '../core/pair-filter.mjs';
import { loadConfig } from '../lib/config.mjs';
import { startNearIntentsWs } from '../venues/near-intents/ws.mjs';
// Ingest entry point: connects a Kafka producer and hands it, together with the
// configured topics and the optional pair filter, to the NEAR Intents websocket client.
const config = loadConfig();
const pairFilter = parsePairFilter(process.argv.slice(2));

// Refuse to start without an API key.
if (!config.nearIntentsApiKey) {
  console.error('Missing NEAR_INTENTS_API_KEY in env or .env');
  process.exit(1);
}

const producer = await createProducer({
  brokers: config.kafkaBrokers,
  clientId: config.kafkaClientId,
});

logStatus(`kafka producer connected; raw_topic=${config.kafkaTopicRawNearIntentsQuote}; normalized_topic=${config.kafkaTopicNormSwapDemand}`);
if (pairFilter) {
  logStatus(`pair filter enabled: ${pairFilter[0]} <-> ${pairFilter[1]}`);
}

// Both termination signals share one handler: flush/disconnect the producer, then exit 0.
const disconnectAndExit = async () => {
  await producer.disconnect();
  process.exit(0);
};

for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, disconnectAndExit);
}

await startNearIntentsWs({
  apiKey: config.nearIntentsApiKey,
  wsUrl: config.nearIntentsWsUrl,
  pairFilter,
  producer,
  rawTopic: config.kafkaTopicRawNearIntentsQuote,
  normalizedTopic: config.kafkaTopicNormSwapDemand,
});

View file

@ -0,0 +1,16 @@
import { Kafka } from 'kafkajs';
/**
 * Builds a Kafka client.
 *
 * @param {object} [options]
 * @param {string[]} [options.brokers] - Broker addresses; defaults to the local broker on port 9092.
 * @param {string} [options.clientId] - Client id; defaults to 'unrip'.
 * @returns {Kafka} A new Kafka client instance.
 */
function createKafka(options = {}) {
  const { brokers = ['127.0.0.1:9092'], clientId = 'unrip' } = options;
  return new Kafka({ clientId, brokers });
}
/**
 * Creates and connects a Kafka consumer, exposing only the operations the
 * application uses.
 *
 * @param {object} params
 * @param {string} params.groupId - Consumer group id.
 * @param {...*} params.kafkaOptions - Remaining properties are forwarded to createKafka.
 * @returns {Promise<{subscribe: Function, run: Function, disconnect: Function}>}
 *   Thin wrapper whose methods delegate to the underlying consumer.
 */
export async function createConsumer({ groupId, ...kafkaOptions }) {
  const kafka = createKafka(kafkaOptions);
  const consumer = kafka.consumer({ groupId });
  await consumer.connect();

  return {
    subscribe(subscription) {
      return consumer.subscribe(subscription);
    },
    run(runConfig) {
      return consumer.run(runConfig);
    },
    disconnect() {
      return consumer.disconnect();
    },
  };
}

View file

@ -0,0 +1,21 @@
import { Kafka } from 'kafkajs';
/**
 * Builds the Kafka client used by the producer.
 *
 * @param {object} [options]
 * @param {string[]} [options.brokers] - Broker addresses; defaults to the local broker on port 9092.
 * @param {string} [options.clientId] - Client id; defaults to 'unrip'.
 * @returns {Kafka} A new Kafka client instance.
 */
function createKafka(options = {}) {
  const { brokers = ['127.0.0.1:9092'], clientId = 'unrip' } = options;
  return new Kafka({ clientId, brokers });
}
/**
 * Creates and connects a Kafka producer that publishes JSON-encoded events.
 *
 * @param {object} [options] - Forwarded to createKafka (brokers, clientId).
 * @returns {Promise<{sendJson: Function, disconnect: Function}>}
 *   `sendJson(topic, event, { key })` sends one message whose value is
 *   `JSON.stringify(event)`; when `key` is omitted it falls back to
 *   `event.event_id`, then `event.key`, then `null`.
 */
export async function createProducer(options = {}) {
  const kafka = createKafka(options);
  const producer = kafka.producer();
  await producer.connect();

  const sendJson = async (topic, event, sendOptions = {}) => {
    const { key = event?.event_id ?? event?.key ?? null } = sendOptions;
    const value = JSON.stringify(event);
    await producer.send({ topic, messages: [{ key, value }] });
  };

  const disconnect = async () => {
    await producer.disconnect();
  };

  return { sendJson, disconnect };
}

View file

@ -0,0 +1,41 @@
import crypto from 'node:crypto';
/**
 * Wraps a payload in the common event envelope.
 *
 * @param {object} fields
 * @param {string} fields.eventType - Required; stored as `event_type`.
 * @param {string} fields.venue - Required; stored as `venue`.
 * @param {*} fields.payload - Required (anything except null/undefined).
 * @param {string} [fields.source] - Stored as a string, or null when falsy.
 * @param {*} [fields.eventId] - Defaults to a random UUID; stored as a string.
 * @param {number} [fields.schemaVersion=1] - Stored as a number.
 * @param {Date|string|number|null} [fields.observedAt=null] - Stored as ISO string or null.
 * @param {Date|string|number} [fields.ingestedAt] - Defaults to now; an
 *   unparseable value also falls back to the current time.
 * @param {*} [fields.raw=null] - Stored unchanged.
 * @returns {object} The envelope with snake_case keys.
 * @throws {Error} When eventType, venue or payload is missing.
 */
export function buildEventEnvelope({
  eventType,
  venue,
  payload,
  source,
  eventId = crypto.randomUUID(),
  schemaVersion = 1,
  observedAt = null,
  ingestedAt = new Date(),
  raw = null,
}) {
  if (!eventType) {
    throw new Error('Missing eventType');
  }
  if (!venue) {
    throw new Error('Missing venue');
  }
  if (payload == null) {
    throw new Error('Missing payload');
  }

  const envelope = {};
  envelope.event_id = String(eventId);
  envelope.event_type = String(eventType);
  envelope.venue = String(venue);
  envelope.source = source ? String(source) : null;
  envelope.schema_version = Number(schemaVersion);
  envelope.observed_at = toIsoStringOrNull(observedAt);
  // ingested_at is never null: fall back to "now" when the given value is unusable.
  envelope.ingested_at = toIsoStringOrNull(ingestedAt) ?? new Date().toISOString();
  envelope.payload = payload;
  envelope.raw = raw;
  return envelope;
}
/**
 * Turns a raw message value into an event object.
 *
 * @param {string|object} value - A JSON string (parsed) or an already-decoded value (used as-is).
 * @returns {object} The decoded event.
 * @throws {SyntaxError} When a string value is not valid JSON.
 * @throws {Error} When the decoded value is null or not of type 'object'.
 */
export function parseEventMessage(value) {
  let event = value;
  if (typeof value === 'string') {
    event = JSON.parse(value);
  }

  const isObject = event !== null && typeof event === 'object';
  if (!isObject) {
    throw new Error('Event must be an object');
  }
  return event;
}
/**
 * Converts a date-like value to an ISO-8601 string.
 *
 * @param {Date|string|number|null|undefined} value
 * @returns {string|null} The ISO string, or null for null/undefined input and
 *   for values that do not produce a valid Date.
 */
function toIsoStringOrNull(value) {
  if (value === null || value === undefined) {
    return null;
  }

  const asDate = value instanceof Date ? value : new Date(value);
  if (Number.isNaN(asDate.getTime())) {
    return null;
  }
  return asDate.toISOString();
}

View file

@ -0,0 +1,49 @@
import fs from 'node:fs';
import path from 'node:path';
/**
 * Creates a small file-backed store of per-command execution state.
 *
 * The state directory is created if needed, the state file is loaded once at
 * construction, and every mutation rewrites the file via persistState.
 *
 * @param {object} params
 * @param {string} params.stateDir - Directory holding the state file.
 * @param {string} [params.fileName='commands.json'] - State file name.
 * @returns {{get: Function, markProcessing: Function, markCompleted: Function}}
 *   `get(commandId)` returns the stored entry or null; the `mark*` methods
 *   merge `metadata` into the entry, set `status` and `updated_at`, persist,
 *   and return the new entry.
 */
export function createExecutorStateStore({ stateDir, fileName = 'commands.json' }) {
  fs.mkdirSync(stateDir, { recursive: true });
  const filePath = path.join(stateDir, fileName);
  const state = loadState(filePath);

  // Shared transition: previous entry first, then metadata, then the
  // bookkeeping fields so they always win.
  const transition = (commandId, metadata, status) => {
    const previous = state[commandId] || {};
    state[commandId] = {
      ...previous,
      ...metadata,
      status,
      updated_at: new Date().toISOString(),
    };
    persistState(filePath, state);
    return state[commandId];
  };

  return {
    get(commandId) {
      const entry = state[commandId];
      return entry ? entry : null;
    },
    markProcessing(commandId, metadata) {
      return transition(commandId, metadata, 'processing');
    },
    markCompleted(commandId, metadata) {
      return transition(commandId, metadata, 'completed');
    },
  };
}
/**
 * Reads the persisted command state from disk.
 *
 * Loading is best-effort: any problem yields an empty state instead of an
 * exception. Unlike a missing file, unreadable or malformed content is reported
 * on stderr, because the next persist will overwrite it.
 *
 * @param {string} filePath - Path of the JSON state file.
 * @returns {object} The stored state object, or `{}` when the file is missing,
 *   unreadable, not valid JSON, or does not contain a plain JSON object.
 */
function loadState(filePath) {
  let text;
  try {
    text = fs.readFileSync(filePath, 'utf8');
  } catch (err) {
    // A missing file is the normal first-run case and stays silent.
    if (err?.code !== 'ENOENT') {
      console.error(`executor state file ${filePath} could not be read; starting empty: ${err.message}`);
    }
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (err) {
    console.error(`executor state file ${filePath} is not valid JSON; starting empty: ${err.message}`);
    return {};
  }

  // Valid JSON such as `null`, `[]` or `"x"` would break the store's
  // `state[commandId]` lookups, so only a plain object is accepted.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    console.error(`executor state file ${filePath} does not contain a JSON object; starting empty`);
    return {};
  }
  return parsed;
}
/**
 * Writes the whole state object to disk as pretty-printed JSON.
 *
 * The content goes to a sibling `.tmp` file first and is then renamed over the
 * target path, so the target is replaced in one step rather than written in place.
 *
 * @param {string} filePath - Destination path of the state file.
 * @param {object} state - State to serialize.
 */
function persistState(filePath, state) {
  const serialized = JSON.stringify(state, null, 2);
  const stagingPath = `${filePath}.tmp`;
  fs.writeFileSync(stagingPath, serialized);
  fs.renameSync(stagingPath, filePath);
}

31
src/core/log.mjs Normal file
View file

@ -0,0 +1,31 @@
/**
 * Writes a timestamped status line via console.error (i.e. to stderr).
 *
 * @param {string} message - Text appended after the `[ISO timestamp]` prefix.
 */
export function logStatus(message) {
  const stamp = new Date().toISOString();
  const line = `[${stamp}] ${message}`;
  console.error(line);
}
export function startIdleHeartbeat({
label,
getLastActivityAt,
getStatus,
idleAfterMs = 30_000,
checkEveryMs = 5_000,
}) {
let lastHeartbeatAt = 0;
const timer = setInterval(() => {
const lastActivityAt = getLastActivityAt();
const idleForMs = Date.now() - lastActivityAt;
if (idleForMs < idleAfterMs) return;
if (Date.now() - lastHeartbeatAt < idleAfterMs) return;
const seconds = Math.floor(idleForMs / 1000);
const suffix = getStatus ? `; ${getStatus()}` : '';
logStatus(`${label} idle ${seconds}s${suffix}`);
lastHeartbeatAt = Date.now();
}, checkEveryMs);
if (typeof timer.unref === 'function') timer.unref();
return () => clearInterval(timer);
}

17
src/core/pair-filter.mjs Normal file
View file

@ -0,0 +1,17 @@
/**
 * Extracts the optional `--pair 'asset_a->asset_b'` filter from CLI arguments.
 *
 * The value must consist of exactly two non-empty asset names separated by
 * `->`; names are trimmed and lower-cased. Inputs with an empty side
 * (`'->b'`, `'a->'`) or more than one separator (`'a->b->c'`) are rejected
 * rather than silently producing an empty or truncated filter.
 *
 * @param {string[]} argv - Command-line arguments (without node/script path).
 * @returns {[string, string] | null} The normalized pair, or null when `--pair` is absent.
 * @throws {Error} When `--pair` is present but its value is missing or malformed.
 */
export function parsePairFilter(argv) {
  const flagIndex = argv.indexOf('--pair');
  if (flagIndex === -1) return null;

  const raw = argv[flagIndex + 1];
  const parts = typeof raw === 'string'
    ? raw.split('->').map((part) => part.trim().toLowerCase())
    : [];

  if (parts.length !== 2 || parts.some((part) => part.length === 0)) {
    throw new Error("Use --pair 'asset_a->asset_b'");
  }
  return parts;
}
/**
 * Tests whether a swap's assets match the pair filter in either direction.
 *
 * @param {string} assetIn - Input asset; compared case-insensitively.
 * @param {string} assetOut - Output asset; compared case-insensitively.
 * @param {[string, string] | null} pairFilter - Lower-cased pair, or a falsy value for "no filter".
 * @returns {boolean} True when no filter is set or when the assets equal the pair in either order.
 */
export function matchesPairFilter(assetIn, assetOut, pairFilter) {
  if (!pairFilter) {
    return true;
  }

  const inbound = assetIn.toLowerCase();
  const outbound = assetOut.toLowerCase();
  const sameDirection = inbound === pairFilter[0] && outbound === pairFilter[1];
  if (sameDirection) {
    return true;
  }
  return inbound === pairFilter[1] && outbound === pairFilter[0];
}

63
src/core/schemas.mjs Normal file
View file

@ -0,0 +1,63 @@
/**
 * Throw unless `value` is a non-empty string.
 *
 * @param {unknown} value - Value to check.
 * @param {string} field - Field path used in the error message.
 * @throws {Error} `Missing <field>` when the check fails.
 */
function requireString(value, field) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  if (isNonEmptyString) return;
  throw new Error(`Missing ${field}`);
}
/**
 * Throw unless `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to check.
 * @param {string} field - Field path used in the error message.
 * @throws {Error} `Missing <field>` when the check fails.
 */
function requireObject(value, field) {
  const isPlainRecord = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (isPlainRecord) return;
  throw new Error(`Missing ${field}`);
}
/**
 * Validate the fields shared by every event envelope.
 *
 * Required: `event_id`, `event_type`, `venue`, `ingested_at` (non-empty
 * strings), `schema_version` (number) and `payload` (object). `source` is
 * optional but must be a non-empty string when present.
 *
 * @param {object} event - Envelope to validate.
 * @returns {object} The same `event`, for chaining.
 * @throws {Error} `Missing <field>` for the first field that fails.
 */
export function assertEventEnvelope(event) {
  requireObject(event, 'event');

  for (const name of ['event_id', 'event_type', 'venue']) {
    requireString(event[name], `event.${name}`);
  }
  if (event.source != null) {
    requireString(event.source, 'event.source');
  }
  if (typeof event.schema_version !== 'number') {
    throw new Error('Missing event.schema_version');
  }
  requireString(event.ingested_at, 'event.ingested_at');
  requireObject(event.payload, 'event.payload');

  return event;
}
/**
 * Validate a normalized `swap_demand` event.
 *
 * Checks the common envelope, the event type, the required payload strings
 * (`quote_id`, `asset_in`, `asset_out`) and — only when present — that
 * `amount_in`, `amount_out` and `ttl_ms` are non-empty strings.
 *
 * @param {object} event - Envelope to validate.
 * @returns {object} The same `event`.
 * @throws {Error} On the first failed check.
 */
export function assertNormalizedSwapDemand(event) {
  assertEventEnvelope(event);
  if (event.event_type !== 'swap_demand') {
    throw new Error(`Unexpected event_type: ${event.event_type}`);
  }

  const { payload } = event;
  for (const name of ['quote_id', 'asset_in', 'asset_out']) {
    requireString(payload[name], `payload.${name}`);
  }
  for (const name of ['amount_in', 'amount_out', 'ttl_ms']) {
    if (payload[name] != null) requireString(payload[name], `payload.${name}`);
  }

  return event;
}
/**
 * Validate an `execute_trade` command event.
 *
 * Checks the common envelope, the event type, the required payload strings
 * (`command_id`, `idempotency_key`, `execution_key`, `quote_id`, `asset_in`,
 * `asset_out`) and — only when present — that `amount_in` and `amount_out`
 * are non-empty strings.
 *
 * @param {object} event - Envelope to validate.
 * @returns {object} The same `event`.
 * @throws {Error} On the first failed check.
 */
export function assertExecuteTradeCommand(event) {
  assertEventEnvelope(event);
  if (event.event_type !== 'execute_trade') {
    throw new Error(`Unexpected event_type: ${event.event_type}`);
  }

  const { payload } = event;
  const requiredFields = [
    'command_id',
    'idempotency_key',
    'execution_key',
    'quote_id',
    'asset_in',
    'asset_out',
  ];
  for (const name of requiredFields) {
    requireString(payload[name], `payload.${name}`);
  }
  for (const name of ['amount_in', 'amount_out']) {
    if (payload[name] != null) requireString(payload[name], `payload.${name}`);
  }

  return event;
}
/**
 * Validate a `trade_result` event.
 *
 * Checks the common envelope, the event type, the required payload strings
 * (`command_id`, `idempotency_key`, `execution_key`, `quote_id`, `status`)
 * and — only when present — that `result_code` is a non-empty string.
 *
 * @param {object} event - Envelope to validate.
 * @returns {object} The same `event`.
 * @throws {Error} On the first failed check.
 */
export function assertTradeResult(event) {
  assertEventEnvelope(event);
  if (event.event_type !== 'trade_result') {
    throw new Error(`Unexpected event_type: ${event.event_type}`);
  }

  const { payload } = event;
  const requiredFields = ['command_id', 'idempotency_key', 'execution_key', 'quote_id', 'status'];
  for (const name of requiredFields) {
    requireString(payload[name], `payload.${name}`);
  }
  if (payload.result_code != null) {
    requireString(payload.result_code, 'payload.result_code');
  }

  return event;
}

54
src/lib/config.mjs Normal file
View file

@ -0,0 +1,54 @@
import { loadDotenv } from './env.mjs';
// Fallback values used by loadConfig when the matching environment variable is
// unset or empty (for kafkaBrokers: when the parsed broker list is empty).
const DEFAULTS = {
nearIntentsWsUrl: 'wss://solver-relay-v2.chaindefuser.com/ws',
kafkaBrokers: ['127.0.0.1:9092'],
kafkaClientId: 'unrip',
// Kafka topic names.
kafkaTopicRawNearIntentsQuote: 'raw.near_intents.quote',
kafkaTopicNormSwapDemand: 'norm.swap_demand',
kafkaTopicCmdExecuteTrade: 'cmd.execute_trade',
kafkaTopicExecTradeResult: 'exec.trade_result',
// Kafka consumer group ids.
kafkaConsumerGroupDummy: 'dummy-reactor-v1',
kafkaConsumerGroupExecutor: 'dummy-executor-v1',
// Relative path; NOTE(review): presumably resolved against the process
// working directory by whoever consumes it — confirm at the call site.
executorStateDir: './var/executor-state',
};
/**
 * Split a comma-separated string into trimmed, non-empty entries.
 *
 * @param {unknown} value - Raw value; falsy input yields an empty list.
 * @returns {string[]} Entries in their original order.
 */
function splitCsv(value) {
  const entries = [];
  for (const chunk of String(value || '').split(',')) {
    const cleaned = chunk.trim();
    if (cleaned) entries.push(cleaned);
  }
  return entries;
}
/**
 * Build the runtime configuration object from environment variables.
 *
 * Config stays environment-first so the same app build works for local `.env`
 * development, Docker/Compose, and Kubernetes Secret/ConfigMap injection
 * during the Hetzner bootstrap — the image can be deployed into k3s without
 * app-level config rewrites.
 *
 * @param {object} [options]
 * @param {string} [options.envPath='.env'] - Dotenv file loaded before reading the environment.
 * @returns {object} Resolved settings; unset or empty variables fall back to DEFAULTS
 *   (the API key falls back to an empty string).
 */
export function loadConfig({ envPath = '.env' } = {}) {
  loadDotenv(envPath);

  const env = process.env;
  const fromEnv = (name, fallback) => env[name] || fallback;
  const brokers = splitCsv(env.KAFKA_BROKERS);

  return {
    nearIntentsApiKey: fromEnv('NEAR_INTENTS_API_KEY', ''),
    nearIntentsWsUrl: fromEnv('NEAR_INTENTS_WS_URL', DEFAULTS.nearIntentsWsUrl),
    kafkaBrokers: brokers.length ? brokers : DEFAULTS.kafkaBrokers,
    kafkaClientId: fromEnv('KAFKA_CLIENT_ID', DEFAULTS.kafkaClientId),
    kafkaTopicRawNearIntentsQuote: fromEnv(
      'KAFKA_TOPIC_RAW_NEAR_INTENTS_QUOTE',
      DEFAULTS.kafkaTopicRawNearIntentsQuote,
    ),
    kafkaTopicNormSwapDemand: fromEnv('KAFKA_TOPIC_NORM_SWAP_DEMAND', DEFAULTS.kafkaTopicNormSwapDemand),
    kafkaTopicCmdExecuteTrade: fromEnv('KAFKA_TOPIC_CMD_EXECUTE_TRADE', DEFAULTS.kafkaTopicCmdExecuteTrade),
    kafkaTopicExecTradeResult: fromEnv('KAFKA_TOPIC_EXEC_TRADE_RESULT', DEFAULTS.kafkaTopicExecTradeResult),
    kafkaConsumerGroupDummy: fromEnv('KAFKA_CONSUMER_GROUP_DUMMY', DEFAULTS.kafkaConsumerGroupDummy),
    kafkaConsumerGroupExecutor: fromEnv('KAFKA_CONSUMER_GROUP_EXECUTOR', DEFAULTS.kafkaConsumerGroupExecutor),
    executorStateDir: fromEnv('EXECUTOR_STATE_DIR', DEFAULTS.executorStateDir),
  };
}

17
src/lib/env.mjs Normal file
View file

@ -0,0 +1,17 @@
import fs from 'node:fs';
// `.env` loading is a local/dev convenience only.
// In the repo-driven Hetzner+k3s bootstrap flow, Kubernetes injects runtime
// environment variables from Secrets/ConfigMaps and already-present process.env
// values always win over anything on disk.
/**
 * Load `KEY=VALUE` pairs from a dotenv-style file into `process.env`.
 *
 * - A missing file is a no-op.
 * - Blank lines, `#` comment lines, lines without `=` and lines with an empty
 *   key are skipped.
 * - Everything after the first `=` is the value (so values may contain `=`).
 * - One pair of matching surrounding quotes (`'…'` or `"…"`) is removed;
 *   unbalanced quotes are kept verbatim because they are part of the value.
 * - Variables already present in `process.env` are never overwritten.
 *
 * @param {string} [path='.env'] - File to read.
 */
export function loadDotenv(path = '.env') {
  if (!fs.existsSync(path)) return;

  const lines = fs.readFileSync(path, 'utf8').split(/\r?\n/);
  for (const rawLine of lines) {
    const line = rawLine.trim();
    if (!line || line.startsWith('#')) continue;

    const separatorAt = line.indexOf('=');
    if (separatorAt === -1) continue;

    const key = line.slice(0, separatorAt).trim();
    // An empty name is not a usable environment variable.
    if (!key) continue;
    // Values already in the environment always win over the file.
    if (key in process.env) continue;

    let value = line.slice(separatorAt + 1).trim();
    const quote = value[0];
    const isQuoted = value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote);
    if (isQuoted) value = value.slice(1, -1);

    process.env[key] = value;
  }
}

View file

@ -0,0 +1,5 @@
import { startNearIntentsWs } from './ws.mjs';
/**
 * Entry point for NEAR Intents ingestion: forwards `options` unchanged to
 * startNearIntentsWs and returns whatever that call returns.
 *
 * @param {object} options - Passed through as-is to startNearIntentsWs.
 */
export function startNearIntentsIngest(options) {
return startNearIntentsWs(options);
}

View file

@ -0,0 +1,68 @@
import { buildEventEnvelope } from '../../core/event-envelope.mjs';
/**
 * Wrap a raw NEAR Intents message in an event envelope of type
 * `near_intents_quote_raw`.
 *
 * The event id is the first id-like field found on the message; when none is
 * present it falls back to `near-intents-raw-<ingestedAt epoch ms>`.
 * Non-object messages are treated as an empty object.
 *
 * @param {unknown} message - Decoded message.
 * @param {object} [options]
 * @param {Date} [options.ingestedAt=new Date()] - Ingestion time.
 * @returns {object} Whatever buildEventEnvelope returns for these fields.
 */
export function buildNearIntentsRawEnvelope(message, { ingestedAt = new Date() } = {}) {
  let raw = {};
  if (isRecord(message)) raw = message;

  const idCandidate = first(raw, ['quote_id', 'quoteRequestId', 'request_id', 'id', 'quote_hash']);
  const observedAt = first(raw, ['created_at', 'createdAt', 'timestamp', 'ts']);
  // The fallback id is only computed when the message carries no id at all.
  const eventId = idCandidate ? idCandidate : `near-intents-raw-${ingestedAt.getTime()}`;

  return buildEventEnvelope({
    source: 'near-intents.ws',
    venue: 'near-intents',
    eventType: 'near_intents_quote_raw',
    eventId,
    observedAt,
    ingestedAt,
    payload: { message: raw },
    raw,
  });
}
/**
 * Build a normalized `swap_demand` envelope from a NEAR Intents message.
 *
 * @param {unknown} message - Decoded message; non-objects are treated as `{}`.
 * @param {object} [options]
 * @param {Date} [options.ingestedAt=new Date()] - Ingestion time.
 * @returns {object | null} The envelope from buildEventEnvelope, or `null`
 *   when the message cannot be normalized into a quote.
 */
export function buildNearIntentsQuoteEnvelope(message, { ingestedAt = new Date() } = {}) {
  let raw = {};
  if (isRecord(message)) raw = message;

  const normalized = normalizeNearIntentsQuote(raw);
  if (!normalized) return null;

  const observedAt = first(raw, ['created_at', 'createdAt', 'timestamp', 'ts']);
  const envelopeFields = {
    source: 'near-intents.ws',
    venue: 'near-intents',
    eventType: 'swap_demand',
    eventId: normalized.quote_id,
    observedAt,
    ingestedAt,
    payload: normalized,
    raw,
  };
  return buildEventEnvelope(envelopeFields);
}
/**
 * Map a NEAR Intents quote message onto the normalized swap-demand payload.
 *
 * Each output field is taken from the first non-null alias present on the
 * message. Id and both assets are mandatory; amounts and TTL are optional and
 * become `null` when absent. All present values are converted to strings.
 *
 * @param {object} message - Decoded quote message.
 * @returns {object | null} Normalized payload, or `null` when the quote id or
 *   either asset is missing.
 */
export function normalizeNearIntentsQuote(message) {
  const quoteId = first(message, ['quote_id', 'quoteRequestId', 'request_id', 'id']);
  const assetIn = first(message, ['defuse_asset_identifier_in', 'sellToken', 'asset_in']);
  const assetOut = first(message, ['defuse_asset_identifier_out', 'buyToken', 'asset_out']);

  const hasRequiredFields = quoteId && assetIn && assetOut;
  if (!hasRequiredFields) return null;

  const amountIn = first(message, ['exact_amount_in', 'sellAmount', 'amount_in']);
  const amountOut = first(message, [
    'exact_amount_out',
    'buyAmount',
    'amount_out',
    'expectedOut',
    'quoted_amount_out',
  ]);
  const ttlMs = first(message, ['min_deadline_ms', 'ttl_ms', 'deadline_ms']);

  return {
    quote_id: String(quoteId),
    asset_in: String(assetIn),
    asset_out: String(assetOut),
    amount_in: stringify(amountIn),
    amount_out: stringify(amountOut),
    ttl_ms: stringify(ttlMs),
  };
}
/**
 * Return the value of the first key in `keys` whose value on `obj` is neither
 * `null` nor `undefined`.
 *
 * @param {object} obj - Object to read from.
 * @param {string[]} keys - Candidate property names, in priority order.
 * @returns {*} The first non-nullish value, or `null` when none exists.
 */
function first(obj, keys) {
  const matchedKey = keys.find((name) => obj[name] != null);
  return matchedKey === undefined ? null : obj[matchedKey];
}
/**
 * Convert a value to a string, keeping "absent" as `null`.
 *
 * @param {*} value
 * @returns {string | null} `null` for `null`/`undefined`, otherwise `String(value)`.
 */
function stringify(value) {
  if (value == null) return null;
  return String(value);
}
/**
 * Tell whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

View file

@ -0,0 +1,167 @@
import { matchesPairFilter } from '../../core/pair-filter.mjs';
import { logStatus, startIdleHeartbeat } from '../../core/log.mjs';
import { assertNormalizedSwapDemand } from '../../core/schemas.mjs';
import { buildNearIntentsQuoteEnvelope, buildNearIntentsRawEnvelope } from './normalize.mjs';
// WebSocket endpoint used when the caller does not pass `wsUrl`.
const DEFAULT_WS_URL = 'wss://solver-relay-v2.chaindefuser.com/ws';
// JSON-RPC request ids of the two `subscribe` calls sent on connect; the
// matching responses are recognized by these same ids.
const QUOTE_SUB_ID = 1;
const QUOTE_STATUS_SUB_ID = 2;
/**
 * Connect to the NEAR Intents solver relay over WebSocket, subscribe to the
 * `quote` and `quote_status` streams, and publish every matching quote to
 * Kafka as a raw envelope plus a normalized `swap_demand` envelope.
 *
 * The connection is re-established 2 seconds after every close. An idle
 * heartbeat reports how many quotes have been published so far.
 *
 * Frames that cannot be decoded, parsed, normalized or validated are dropped.
 * Validation and envelope-building failures are logged instead of being thrown
 * out of the async message listener, where they would surface as an unhandled
 * promise rejection and by default terminate the Node.js process.
 *
 * @param {object} options
 * @param {string} options.apiKey - Bearer token sent in the connection headers.
 * @param {string} [options.wsUrl] - Relay endpoint; defaults to DEFAULT_WS_URL.
 * @param {[string, string] | null} [options.pairFilter] - Optional asset pair filter.
 * @param {{ sendJson: Function }} options.producer - Publisher; `sendJson(topic, value, { key })` is awaited.
 * @param {string} options.rawTopic - Topic for raw envelopes.
 * @param {string} options.normalizedTopic - Topic for normalized envelopes.
 * @param {Function} [options.onPublish] - Called with `(envelope, publishedCount)` after each publish.
 * @returns {Promise<void>} Resolves once the first connection attempt has been started.
 * @throws {Error} (as a rejection) when `apiKey` is missing.
 */
export async function startNearIntentsWs({
  apiKey,
  wsUrl = DEFAULT_WS_URL,
  pairFilter,
  producer,
  rawTopic,
  normalizedTopic,
  onPublish = defaultOnPublish,
}) {
  if (!apiKey) throw new Error('Missing NEAR_INTENTS_API_KEY');

  let quoteSubscriptionId = null;
  let quoteStatusSubscriptionId = null;
  let lastStatusAt = Date.now();
  let publishedCount = 0;
  let publishLocked = false;

  function connect() {
    const ws = new WebSocket(wsUrl, {
      headers: { Authorization: `Bearer ${apiKey}` },
    });

    ws.addEventListener('open', () => {
      logStatus('near-intents connected');
      ws.send(JSON.stringify({ jsonrpc: '2.0', id: QUOTE_SUB_ID, method: 'subscribe', params: ['quote'] }));
      ws.send(JSON.stringify({ jsonrpc: '2.0', id: QUOTE_STATUS_SUB_ID, method: 'subscribe', params: ['quote_status'] }));
    });

    ws.addEventListener('message', async (event) => {
      lastStatusAt = Date.now();

      let payload;
      try {
        // Decoding sits inside the try so that a frame whose data cannot be
        // turned into text is dropped just like unparsable JSON.
        const text = typeof event.data === 'string' ? event.data : Buffer.from(event.data).toString('utf8');
        payload = JSON.parse(text);
      } catch {
        return;
      }

      // Responses to our two subscribe requests carry the subscription ids.
      if (payload?.id === QUOTE_SUB_ID) {
        quoteSubscriptionId = extractSubscriptionId(payload.result);
        return;
      }
      if (payload?.id === QUOTE_STATUS_SUB_ID) {
        quoteStatusSubscriptionId = extractSubscriptionId(payload.result);
        return;
      }

      const eventFrame = extractQuoteEventFrame(payload);
      if (!eventFrame) return;

      const { subscription, merged } = eventFrame;
      // Ignore quote_status frames and frames from any other known subscription.
      if (quoteStatusSubscriptionId && subscription === quoteStatusSubscriptionId) return;
      if (quoteSubscriptionId && subscription && subscription !== quoteSubscriptionId) return;
      // Frames arriving while a publish is still in flight are dropped.
      if (publishLocked) return;

      let rawEnvelope;
      let envelope;
      try {
        rawEnvelope = buildNearIntentsRawEnvelope(merged);
        envelope = buildNearIntentsQuoteEnvelope(merged);
        if (!envelope) return;
        assertNormalizedSwapDemand(envelope);
      } catch (error) {
        logStatus(`near-intents quote rejected: ${error.message || 'unknown error'}`);
        return;
      }

      const assetIn = envelope.payload?.asset_in;
      const assetOut = envelope.payload?.asset_out;
      if (!assetIn || !assetOut) return;
      if (!matchesPairFilter(assetIn, assetOut, pairFilter)) return;

      publishLocked = true;
      try {
        await producer.sendJson(rawTopic, rawEnvelope, { key: rawEnvelope.event_id });
        await producer.sendJson(normalizedTopic, envelope, { key: envelope.payload.quote_id });
        publishedCount += 1;
        onPublish(envelope, publishedCount);
      } catch (error) {
        logStatus(`kafka publish failed: ${error.message || 'unknown error'}`);
      } finally {
        publishLocked = false;
      }
    });

    ws.addEventListener('close', () => {
      logStatus('near-intents disconnected; reconnecting in 2s');
      setTimeout(connect, 2000);
    });

    ws.addEventListener('error', (err) => {
      logStatus(`near-intents socket error: ${err.message || 'unknown error'}`);
    });
  }

  startIdleHeartbeat({
    label: 'near-intents',
    getLastActivityAt: () => lastStatusAt,
    getStatus: () => `published=${publishedCount}`,
  });

  connect();
}
/**
 * Pull the subscription id out of a `subscribe` response result.
 *
 * A string result is the id itself. An object result is searched for
 * `subscription`, `subscription_id` and `subscriber_id`, in that order.
 *
 * @param {unknown} result - The `result` member of the response.
 * @returns {*} The id, or `null` when none can be found.
 */
function extractSubscriptionId(result) {
  switch (typeof result) {
    case 'string':
      return result;
    case 'object':
      if (!result) return null;
      return result.subscription || result.subscription_id || result.subscriber_id || null;
    default:
      return null;
  }
}
/**
 * Locate the quote data inside a decoded relay frame.
 *
 * Three places are tried in order: `payload.params` (for `method: 'event'`
 * frames), an object-valued `payload.result`, and the payload itself. For each
 * candidate, `metadata` and `data` (when they are objects) are merged with
 * `data` taking precedence; otherwise the candidate itself is used. The first
 * merged object that looks like a quote is returned.
 *
 * @param {unknown} payload - Parsed frame.
 * @returns {{ subscription: *, merged: object } | null} The quote fields and
 *   the candidate's subscription id (or `null`), or `null` when nothing matches.
 */
function extractQuoteEventFrame(payload) {
  const isEventNotification = payload?.method === 'event' && payload?.params;
  const hasObjectResult = payload?.result && typeof payload.result === 'object';
  const isObjectPayload = payload && typeof payload === 'object';

  const candidates = [
    ...(isEventNotification ? [payload.params] : []),
    ...(hasObjectResult ? [payload.result] : []),
    ...(isObjectPayload ? [payload] : []),
  ];

  for (const candidate of candidates) {
    const dataPart = isRecord(candidate?.data) ? candidate.data : null;
    const metadataPart = isRecord(candidate?.metadata) ? candidate.metadata : null;

    let merged = candidate;
    if (dataPart || metadataPart) {
      // `data` is spread last so its fields override same-named metadata fields.
      merged = { ...(metadataPart ?? {}), ...(dataPart ?? {}) };
    }

    if (isRecord(merged) && looksLikeQuotePayload(merged)) {
      return { subscription: candidate?.subscription ?? null, merged };
    }
  }

  return null;
}
/**
 * Heuristic: does this object carry at least one truthy quote-identifying
 * field (a quote hash/id or an input/output asset)?
 *
 * @param {object} payload
 * @returns {boolean}
 */
function looksLikeQuotePayload(payload) {
  const markerFields = [
    'quote_hash',
    'quote_id',
    'defuse_asset_identifier_in',
    'defuse_asset_identifier_out',
    'asset_in',
    'asset_out',
  ];
  return markerFields.some((name) => Boolean(payload[name]));
}
/**
 * Tell whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (!value) return false;
  return typeof value === 'object' && !Array.isArray(value);
}
// No-op default for the `onPublish` option of startNearIntentsWs.
function defaultOnPublish() {}

View file

@ -1,37 +0,0 @@
import pathlib
import re
import subprocess
import unittest
# Directory one level above the one containing this test file; the tests below
# resolve manifest paths against it and use it as the kubectl working directory.
ROOT = pathlib.Path(__file__).resolve().parents[1]
class NtfyManifestTest(unittest.TestCase):
    """Checks that the ntfy manifests are wired into the platform base and stay internal-only."""

    def test_platform_kustomization_owns_internal_ntfy_utility_resources(self):
        # The platform base kustomization must list both utility resources.
        kustomization = (ROOT / 'deploy/k8s/platform/base/kustomization.yaml').read_text()
        for resource in ('utility-namespace.yaml', 'ntfy.yaml'):
            self.assertIn(resource, kustomization)

    def test_ntfy_manifest_is_internal_clusterip_service_with_health_checks(self):
        # The manifest pins the image, exposes a ClusterIP service with a
        # health path, and declares no Ingress.
        manifest = (ROOT / 'deploy/k8s/platform/base/ntfy.yaml').read_text()
        expected_fragments = (
            'namespace: utility',
            'image: binwiederhier/ntfy:v2.21.0',
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, manifest)
        self.assertRegex(manifest, r'kind: Service[\s\S]*type: ClusterIP')
        self.assertIn('path: /v1/health', manifest)
        self.assertIn('base-url: http://ntfy.utility.svc.cluster.local', manifest)
        self.assertNotIn('kind: Ingress', manifest)

    def test_overlay_render_contains_cluster_owned_ntfy_without_public_ingress(self):
        # Render the overlay with kubectl and inspect the combined output.
        command = ['kubectl', 'kustomize', 'deploy/k8s/overlays/hetzner-single-node']
        rendered = subprocess.check_output(command, cwd=ROOT, text=True)
        for fragment in ('name: utility', 'name: ntfy', 'image: binwiederhier/ntfy:v2.21.0'):
            self.assertIn(fragment, rendered)
        ingress_for_ntfy = re.compile(r'kind: Ingress[\s\S]*name: ntfy')
        self.assertNotRegex(rendered, ingress_for_ntfy)
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()