commit 284e465588d53875e30a955b6c62db91c5fbee19
Author: philipp <klein.philipp@gmail.com>
Date:   Sat Apr 18 11:23:28 2026 +0200

    Prepare Kubernetes orderbooks deployment

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..d8e90c2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,26 @@
+.git/
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+artifacts/
+data/
+reports/
+orchestration/
+.env
+*.env
+rclone.conf
+**/rclone.conf
+*.pem
+*.key
+*.p12
+*.pfx
+id_rsa*
+id_ed25519*
+*mnemonic*
+*wallet*
+*credential*
+*secret*
diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml
new file mode 100644
index 0000000..67006ca
--- /dev/null
+++ b/.forgejo/workflows/deploy.yml
@@ -0,0 +1,181 @@
+# Deploy pipeline: builds the image in-cluster with kaniko, applies the
+# kustomize base, and waits for the listed deployments to roll out.
+name: deploy
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  deploy:
+    runs-on: linux-amd64
+    env:
+      IMAGE_TAG: ${{ github.sha }}
+      REGISTRY_HOST: ${{ vars.REGISTRY_HOST }}
+      PROJECT_NAME: ${{ vars.PROJECT_NAME || 'orderbooks' }}
+      PROJECT_NAMESPACE: ${{ vars.PROJECT_NAMESPACE || 'orderbooks' }}
+      PROJECT_DEPLOYMENTS: ${{ vars.PROJECT_DEPLOYMENTS || 'orderbooks-collector' }}
+      PROJECT_REGISTRY_SECRET_NAME: ${{ vars.PROJECT_REGISTRY_SECRET_NAME || 'orderbooks-registry-creds' }}
+      REPO_CLONE_URL: ${{ github.server_url }}/${{ github.repository }}.git
+    steps:
+      - name: Install tooling
+        run: |
+          if command -v git >/dev/null 2>&1 && command -v kubectl >/dev/null 2>&1 && command -v python3 >/dev/null 2>&1; then
+            exit 0
+          fi
+
+          if command -v apk >/dev/null 2>&1; then
+            apk add --no-cache git kubectl python3
+            exit 0
+          fi
+
+          if command -v apt-get >/dev/null 2>&1; then
+            apt-get update
+            apt-get install -y git curl ca-certificates python3
+            # Resolve the release once so the binary and its checksum refer to the same version.
+            kubectl_version="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
+            kubectl_url="https://dl.k8s.io/release/${kubectl_version}/bin/linux/amd64/kubectl"
+            curl -fsSLo /usr/local/bin/kubectl "$kubectl_url"
+            # Verify the download against the published SHA-256 before making it executable.
+            echo "$(curl -fsSL "${kubectl_url}.sha256")  /usr/local/bin/kubectl" | sha256sum -c -
+            chmod +x /usr/local/bin/kubectl
+            exit 0
+          fi
+
+          echo "missing git/kubectl/python3 and no supported package manager found" >&2
+          exit 1
+
+      - name: Prepare workspace
+        run: |
+          workspace_root="${RUNNER_TEMP:-/tmp}"
+          workspace_dir="$(mktemp -d "${workspace_root%/}/orderbooks-deploy-XXXXXX")"
+          echo "WORKSPACE_DIR=$workspace_dir" >> "$GITHUB_ENV"
+          echo "runner workspace: $workspace_dir"
+
+      - name: Load kubeconfig
+        env:
+          KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
+        run: |
+          mkdir -p "$HOME/.kube"
+          # Secret arrives via env (not templated into the script); umask keeps the file private.
+          umask 077
+          printf '%s' "$KUBECONFIG_B64" | base64 -d > "$HOME/.kube/config"
+          kubectl get ns
+
+      - name: Checkout repo
+        env:
+          REPO_TOKEN: ${{ github.token }}
+        run: |
+          git -c credential.username=oauth2 -c http.extraHeader="Authorization: Bearer ${REPO_TOKEN}" clone --depth=1 "${REPO_CLONE_URL}" "$WORKSPACE_DIR"
+          cd "$WORKSPACE_DIR"
+          current_sha="$(git rev-parse HEAD)"
+          if [ "$current_sha" != "$GITHUB_SHA" ]; then
+            git -c credential.username=oauth2 -c http.extraHeader="Authorization: Bearer ${REPO_TOKEN}" fetch --depth=1 origin "${GITHUB_SHA}"
+            git checkout --detach "${GITHUB_SHA}"
+          else
+            git checkout --detach "$current_sha"
+          fi
+          git rev-parse HEAD
+
+      - name: Resolve deployment settings
+        run: |
+          if [ -z "${REGISTRY_HOST:-}" ]; then
+            echo "REGISTRY_HOST repo variable is required" >&2
+            exit 1
+          fi
+          IMAGE="$REGISTRY_HOST/$PROJECT_NAME:$IMAGE_TAG"
+          BUILD_JOB="image-build-$(printf '%s' "$GITHUB_SHA" | cut -c1-12)"
+          {
+            echo "IMAGE=$IMAGE"
+            echo "BUILD_JOB=$BUILD_JOB"
+          } >> "$GITHUB_ENV"
+
+      - name: Ensure namespace exists
+        run: |
+          kubectl apply -f "$WORKSPACE_DIR/deploy/k8s/base/namespace.yaml"
+
+      - name: Build and push image in-cluster
+        env:
+          REPO_TOKEN: ${{ github.token }}
+        run: |
+          kubectl -n "$PROJECT_NAMESPACE" delete job "$BUILD_JOB" --ignore-not-found=true
+          # The heredoc is unquoted, so the runner expands ${...} while rendering the
+          # manifest. The init container's own variables are escaped (\$) so the token
+          # appears only in its env entry and is expanded inside the container.
+          cat <<EOF | kubectl apply -f -
+          apiVersion: batch/v1
+          kind: Job
+          metadata:
+            name: ${BUILD_JOB}
+            namespace: ${PROJECT_NAMESPACE}
+          spec:
+            backoffLimit: 0
+            ttlSecondsAfterFinished: 3600
+            template:
+              spec:
+                restartPolicy: Never
+                volumes:
+                  - name: workspace
+                    emptyDir: {}
+                  - name: registry-creds
+                    secret:
+                      secretName: ${PROJECT_REGISTRY_SECRET_NAME}
+                      items:
+                        - key: .dockerconfigjson
+                          path: config.json
+                initContainers:
+                  - name: checkout
+                    image: alpine/git:2.47.2
+                    env:
+                      - name: REPO_TOKEN
+                        value: "${REPO_TOKEN}"
+                      - name: REPO_CLONE_URL
+                        value: "${REPO_CLONE_URL}"
+                      - name: GITHUB_SHA
+                        value: "${GITHUB_SHA}"
+                    command: ["/bin/sh", "-lc"]
+                    args:
+                      - >-
+                        git -c credential.username=oauth2 -c http.extraHeader="Authorization: Bearer \${REPO_TOKEN}" clone --depth=1 "\${REPO_CLONE_URL}" /workspace &&
+                        cd /workspace &&
+                        git -c credential.username=oauth2 -c http.extraHeader="Authorization: Bearer \${REPO_TOKEN}" fetch --depth=1 origin "\${GITHUB_SHA}" &&
+                        git checkout --detach "\${GITHUB_SHA}"
+                    volumeMounts:
+                      - name: workspace
+                        mountPath: /workspace
+                containers:
+                  - name: kaniko
+                    image: gcr.io/kaniko-project/executor:v1.23.2-debug
+                    args:
+                      - --context=/workspace
+                      - --dockerfile=/workspace/Dockerfile
+                      - --destination=${IMAGE}
+                      - --cache=false
+                    volumeMounts:
+                      - name: workspace
+                        mountPath: /workspace
+                      - name: registry-creds
+                        mountPath: /kaniko/.docker
+          EOF
+          # Print build logs on failure or timeout as well, and fail the step
+          # explicitly so a broken image build can never fall through to the rollout.
+          if ! kubectl -n "$PROJECT_NAMESPACE" wait --for=condition=Complete --timeout=20m "job/$BUILD_JOB"; then
+            kubectl -n "$PROJECT_NAMESPACE" logs "job/$BUILD_JOB" --all-containers=true || true
+            exit 1
+          fi
+          kubectl -n "$PROJECT_NAMESPACE" logs "job/$BUILD_JOB"
+
+      - name: Apply release manifests and wait for rollout
+        run: |
+          kubectl kustomize "$WORKSPACE_DIR/deploy/k8s/base" \
+            | IMAGE="$IMAGE" python3 -c 'import os, sys; sys.stdout.write(sys.stdin.read().replace("registry.doran.133011.xyz/orderbooks:bootstrap", os.environ["IMAGE"]))' \
+            | kubectl apply -f -
+
+          printf '%s' "$PROJECT_DEPLOYMENTS" | tr ',' '\n' \
+            | while IFS= read -r deployment; do
+                [ -n "$deployment" ] || continue
+                kubectl -n "$PROJECT_NAMESPACE" set image "deployment/$deployment" "*=$IMAGE"
+                kubectl -n "$PROJECT_NAMESPACE" rollout status "deployment/$deployment" --timeout=300s
+              done
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8e8cbb7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,43 @@
+# Local runtime data and evidence stay local
+data/
+artifacts/
+reports/
+orchestration/
+
+# Python/cache/build noise
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+*.egg-info/
+build/
+dist/
+
+# Environments and local config
+.venv/
+.env
+*.env
+!.dockerignore
+!.gitignore
+
+# Kubernetes/rclone/secret material
+kubeconfig*
+*.kubeconfig
+rclone.conf
+**/rclone.conf
+*.pem
+*.key
+*.p12
+*.pfx
+id_rsa*
+id_ed25519*
+*mnemonic*
+*wallet*
+*credential*
+*secret*
+
+# Editor/OS noise
+.DS_Store
+.idea/
+.vscode/
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..558c87b
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,91 @@
+# Agent Instructions
+
+Project: Cross-Market Live Orderbook Archive
+
+This repository exists to preserve live market microstructure data that is usually lost: order books, spreads, liquidity, depth, timestamps, request metadata, and enough raw context to later decide whether a trading idea was observable, fillable, and reproducible at the time.
+
+The first market is Polymarket. Future markets may include NEAR-related venues and other prediction or crypto markets, but do not build generic multi-market infrastructure before the second market exists.
+
+## Active Collaboration Model
+
+This project uses a two-role workflow:
+
+- `orchestrator`: coordinates checkpoints with the user, keeps scope narrow, records decisions, reviews evidence, states gates, and decides the next smallest step.
+- `builder`: works in a separate session to implement the active checkpoint artifacts, run commands, collect evidence, and write manifests/reports.
+
+The current primary chat session is the `orchestrator`. The orchestrator should not silently become the builder unless the user explicitly asks. The builder should treat `AGENTS.md`, `ROADMAP.md`, `docs/METHODOLOGY.md`, and the active checkpoint report as the durable source of instructions.
+
+Hand-offs between orchestrator and builder must be written to disk under `orchestration/` or `reports/checkpoints/` when they contain decisions, scope changes, endpoint findings, or validation results. Chat-only instructions are not enough for project-critical state.
+
+## Non-Negotiable Rules
+
+1. Preserve raw data first. Raw API and websocket payloads are the source of truth. Derived datasets are secondary and must reference raw files.
+2. No trading. Do not add order placement, signing, private-key handling, wallet logic, strategy execution, or bot behavior.
+3. No secrets in the repo. Never commit API keys, rclone credentials, wallet material, cookies, or private endpoints.
+4. Every checkpoint needs durable evidence on disk: code or docs, config or run instructions, manifest/report, and validation evidence.
+5. Do not claim success without commands, outputs, files, checksums, or real collected data to support the claim.
+6. Do not delete mistakes. If an artifact is wrong, misleading, partial, or deprecated, preserve it and label it with a reason and replacement.
+7. Keep the scope narrow. No dashboard, database, ML, strategy, backtest, or generic framework until the roadmap gate allows it.
+8. Public data only unless a later checkpoint explicitly documents why authenticated public-data access is required.
+9. "Production-ready" is forbidden until the collector has completed a documented 24h soak test with acceptable quality.
+
+## Expected Workflow
+
+For each checkpoint:
+
+1. Define the smallest useful checkpoint.
+2. Build only what is needed for that checkpoint.
+3. Validate with real commands and, when applicable, real public data.
+4. Write a machine-readable manifest and a short markdown note.
+5. State PASS, FAIL, or BLOCKED.
+6. Identify the strongest fake-progress risk.
+7. Recommend the next smallest step.
+8. Stop only when a real user or orchestrator decision is needed.
+
+## Repository Conventions
+
+- `scripts/`: executable probes, discovery scripts, collectors, normalizers, and upload helpers.
+- `config/`: example configuration only. Real secrets and machine-local config stay outside git.
+- `docs/`: durable methodology, data contracts, operational runbooks, and endpoint notes.
+- `orchestration/prompts/`: prompts and templates used by future agents.
+- `data/probes/`: bounded endpoint probe outputs and probe notes.
+- `data/discovery/`: market discovery outputs and manifests.
+- `data/live_sample/`: short sample collector runs.
+- `data/normalized_sample/`: derived sample outputs generated from raw samples.
+- `data/manifests/`: machine-readable manifests for probes, collectors, normalization, uploads, and checkpoints.
+- `reports/`: human-readable checkpoint, soak test, and incident reports.
+- `systemd/`: VPS runtime units when added.
+
+The initial Polymarket implementation should remain simple scripts until the collector works. Introduce `collectors/<market_name>/` only when adding a second market or when duplication proves painful.
+
+## Artifact Status Labels
+
+Every durable artifact should be treated as one of:
+
+- `valid`: current and usable.
+- `partial`: useful but incomplete.
+- `deprecated`: superseded by a newer artifact.
+- `invalid`: known to be wrong or misleading.
+
+When marking an artifact `deprecated` or `invalid`, write a sibling markdown note or manifest entry with:
+
+- original artifact path
+- status
+- reason
+- replacement path, if any
+- labeled_at_utc
+- labeled_by
+
+Do not remove the original artifact unless the user explicitly asks and there is a written reason.
+
+## Adding New Market Connectors Later
+
+Before adding a second market, Polymarket must have working discovery, raw order-book collection, Google Drive offload, and a 24h soak test.
+
+When the gate is met:
+
+1. Create `collectors/<market_name>/` for market-specific code.
+2. Keep shared code minimal and concrete.
+3. Reuse the same raw-first file layout and manifest format.
+4. Document endpoint quirks, timestamp semantics, rate limits, and schema differences in `docs/`.
+5. Avoid abstract base classes until at least two real collectors expose repeated code that is painful to maintain.
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..086f077
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+FROM python:3.12-slim
+# Python runtime flags plus default locations and interpreter name.
+ENV ORDERBOOKS_APP_DIR=/app \
+    ORDERBOOKS_DATA_DIR=/var/lib/orderbooks \
+    ORDERBOOKS_PYTHON=python3 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+# OS packages, then a fixed-ID unprivileged account (UID/GID 10001).
+RUN apt-get update \
+    && apt-get install --yes --no-install-recommends bash ca-certificates rclone \
+    && rm -rf /var/lib/apt/lists/* \
+    && groupadd --system --gid 10001 orderbooks \
+    && useradd --system --uid 10001 --gid 10001 --shell /usr/sbin/nologin --home-dir /var/lib/orderbooks orderbooks
+# Relative COPY and RUN paths below resolve against the application directory.
+WORKDIR /app
+# Ship the scripts, docs, example config, and the project rule files.
+COPY scripts/ scripts/
+COPY docs/ docs/
+COPY config/ config/
+COPY AGENTS.md ROADMAP.md ./
+# Create the data tree, mark shell scripts executable, and hand both trees to the service user.
+RUN mkdir -p /var/lib/orderbooks/discovery /var/lib/orderbooks/raw_orderbooks /var/lib/orderbooks/manifests \
+    && chmod +x scripts/*.sh \
+    && chown -R orderbooks:orderbooks /var/lib/orderbooks /app
+# Run as the unprivileged account, referenced by numeric ID.
+USER 10001:10001
+# Default command: the collector loop script.
+CMD ["/bin/bash", "/app/scripts/run_polymarket_collector_loop.sh"]
diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000..04e439e
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,212 @@
+# Roadmap
+
+Project: Cross-Market Live Orderbook Archive
+
+Goal: build a reliable, minimal, always-on archive of live market microstructure data so future research agents can test whether strategies were actually observable, fillable, and reproducible in real time.
+
+The roadmap is checkpoint-driven. Each checkpoint must leave durable artifacts, validation evidence, and an explicit gate result.
+
+## Current Status
+
+- Latest completed checkpoint: Checkpoint 7, Google Drive Offload
+- Latest gate: PASS
+- Next checkpoint: Checkpoint 8, 24h Soak Test Plan
+- Initial market: Polymarket
+- Future market work: gated until Polymarket is stable
+
+## Checkpoint 1: Project Scaffold And Methodology
+
+Goal: create the minimum repository structure and rules that keep future agents on track.
+
+Artifacts:
+
+- `AGENTS.md`
+- `ROADMAP.md`
+- `docs/METHODOLOGY.md`
+- `docs/DATA_CONTRACT.md`
+- `docs/OPERATIONS.md`
+- `orchestration/prompts/`
+
+Requirements:
+
+- Define project goal.
+- Define anti-fake-progress rules.
+- Define raw-first storage policy.
+- Define checkpoint reporting format.
+- Define no-trading/no-private-key policy.
+- Define how to label deprecated or misleading artifacts instead of deleting them.
+- Define how new market connectors should be added later.
+
+Pass condition: the repo contains durable project rules and the next checkpoint is specific enough to execute.
+
+## Checkpoint 2: Polymarket Public Data Source Probe
+
+Goal: determine exactly which public Polymarket endpoints can support live collection.
+
+Questions:
+
+- How to discover active Polymarket markets?
+- How to filter BTC up/down markets?
+- How to resolve conditionId and token IDs?
+- How to fetch current order book for one token?
+- Is there a batch order-book endpoint?
+- Is there a market websocket for order-book updates?
+- Is there a trade websocket or recent trades endpoint?
+- What rate limits are documented or observed?
+- What fields are returned?
+- What timestamps exist?
+
+Artifacts:
+
+- `scripts/probe_polymarket_public_sources.py`
+- `data/probes/polymarket_public_sources_probe_v1.json`
+- `data/probes/polymarket_public_sources_probe_v1.md`
+
+Pass condition: we know the exact endpoint set and can fetch at least one active market metadata record and one current order book.
+
+## Checkpoint 3: Minimal BTC Market Discovery
+
+Goal: build a small script that finds active BTC up/down Polymarket markets and resolves both outcome token IDs.
+
+Artifacts:
+
+- `scripts/discover_polymarket_btc_markets.py`
+- `data/discovery/polymarket_btc_markets_latest.json`
+- `data/discovery/polymarket_btc_markets_manifest.json`
+- `data/discovery/polymarket_btc_markets.md`
+
+Requirements:
+
+- Public endpoints only.
+- No trading.
+- No API keys unless strictly needed for public data.
+- Never store secrets in the repo.
+- Preserve raw metadata responses.
+- Write normalized market records with slug, question, conditionId, token IDs, outcomes, times, status, source, and `fetched_at_utc`.
+
+Pass condition: the script reliably outputs currently active BTC markets with token IDs.
+
+## Checkpoint 4: Minimal Orderbook Snapshot Collector
+
+Goal: collect raw order-book snapshots for active BTC markets at a fixed interval.
+
+Artifacts:
+
+- `scripts/collect_polymarket_orderbooks.py`
+- `config/polymarket_collector.example.yaml`
+- `data/live_sample/...`
+- `data/manifests/orderbook_collector_sample_manifest.json`
+- `docs/POLYMARKET_COLLECTOR.md`
+
+Requirements:
+
+- Collect active BTC markets only.
+- Fetch order books for both outcome tokens.
+- Store raw API responses as gzip JSONL.
+- Add local `collected_at_utc`, collector version, endpoint URL, and request params.
+- Rotate files by hour or run.
+- Include a manifest with timing, markets, request counts, status codes, rows, output files, and checksums.
+- Handle graceful shutdown and rate limits.
+- Do not add a database.
+
+Pass condition: a 5-10 minute sample run creates valid compressed raw snapshots and a manifest.
+
+## Checkpoint 5: Normalized Snapshot Extract
+
+Goal: create a derived normalized dataset from raw snapshots while preserving raw files as source of truth.
+
+Artifacts:
+
+- `scripts/normalize_polymarket_orderbooks.py`
+- `data/normalized_sample/...`
+- `data/manifests/orderbook_normalization_sample_manifest.json`
+- `docs/ORDERBOOK_SCHEMA.md`
+
+Pass condition: a sample raw file can be normalized and basic sanity checks pass.
+
+## Checkpoint 6: VPS Runtime Package
+
+Goal: make the collector deployable on a small VPS.
+
+Artifacts:
+
+- `systemd/polymarket-orderbook-collector.service`
+- `config/polymarket_collector.vps.example.yaml`
+- `scripts/run_polymarket_collector_cycle.sh`
+- `docs/VPS_DEPLOYMENT.md`
+
+Uploader service and timer units are deferred to Checkpoint 7 with Google Drive
+offload. Creating empty uploader units in Checkpoint 6 would be fake progress.
+
+Pass condition: a user can follow docs on a VPS and run the collector.
+
+## Checkpoint 7: Google Drive Offload
+
+Goal: add periodic upload to Google Drive using `rclone`.
+
+Artifacts:
+
+- `scripts/upload_archive_rclone.sh`
+- `config/rclone.example.md`
+- `docs/GOOGLE_DRIVE_OFFLOAD.md`
+- sample upload manifest format
+
+Pass condition: a dry-run and a real small test upload succeed and are documented.
+
+## Checkpoint 8: 24h Soak Test Plan
+
+Goal: run the collector for a real 24h period and validate reliability.
+
+Artifacts:
+
+- `reports/soak_test_YYYY-MM-DD.md`
+- `data/manifests/...`
+
+Metrics:
+
+- uptime
+- markets tracked
+- total snapshots
+- missed interval estimate
+- API errors
+- rate limits
+- file sizes
+- compression ratio
+- Google Drive upload status
+- restart behavior
+- disk usage
+- data quality checks
+
+Pass condition: a 24h run completes with acceptable data quality and documented issues.
+
+## Checkpoint 9: Add Second Market Only After Polymarket Is Stable
+
+Goal: prepare for NEAR or another market only after Polymarket collector reliability is proven.
+
+Do not start this checkpoint until:
+
+- Polymarket discovery works.
+- Polymarket order-book collection works.
+- Google Drive offload works.
+- The 24h soak test is complete.
+
+Architecture principles:
+
+- Use `collectors/<market_name>/` only when adding the second market.
+- Keep shared code minimal.
+- Avoid abstract base classes until duplication is painful.
+- Keep raw-first, normalized-second, manifest-always file format consistent across markets.
+
+## Anti-Fake-Progress Gates
+
+- No dashboard before 24h data reliability.
+- No database before the file archive becomes painful.
+- No strategy or backtest code in this project.
+- No live trading.
+- No generic multi-market abstraction before the second market exists.
+- No claiming "production-ready" before a 24h soak test.
+- No deleting bad artifacts; label them deprecated or invalid and write why.
+
+## Next Smallest Step
+
+Checkpoint 8 is next. It should run the collector for a real 24h period, record the metrics listed under Checkpoint 8, and document data quality and any issues in a soak test report.
diff --git a/config/polymarket_collector.example.yaml b/config/polymarket_collector.example.yaml
new file mode 100644
index 0000000..3446dfc
--- /dev/null
+++ b/config/polymarket_collector.example.yaml
@@ -0,0 +1,20 @@
+# Sample configuration for the bounded Checkpoint 4 Polymarket order-book run.
+# Contains no secrets; only public endpoints are read by the collector.
+
+# Public CLOB endpoint and request retry settings.
+clob_books_url: https://clob.polymarket.com/books
+request_timeout_seconds: 15
+max_retries: 2
+backoff_seconds: 2
+
+# Deliberately small defaults so the sample adds little endpoint load.
+market_limit: 2
+interval_seconds: 30
+duration_seconds: 300
+# Skip markets that end too soon: 300 s sample duration + 120 s buffer.
+market_end_safety_seconds: 420
+
+# Discovery input, raw output directory, and run manifest.
+discovery_path: data/discovery/polymarket_btc_markets_latest.json
+output_dir: data/live_sample
+manifest_path: data/manifests/orderbook_collector_sample_manifest.json
diff --git a/config/polymarket_collector.vps.example.yaml b/config/polymarket_collector.vps.example.yaml
new file mode 100644
index 0000000..09353ce
--- /dev/null
+++ b/config/polymarket_collector.vps.example.yaml
@@ -0,0 +1,17 @@
+# VPS example configuration (Checkpoint 6) for the raw Polymarket order-book
+# collector. Install as /etc/orderbooks/polymarket_collector.vps.yaml and adjust
+# the paths when the service keeps its data in a different directory.
+
+clob_books_url: https://clob.polymarket.com/books
+request_timeout_seconds: 15
+max_retries: 2
+backoff_seconds: 2
+
+market_limit: 2
+interval_seconds: 30
+duration_seconds: 300
+market_end_safety_seconds: 420
+
+discovery_path: /var/lib/orderbooks/discovery/polymarket_btc_markets_latest.json
+output_dir: /var/lib/orderbooks/raw_orderbooks
+manifest_path: /var/lib/orderbooks/manifests/polymarket_orderbook_collector_latest.json
diff --git a/config/rclone.example.md b/config/rclone.example.md
new file mode 100644
index 0000000..70bd06e
--- /dev/null
+++ b/config/rclone.example.md
@@ -0,0 +1,76 @@
+# rclone Configuration Example
+
+Status: valid
+
+This file documents the expected `rclone` setup for Checkpoint 7. It is not an
+`rclone.conf` file and must not be copied into the repository with private auth
+material.
+
+## Remote Name
+
+The examples use this remote path:
+
+```text
+gdrive:orderbooks/polymarket
+```
+
+You may choose another remote name or folder. The uploader reads the destination
+from:
+
+```text
+ORDERBOOKS_RCLONE_DEST
+```
+
+For the systemd service, set it in:
+
+```text
+/etc/orderbooks/orderbook-uploader.env
+```
+
+Example:
+
+```text
+ORDERBOOKS_RCLONE_DEST=gdrive:orderbooks/polymarket
+```
+
+Do not place private auth files, browser tokens, API keys, wallet material, or
+session material in this repository.
+
+## Configure Google Drive Outside The Repo
+
+Install `rclone` on the VPS, then configure the remote as the service user or
+with a root-managed config path that the service can read:
+
+```sh
+sudo apt-get install -y rclone
+sudo -u orderbooks rclone config
+sudo -u orderbooks rclone lsd gdrive:
+```
+
+If the service user uses the default rclone config path, keep that file outside
+the repository under the service user's home/config directory.
+
+## Uploader Environment File
+
+Create:
+
+```text
+/etc/orderbooks/orderbook-uploader.env
+```
+
+Minimal example:
+
+```text
+ORDERBOOKS_RCLONE_DEST=gdrive:orderbooks/polymarket
+```
+
+Optional overrides:
+
+```text
+ORDERBOOKS_UPLOAD_DATA_DIR=/var/lib/orderbooks
+ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS=600
+ORDERBOOKS_UPLOAD_RETENTION_DAYS=7
+ORDERBOOKS_RCLONE_BIN=/usr/bin/rclone
+```
+
+The environment file belongs on the VPS. Do not commit a machine-local version.
diff --git a/deploy/k8s/base/configmap.yaml b/deploy/k8s/base/configmap.yaml
new file mode 100644
index 0000000..8b33cf3
--- /dev/null
+++ b/deploy/k8s/base/configmap.yaml
@@ -0,0 +1,25 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: orderbooks-collector-config  # referenced by the collector-config volume in deployment-collector.yaml
+  namespace: orderbooks
+  labels:
+    app.kubernetes.io/name: orderbooks
+    app.kubernetes.io/part-of: orderbooks
+    app.kubernetes.io/component: collector
+    app.kubernetes.io/managed-by: kustomize
+data:  # single key; the Deployment mounts it as /etc/orderbooks/polymarket_collector.yaml
+  polymarket_collector.yaml: |
+    discovery_path: /var/lib/orderbooks/discovery/polymarket_btc_markets_latest.json
+    output_dir: /var/lib/orderbooks/raw_orderbooks
+    manifest_path: /var/lib/orderbooks/manifests/polymarket_orderbook_collector_latest.json
+
+    market_limit: 2
+    interval_seconds: 30
+    duration_seconds: 300
+    market_end_safety_seconds: 420
+
+    clob_books_url: https://clob.polymarket.com/books
+    request_timeout_seconds: 15
+    max_retries: 2
+    backoff_seconds: 2
diff --git a/deploy/k8s/base/cronjob-uploader.yaml b/deploy/k8s/base/cronjob-uploader.yaml
new file mode 100644
index 0000000..2a11c4a
--- /dev/null
+++ b/deploy/k8s/base/cronjob-uploader.yaml
@@ -0,0 +1,92 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: orderbooks-uploader
+  namespace: orderbooks
+  labels:
+    app.kubernetes.io/name: orderbooks
+    app.kubernetes.io/part-of: orderbooks
+    app.kubernetes.io/component: uploader
+spec:
+  schedule: "*/15 * * * *"  # every 15 minutes
+  concurrencyPolicy: Forbid  # never start a run while the previous Job is still active
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 0  # a failed pod is not retried within the same Job
+      ttlSecondsAfterFinished: 86400  # finished Jobs are removed after 24 h
+      template:
+        metadata:
+          labels:
+            app.kubernetes.io/name: orderbooks
+            app.kubernetes.io/part-of: orderbooks
+            app.kubernetes.io/component: uploader
+        spec:
+          restartPolicy: Never
+          imagePullSecrets:
+            - name: orderbooks-registry-creds  # not defined in this kustomize base
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 10001  # UID of the orderbooks user created in the Dockerfile
+            runAsGroup: 10001
+            fsGroup: 10001
+            fsGroupChangePolicy: OnRootMismatch
+          containers:
+            - name: uploader
+              image: registry.doran.133011.xyz/orderbooks:bootstrap  # placeholder; the deploy workflow substitutes the built image
+              imagePullPolicy: IfNotPresent
+              command:
+                - /bin/bash
+                - /app/scripts/upload_archive_rclone.sh
+                - --execute  # NOTE(review): presumably switches the script from dry-run to a real upload — confirm in the script
+              env:
+                - name: ORDERBOOKS_DATA_DIR
+                  value: /var/lib/orderbooks
+                - name: ORDERBOOKS_UPLOAD_DATA_DIR
+                  value: /var/lib/orderbooks
+                - name: ORDERBOOKS_UPLOAD_RAW_DIR
+                  value: /var/lib/orderbooks/raw_orderbooks
+                - name: ORDERBOOKS_UPLOAD_SOURCE_MANIFEST_DIR
+                  value: /var/lib/orderbooks/manifests
+                - name: ORDERBOOKS_UPLOAD_MANIFEST_DIR
+                  value: /var/lib/orderbooks/manifests
+                - name: ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS
+                  value: "600"  # quoted so the env value stays a string
+                - name: ORDERBOOKS_UPLOAD_RETENTION_DAYS
+                  value: "7"  # quoted so the env value stays a string
+                - name: ORDERBOOKS_RCLONE_BIN
+                  value: /usr/bin/rclone
+                - name: ORDERBOOKS_RCLONE_DEST
+                  value: gdrive:orderbooks/polymarket
+                - name: RCLONE_CONFIG
+                  value: /etc/rclone/rclone.conf  # same path as the rclone-config mount below
+              volumeMounts:
+                - name: orderbooks-data
+                  mountPath: /var/lib/orderbooks
+                - name: rclone-config
+                  mountPath: /etc/rclone/rclone.conf
+                  subPath: rclone.conf  # mount only this key as a single file
+                  readOnly: true
+              resources:
+                requests:
+                  cpu: 50m
+                  memory: 128Mi
+                limits:
+                  cpu: 500m
+                  memory: 512Mi
+              securityContext:
+                allowPrivilegeEscalation: false
+                capabilities:
+                  drop:
+                    - ALL
+          volumes:
+            - name: orderbooks-data
+              persistentVolumeClaim:
+                claimName: orderbooks-data  # same claim the collector Deployment mounts
+            - name: rclone-config
+              secret:
+                secretName: orderbooks-rclone-config  # not defined in this kustomize base
+                items:
+                  - key: rclone.conf
+                    path: rclone.conf
diff --git a/deploy/k8s/base/deployment-collector.yaml b/deploy/k8s/base/deployment-collector.yaml
new file mode 100644
index 0000000..7878469
--- /dev/null
+++ b/deploy/k8s/base/deployment-collector.yaml
@@ -0,0 +1,86 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: orderbooks-collector  # default entry in the deploy workflow's PROJECT_DEPLOYMENTS
+  namespace: orderbooks
+  labels:
+    app.kubernetes.io/name: orderbooks
+    app.kubernetes.io/part-of: orderbooks
+    app.kubernetes.io/component: collector
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate  # the old pod is terminated before its replacement starts
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: orderbooks
+      app.kubernetes.io/component: collector
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: orderbooks
+        app.kubernetes.io/part-of: orderbooks
+        app.kubernetes.io/component: collector
+    spec:
+      terminationGracePeriodSeconds: 120  # time allowed between SIGTERM and SIGKILL
+      imagePullSecrets:
+        - name: orderbooks-registry-creds  # not defined in this kustomize base
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 10001  # UID of the orderbooks user created in the Dockerfile
+        runAsGroup: 10001
+        fsGroup: 10001
+        fsGroupChangePolicy: OnRootMismatch
+      containers:
+        - name: collector
+          image: registry.doran.133011.xyz/orderbooks:bootstrap  # placeholder; the deploy workflow substitutes the built image
+          imagePullPolicy: IfNotPresent
+          command:
+            - /bin/bash
+            - /app/scripts/run_polymarket_collector_loop.sh
+          env:
+            - name: ORDERBOOKS_APP_DIR
+              value: /app
+            - name: ORDERBOOKS_PYTHON
+              value: python3
+            - name: ORDERBOOKS_DATA_DIR
+              value: /var/lib/orderbooks
+            - name: ORDERBOOKS_COLLECTOR_CONFIG
+              value: /etc/orderbooks/polymarket_collector.yaml  # file provided by the collector-config mount below
+            - name: ORDERBOOKS_DISCOVERY_DIR
+              value: /var/lib/orderbooks/discovery
+            - name: ORDERBOOKS_OUTPUT_DIR
+              value: /var/lib/orderbooks/raw_orderbooks
+            - name: ORDERBOOKS_MANIFEST_DIR
+              value: /var/lib/orderbooks/manifests
+            - name: ORDERBOOKS_LOOP_SLEEP_SECONDS
+              value: "15"  # quoted so the env value stays a string
+          volumeMounts:
+            - name: orderbooks-data
+              mountPath: /var/lib/orderbooks
+            - name: collector-config
+              mountPath: /etc/orderbooks/polymarket_collector.yaml
+              subPath: polymarket_collector.yaml  # mount only this key as a single file
+              readOnly: true
+          resources:
+            requests:
+              cpu: 50m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+      volumes:
+        - name: orderbooks-data
+          persistentVolumeClaim:
+            claimName: orderbooks-data  # same claim the uploader CronJob mounts
+        - name: collector-config
+          configMap:
+            name: orderbooks-collector-config  # defined in configmap.yaml
+            items:
+              - key: polymarket_collector.yaml
+                path: polymarket_collector.yaml
diff --git a/deploy/k8s/base/kustomization.yaml b/deploy/k8s/base/kustomization.yaml
new file mode 100644
index 0000000..010b89a
--- /dev/null
+++ b/deploy/k8s/base/kustomization.yaml
@@ -0,0 +1,9 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:  # manifests in this directory that make up the base
+  - namespace.yaml
+  - configmap.yaml
+  - pvc.yaml
+  - deployment-collector.yaml
+  - cronjob-uploader.yaml
+namespace: orderbooks  # namespace kustomize applies to the resources above
diff --git a/deploy/k8s/base/namespace.yaml b/deploy/k8s/base/namespace.yaml
new file mode 100644
index 0000000..fbd6526
--- /dev/null
+++ b/deploy/k8s/base/namespace.yaml
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  labels:
+    app.kubernetes.io/part-of: orderbooks
+    app.kubernetes.io/name: orderbooks
+  name: orderbooks  # same value as `namespace:` in kustomization.yaml
diff --git a/deploy/k8s/base/pvc.yaml b/deploy/k8s/base/pvc.yaml
new file mode 100644
index 0000000..678b6a5
--- /dev/null
+++ b/deploy/k8s/base/pvc.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: orderbooks-data  # referenced as claimName by the collector pod volumes
+  namespace: orderbooks
+  labels:
+    app.kubernetes.io/part-of: orderbooks
+    app.kubernetes.io/name: orderbooks
+spec:
+  storageClassName: local-path  # NOTE(review): assumes the cluster offers this class - confirm
+  resources:
+    requests:
+      storage: 10Gi  # requested capacity for the data mounted at /var/lib/orderbooks
+  accessModes:
+    - ReadWriteOnce
diff --git a/docs/DATA_CONTRACT.md b/docs/DATA_CONTRACT.md
new file mode 100644
index 0000000..fffe7f1
--- /dev/null
+++ b/docs/DATA_CONTRACT.md
@@ -0,0 +1,168 @@
+# Data Contract
+
+The archive is raw-first. Raw market data must be preserved before normalization, aggregation, upload, or analysis.
+
+## Storage Principles
+
+- Store the raw response payload exactly as received whenever practical.
+- Add collector metadata beside the raw payload, not inside it.
+- Use UTC timestamps in ISO 8601 format with a `Z` suffix.
+- Use gzip JSONL for high-frequency snapshot data.
+- Rotate live collection files by hour or run.
+- Include checksums in manifests for all closed files.
+- Keep normalized files derived and traceable back to raw files.
+- Never store secrets, cookies, private keys, wallet material, or authenticated session state.
+
+## Directory Layout
+
+Initial expected layout:
+
+```text
+data/
+  probes/
+  discovery/
+  live_sample/
+  normalized_sample/
+  manifests/
+reports/
+  checkpoints/
+```
+
+Future sustained collection layout:
+
+```text
+data/
+  raw/
+    polymarket/
+      orderbooks/
+        YYYY/
+          MM/
+            DD/
+              HH/
+                polymarket_orderbooks_YYYYMMDDTHHMMSSZ.jsonl.gz
+  normalized/
+    polymarket/
+      orderbooks/
+        YYYY/
+          MM/
+            DD/
+              polymarket_orderbooks_normalized_YYYYMMDD.jsonl.gz
+  manifests/
+```
+
+Do not create a database until compressed file archives are proven painful.
+
+## Raw Orderbook Snapshot Envelope
+
+Checkpoint 4 should store one JSON object per line using this envelope or a documented successor:
+
+```json
+{
+  "schema_name": "raw_orderbook_snapshot",
+  "schema_version": 1,
+  "collector": {
+    "name": "polymarket_orderbook_collector",
+    "version": "0.1.0"
+  },
+  "market": {
+    "market_name": "polymarket",
+    "market_slug": "example-slug",
+    "condition_id": "0x...",
+    "token_id": "123",
+    "outcome": "Yes"
+  },
+  "collection": {
+    "collected_at_utc": "2026-04-14T20:53:49Z",
+    "sequence": 1
+  },
+  "request": {
+    "method": "GET",
+    "url": "https://example.invalid/orderbook",
+    "params": {
+      "token_id": "123"
+    },
+    "status_code": 200,
+    "duration_ms": 123
+  },
+  "raw": {}
+}
+```
+
+`raw` is the unmodified response payload. If the endpoint returns text or bytes, record encoding and store a lossless representation.
+
+## Discovery Record Fields
+
+Checkpoint 3 normalized market records should include:
+
+- `market_name`
+- `market_slug`
+- `title` or `question`
+- `condition_id`
+- `tokens`
+- `outcomes`
+- `start_time_utc`, if available
+- `end_time_utc`, if available
+- `active`
+- `closed`
+- `endpoint_source`
+- `fetched_at_utc`
+- `raw_ref`
+
+`tokens` should preserve the mapping between outcome labels and token IDs.
+
+## Normalized Snapshot Fields
+
+Checkpoint 5 normalized records should include:
+
+- `market_name`
+- `market_slug`
+- `condition_id`
+- `token_id`
+- `outcome`
+- `collected_at_utc`
+- `best_bid`
+- `best_ask`
+- `spread`
+- `midpoint`
+- `bid_depth_total`
+- `ask_depth_total`
+- `bid_depth_within_1c`
+- `ask_depth_within_1c`
+- `bid_depth_within_2c`
+- `ask_depth_within_2c`
+- `bid_depth_within_5c`
+- `ask_depth_within_5c`
+- `raw_file`
+- `raw_line_number`, when feasible
+
+Normalized data is invalid if it cannot reference the raw source record.
+
+## Manifest Requirements
+
+Collection and transformation manifests should include:
+
+- manifest schema name and version
+- checkpoint or process name
+- start and end timestamps
+- market names and market IDs tracked
+- input files
+- output files
+- request counts
+- success and failure counts
+- status-code counts
+- row counts
+- checksums for closed files
+- command used
+- config path or config digest
+- warnings and known gaps
+- gate status
+
+Checksums should use SHA-256 unless a later report explains why another hash is used.
+
+## Timestamp Policy
+
+- `collected_at_utc`: collector-side UTC timestamp taken as close as possible to receipt of data.
+- `fetched_at_utc`: timestamp for metadata or discovery fetches.
+- Endpoint-provided timestamps must be preserved under their original field names in `raw`.
+- If endpoint timestamp semantics are unclear, write the ambiguity into the probe report.
+
diff --git a/docs/GOOGLE_DRIVE_OFFLOAD.md b/docs/GOOGLE_DRIVE_OFFLOAD.md
new file mode 100644
index 0000000..a61dbcd
--- /dev/null
+++ b/docs/GOOGLE_DRIVE_OFFLOAD.md
@@ -0,0 +1,294 @@
+# Google Drive Offload
+
+Status: valid
+
+This document covers Checkpoint 7: offloading closed raw collector files and
+manifests to Google Drive with `rclone`.
+
+This checkpoint does not prove production readiness or 24/7 reliability. A real
+small upload must be run with a configured remote, and the later 24h soak test
+must still pass.
+
+## Scope
+
+Included:
+
+- `scripts/upload_archive_rclone.sh`
+- `systemd/polymarket-orderbook-uploader.service`
+- `systemd/polymarket-orderbook-uploader.timer`
+- dry-run mode by default
+- real upload only with `--execute`
+- rclone verification with `rclone check`
+- per-run upload manifests
+- optional local cleanup only after successful verification
+
+Excluded:
+
+- dashboards
+- databases
+- strategies or backtests
+- trading, signing, order placement, or wallet logic
+- hardcoded private auth material
+
+## Install rclone
+
+On Ubuntu or Debian:
+
+```sh
+sudo apt-get update
+sudo apt-get install -y rclone
+```
+
+Confirm:
+
+```sh
+rclone version
+```
+
+## Configure A Google Drive Remote
+
+Configure the remote outside this repository. For a service-user setup:
+
+```sh
+sudo -u orderbooks rclone config
+sudo -u orderbooks rclone lsd gdrive:
+```
+
+The example remote path is:
+
+```text
+gdrive:orderbooks/polymarket
+```
+
+Any valid `rclone` destination may be used. The uploader reads it from:
+
+```text
+ORDERBOOKS_RCLONE_DEST
+```
+
+For systemd, create:
+
+```text
+/etc/orderbooks/orderbook-uploader.env
+```
+
+Example:
+
+```text
+ORDERBOOKS_RCLONE_DEST=gdrive:orderbooks/polymarket
+```
+
+Do not commit the machine-local rclone config or any private auth material.
+
+## What Gets Uploaded
+
+By default the script targets:
+
+| Source | Default path |
+| --- | --- |
+| raw collector files | `/var/lib/orderbooks/raw_orderbooks` |
+| collector manifests | `/var/lib/orderbooks/manifests` |
+
+It does not target normalized sample files by default.
+
+Files modified within the last 10 minutes are skipped to avoid active collector
+files:
+
+```text
+ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS=600
+```
+
+The script preserves each file's path relative to the data directory on the
+remote. For example:
+
+```text
+/var/lib/orderbooks/raw_orderbooks/polymarket/orderbooks/<run_id>/file.jsonl.gz
+```
+
+uploads to:
+
+```text
+<remote>/raw_orderbooks/polymarket/orderbooks/<run_id>/file.jsonl.gz
+```
+
+## Dry Run
+
+Dry-run is the default. It plans files, stages a temporary copy, invokes
+`rclone copy --dry-run`, and writes an upload manifest.
+
+Example for a VPS:
+
+```sh
+/opt/orderbooks/scripts/upload_archive_rclone.sh \
+  --data-dir /var/lib/orderbooks \
+  --dest "$ORDERBOOKS_RCLONE_DEST"
+```
+
+Example against the repository sample data:
+
+```sh
+scripts/upload_archive_rclone.sh \
+  --data-dir data \
+  --dest gdrive:orderbooks/polymarket/checkpoint7-test \
+  --manifest-path data/manifests/upload_archive_real_test_dry_run_manifest.json \
+  --min-age-seconds 0 \
+  --rclone-bin /usr/bin/rclone
+```
+
+Dry-run does not prove remote write access.
+
+## Execute Upload
+
+Run a real upload only after the remote is configured and the dry-run plan looks
+right:
+
+```sh
+/opt/orderbooks/scripts/upload_archive_rclone.sh \
+  --execute \
+  --data-dir /var/lib/orderbooks \
+  --dest "$ORDERBOOKS_RCLONE_DEST"
+```
+
+The script runs:
+
+```text
+rclone copy <staged files> <remote> --checksum
+rclone check <staged files> <remote> --one-way --checksum
+```
+
+The upload gate is `PASS` only when the copy succeeds and verification succeeds.
+
+## Retention And Cleanup
+
+Local files are kept by default, even after upload verification.
+
+Cleanup requires an explicit flag:
+
+```sh
+/opt/orderbooks/scripts/upload_archive_rclone.sh \
+  --execute \
+  --cleanup-after-verify \
+  --retention-days 7 \
+  --data-dir /var/lib/orderbooks \
+  --dest "$ORDERBOOKS_RCLONE_DEST"
+```
+
+Cleanup deletes only files that were selected for upload, uploaded, verified, and
+older than the retention window. The default retention window is 7 days.
+
+## Upload Manifest
+
+Each run writes a manifest such as:
+
+```text
+/var/lib/orderbooks/manifests/upload_archive_YYYYMMDDTHHMMSSZ.json
+```
+
+The manifest records:
+
+- planned files
+- attempted files
+- dry-run files
+- uploaded files
+- verified files
+- skipped open or recent files
+- retained local files
+- deleted local files
+- SHA-256 checksums
+- command mode
+- start/end time
+- rclone copy/check exit codes
+- gate status
+
+For this repository, the sample manifest path is:
+
+```text
+data/manifests/upload_archive_sample_manifest.json
+```
+
+The verified Checkpoint 7 real-test manifest is:
+
+```text
+data/manifests/upload_archive_real_test_manifest.json
+```
+
+## systemd Timer
+
+Install the unit files:
+
+```sh
+sudo install -o root -g root -m 0644 /opt/orderbooks/systemd/polymarket-orderbook-uploader.service /etc/systemd/system/polymarket-orderbook-uploader.service
+sudo install -o root -g root -m 0644 /opt/orderbooks/systemd/polymarket-orderbook-uploader.timer /etc/systemd/system/polymarket-orderbook-uploader.timer
+sudo systemctl daemon-reload
+```
+
+Create the environment file:
+
+```sh
+sudo install -o root -g orderbooks -m 0640 /dev/null /etc/orderbooks/orderbook-uploader.env
+sudo editor /etc/orderbooks/orderbook-uploader.env
+```
+
+At minimum, set:
+
+```text
+ORDERBOOKS_RCLONE_DEST=gdrive:orderbooks/polymarket
+```
+
+Enable the timer:
+
+```sh
+sudo systemctl enable --now polymarket-orderbook-uploader.timer
+```
+
+Run one upload immediately:
+
+```sh
+sudo systemctl start polymarket-orderbook-uploader.service
+```
+
+## Logs
+
+Use the systemd journal:
+
+```sh
+sudo systemctl status polymarket-orderbook-uploader.service
+sudo journalctl -u polymarket-orderbook-uploader.service -f
+sudo systemctl list-timers polymarket-orderbook-uploader.timer
+```
+
+## Current Checkpoint 7 Result
+
+Initial local validation was blocked when `rclone` was unavailable. That blocked
+manifest remains at:
+
+```text
+data/manifests/upload_archive_sample_manifest.json
+```
+
+After `rclone` was configured as `/usr/bin/rclone` with remote `gdrive:`, a dry
+run and one tiny real upload were run against:
+
+```text
+gdrive:orderbooks/polymarket/checkpoint7-test
+```
+
+The real upload manifest records `rclone copy` exit code 0 and `rclone check`
+exit code 0:
+
+```text
+data/manifests/upload_archive_real_test_manifest.json
+```
+
+Current gate:
+
+```text
+PASS
+```
+
+## What Remains Unproven
+
+- Long-run upload reliability.
+- Interaction between hourly uploads and a 24h collector soak test.
+- Retention cleanup after verified upload.
+- Production readiness.
diff --git a/docs/KUBERNETES_DEPLOYMENT.md b/docs/KUBERNETES_DEPLOYMENT.md
new file mode 100644
index 0000000..2a0a697
--- /dev/null
+++ b/docs/KUBERNETES_DEPLOYMENT.md
@@ -0,0 +1,148 @@
+# Kubernetes Deployment
+
+Status: draft runtime package for Checkpoint 8G
+
+This document describes the Kubernetes package for the Polymarket raw
+order-book collector. It follows the shared Hetzner k3s cluster model from
+`../nuri/unrip3`: application code, Dockerfile, manifests, and Forgejo workflow
+live in this repository; platform services, the shared registry, and the shared
+Forgejo runner remain platform-owned.
+
+This package does not claim production readiness. Production readiness still
+requires a real Kubernetes runtime smoke run with preserved evidence.
+
+## Cluster Decisions
+
+- Namespace: `orderbooks`
+- Workstation kubeconfig for validation: `../nuri/unrip3/.state/hetzner/kubeconfig.yaml`
+- Shared registry and shared Forgejo runner
+- Existing rclone Secret: `orderbooks/orderbooks-rclone-config`
+- Secret key mounted by the uploader: `rclone.conf`
+
+Do not commit or print rclone config contents.
+
+## Runtime Layout
+
+The collector and uploader share one PVC:
+
+```text
+PVC: orderbooks-data
+mount: /var/lib/orderbooks
+raw files: /var/lib/orderbooks/raw_orderbooks
+manifests: /var/lib/orderbooks/manifests
+discovery: /var/lib/orderbooks/discovery
+```
+
+The collector uses one Deployment with one replica. The container runs
+`/app/scripts/run_polymarket_collector_loop.sh`, which repeatedly executes the
+existing bounded collector cycle and records loop failure/interruption manifests
+instead of relying on Kubernetes crash loops for normal operation.
+
+The uploader uses one CronJob. It runs the existing rclone uploader in execute
+mode, mounts the same PVC, mounts `orderbooks-rclone-config` read-only at
+`/etc/rclone/rclone.conf`, sets `RCLONE_CONFIG` to that file, and uploads only
+closed/aged files.
+
+
+## Bootstrap This App Repo
+
+Run the orderbooks-specific bootstrap from this repository:
+
+```sh
+scripts/deploy/bootstrap_orderbooks_k8s.sh
+```
+
+The bootstrap loads platform defaults and resolved secrets from the local
+platform state without printing secret values. It ensures namespace `orderbooks`,
+creates or updates `orderbooks-registry-creds`, verifies the existing
+`orderbooks-rclone-config` secret has key `rclone.conf`, creates or updates the
+Forgejo repo `philipp/orderbooks`, and upserts the required Actions secret and
+variables.
+
+After bootstrap, push a clean source tree to Forgejo `main`. Do not push local
+`data/`, `artifacts/`, `reports/`, `orchestration/`, kubeconfigs, rclone config,
+`.env`, private keys, or other local evidence/secrets.
+
+## Image Build And Deploy
+
+The Forgejo workflow is `.forgejo/workflows/deploy.yml`. It follows the shared
+runner pattern:
+
+1. load `KUBECONFIG_B64` from Forgejo secrets;
+2. clone this repo inside the runner;
+3. create an in-cluster Kaniko Job;
+4. build and push `REGISTRY_HOST/orderbooks:<git-sha>`;
+5. apply `deploy/k8s/base` with the built image;
+6. wait for `deployment/orderbooks-collector` rollout.
+
+Required Forgejo repo secret:
+
+```text
+KUBECONFIG_B64
+```
+
+Required Forgejo repo variable:
+
+```text
+REGISTRY_HOST
+```
+
+Project defaults used by the workflow:
+
+```text
+PROJECT_NAME=orderbooks
+PROJECT_NAMESPACE=orderbooks
+PROJECT_DEPLOYMENTS=orderbooks-collector
+PROJECT_REGISTRY_SECRET_NAME=orderbooks-registry-creds
+```
+
+The registry pull/build secret `orderbooks-registry-creds` must exist in the
+`orderbooks` namespace before the workflow builds and deploys.
+
+## Pre-Deploy Validation
+
+From this repository:
+
+```sh
+bash -n scripts/run_polymarket_collector_loop.sh
+bash -n scripts/k8s_runtime_smoke_check.sh
+kubectl kustomize deploy/k8s/base
+KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml   kubectl apply -k deploy/k8s/base --dry-run=server
+KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml   kubectl -n orderbooks get secret orderbooks-rclone-config   -o go-template='{{if index .data "rclone.conf"}}rclone_secret_key_present{{else}}rclone_secret_key_missing{{end}}{{"\n"}}'
+```
+
+The last command checks only whether the key exists. It must not print secret
+data.
+
+## Runtime Smoke Gate
+
+After the image is built and the workload is actually deployed, run:
+
+```sh
+KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml   scripts/k8s_runtime_smoke_check.sh   --namespace orderbooks   --deployment orderbooks-collector   --cronjob orderbooks-uploader   --raw-dir /var/lib/orderbooks/raw_orderbooks   --manifest-dir /var/lib/orderbooks/manifests   --wait-seconds 1800 \
+  --upload-min-age-seconds 600
+```
+
+The smoke gate uses `kubectl`, not systemd. It writes local JSON evidence under
+`data/manifests/k8s_runtime_smoke_<UTC_TIMESTAMP>.json` by default. It verifies:
+
+- collector pod is running;
+- latest collector manifest has `gate_status: PASS`, `rows_written > 0`, and
+  `failure_count: 0`;
+- raw gzip JSONL parses and is under `/var/lib/orderbooks/raw_orderbooks`;
+- deleting the collector pod does not corrupt the old raw file checksum or row
+  count;
+- a later post-restart collector cycle writes valid rows;
+- an uploader Job created from the CronJob completes;
+- the latest upload manifest records a verified rclone upload with at least one
+  verified file.
+
+A failed smoke run still writes JSON evidence and exits nonzero. Preserve failed
+manifests, raw files, upload manifests, and pod logs for review.
+
+## Not Included
+
+- No trading, signing, wallets, private keys, or API keys.
+- No dashboard, database, strategy, backtest, or second-market connector.
+- No websocket rewrite.
+- No rclone config contents in this repository.
diff --git a/docs/METHODOLOGY.md b/docs/METHODOLOGY.md
new file mode 100644
index 0000000..f342405
--- /dev/null
+++ b/docs/METHODOLOGY.md
@@ -0,0 +1,104 @@
+# Methodology
+
+This project uses checkpoint-driven compound engineering. The point is to preserve useful data and operational learning, not to accumulate scaffolding.
+
+## Checkpoint Cycle
+
+Every checkpoint follows the same loop:
+
+1. Define the smallest useful checkpoint.
+2. Build only what is required for that checkpoint.
+3. Validate with real commands and real data when applicable.
+4. Write durable artifacts: code or docs, config or run instructions, manifest/report, and validation evidence.
+5. State the gate: `PASS`, `FAIL`, `BLOCKED`, or `PARTIAL`.
+6. Identify the strongest fake-progress risk.
+7. Recommend the next smallest step.
+8. Stop and ask only when a real decision is needed.
+
+## Gate States
+
+- `PASS`: the checkpoint pass condition is met and evidence is on disk.
+- `FAIL`: the checkpoint was attempted but did not meet its pass condition.
+- `BLOCKED`: work cannot continue without a decision, credential, service, or unavailable dependency.
+- `PARTIAL`: useful artifacts exist, but the checkpoint should not be treated as passed.
+
+## Evidence Rules
+
+- Evidence must be reproducible from files and commands, not just chat.
+- If a command was used to validate behavior, record the command and summarize the result in a report or manifest.
+- If data was collected, preserve raw data and include checksums.
+- If synthetic or sample data is used, label it explicitly.
+- If a claim depends on a public endpoint, record the endpoint, request parameters, response fields, status codes, timestamps, and fetch time.
+- Do not claim reliability from a short sample run. Reliability requires the roadmap soak test.
+
+## Machine-Readable Manifest Format
+
+Checkpoint manifests should be JSON and stored under `data/manifests/`. Use this shape unless a later checkpoint documents a better schema:
+
+```json
+{
+  "checkpoint_id": 1,
+  "checkpoint_name": "Project Scaffold And Methodology",
+  "status": "PASS",
+  "started_at_utc": "2026-04-14T20:53:49Z",
+  "ended_at_utc": "2026-04-14T20:53:49Z",
+  "scope": "Durable project rules and roadmap only; no collector implementation.",
+  "artifacts": [
+    {
+      "path": "AGENTS.md",
+      "kind": "project_rules",
+      "status": "valid"
+    }
+  ],
+  "validation": {
+    "commands": [
+      {
+        "command": "git status --short",
+        "result": "completed"
+      }
+    ],
+    "summary": "Required files exist and contain checkpoint rules."
+  },
+  "decisions": [],
+  "assumptions": [],
+  "fake_progress_risk": "Most progress is documentation until public Polymarket endpoint behavior is proven.",
+  "next_step": "Run Checkpoint 2 public source probe."
+}
+```
+
+## Markdown Checkpoint Report Format
+
+Checkpoint reports should be stored under `reports/checkpoints/` and include:
+
+- active checkpoint
+- scope
+- files created or changed
+- validation commands and results
+- project rules or operational lessons added
+- gate status (`PASS`, `FAIL`, `BLOCKED`, or `PARTIAL`)
+- strongest fake-progress risk
+- next smallest step
+
+## Deprecated Or Misleading Artifacts
+
+Do not delete mistakes. Preserve the original artifact and label it.
+
+Preferred labels:
+
+- Add a manifest entry with `status: "deprecated"` or `status: "invalid"`.
+- Add a sibling note named `<artifact>.deprecated.md` or `<artifact>.invalid.md` when a human explanation is useful.
+- Include why the artifact is wrong, when it was labeled, who labeled it, and what replaces it.
+
+If an artifact is dangerous because it contains secrets, stop and ask the user. Do not spread or copy the secret into reports.
+
+## Anti-Fake-Progress Rules
+
+- No dashboard before 24h data reliability.
+- No database before plain compressed files become painful.
+- No strategy, backtest, optimizer, or trading bot code.
+- No private-key or signing code.
+- No generic multi-market abstraction before a second market exists.
+- No "production-ready" claim before a 24h soak test.
+- No endpoint assumptions without probe evidence.
+- No normalized dataset that cannot trace back to raw records.
+
diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md
new file mode 100644
index 0000000..d392da9
--- /dev/null
+++ b/docs/OPERATIONS.md
@@ -0,0 +1,93 @@
+# Operations
+
+This document defines operational rules before the collector exists. It should be updated with exact commands as checkpoints add scripts, services, and upload jobs.
+
+## Current Operational Status
+
+- Collector implementation: not started.
+- Supported market: none yet; Polymarket is the first planned market.
+- Deployment target: small VPS.
+- Offload target: Google Drive through `rclone`.
+- Reliability status: not production-ready until a documented 24h soak test passes.
+
+## Safety Rules
+
+- No trading.
+- No order placement.
+- No wallet signing.
+- No private keys.
+- No secrets in git.
+- No dashboards, databases, ML, or strategy code before the roadmap gate allows them.
+
+## Local Runtime Principles
+
+Future scripts should:
+
+- accept a configurable data directory
+- write logs to a predictable location
+- write raw gzip JSONL snapshots
+- rotate files by hour or run
+- close files cleanly on shutdown
+- write manifests after runs
+- avoid corrupting closed files on restart
+- handle public endpoint errors and rate limits conservatively
+
+## VPS Deployment Principles
+
+Checkpoint 6 should document:
+
+- Python version and virtualenv setup
+- package installation
+- environment variables
+- systemd or Docker Compose runtime
+- service user and file permissions
+- data directory ownership
+- log locations
+- restart policy
+- disk usage checks
+- safe upgrade and rollback steps
+
+## Google Drive Offload Principles
+
+Checkpoint 7 should use `rclone` and must:
+
+- avoid hardcoded credentials
+- upload only closed or rotated files
+- support dry-run mode
+- verify upload success
+- preserve local files until upload is verified
+- maintain checksums
+- keep the last N days locally
+- write an upload manifest
+
+## Incident And Bad-Data Handling
+
+If data looks wrong:
+
+1. Preserve the raw files.
+2. Stop relying on the affected derived files.
+3. Label the artifact `invalid` or `deprecated`.
+4. Write a short note explaining the issue and replacement, if any.
+5. Keep the learning in docs or reports.
+
+Examples of bad-data conditions:
+
+- endpoint returned a schema different from expected
+- token/outcome mapping was wrong
+- timestamps were misunderstood
+- rate limits caused large gaps
+- gzip file was not closed cleanly
+- upload succeeded but checksum did not match
+
+## Minimum Reliability Claim
+
+A short sample run can prove that code writes files. It cannot prove 24/7 reliability.
+
+The project may only claim production readiness after:
+
+- discovery works
+- raw order-book collection works
+- offload works
+- 24h soak test completes
+- data quality and gap metrics are documented
+
diff --git a/docs/ORDERBOOK_SCHEMA.md b/docs/ORDERBOOK_SCHEMA.md
new file mode 100644
index 0000000..a22c45b
--- /dev/null
+++ b/docs/ORDERBOOK_SCHEMA.md
@@ -0,0 +1,102 @@
+# Orderbook Snapshot Schema
+
+Status: valid
+
+This document covers the Checkpoint 5 normalized order-book sample. The raw
+gzip JSONL files remain the source of truth. Normalized rows are derived records
+for quick inspection and later quality checks.
+
+## Normalized Snapshot
+
+Schema name: `normalized_orderbook_snapshot`
+
+Schema version: `1`
+
+File format: gzip JSONL, one JSON object per line.
+
+Sample location:
+
+```text
+data/normalized_sample/polymarket/orderbooks/<run_id>/polymarket_orderbooks_normalized_<run_id>.jsonl.gz
+```
+
+Every normalized row must reference exactly one raw gzip JSONL source row:
+
+- `raw_file`: repository-relative path to the raw gzip JSONL file.
+- `raw_line_number`: 1-based line number inside that raw gzip JSONL file.
+
+Derived data is invalid if either lineage field is missing or points to a
+missing raw file.
+
+## Field Contract
+
+| Field | Type | Meaning |
+| --- | --- | --- |
+| `schema_name` | string | Always `normalized_orderbook_snapshot`. |
+| `schema_version` | number | Schema version, currently `1`. |
+| `market_name` | string | Market source name from the raw envelope. |
+| `market_slug` | string | Polymarket market slug from the raw envelope. |
+| `condition_id` | string | Polymarket condition ID from the raw envelope. |
+| `token_id` | string | Polymarket CLOB token ID from the raw envelope. |
+| `outcome` | string | Outcome label associated with `token_id`. |
+| `collected_at_utc` | string | Collector timestamp from the raw envelope. |
+| `best_bid` | string or null | Maximum bid price, or null when no bids exist. |
+| `best_ask` | string or null | Minimum ask price, or null when no asks exist. |
+| `spread` | string or null | `best_ask - best_bid` when both sides exist. |
+| `midpoint` | string or null | `(best_bid + best_ask) / 2` when both sides exist. |
+| `bid_depth_total` | string | Sum of all bid sizes. |
+| `ask_depth_total` | string | Sum of all ask sizes. |
+| `bid_depth_within_1c` | string | Sum of bid sizes priced at least `best_bid - 0.01`. |
+| `ask_depth_within_1c` | string | Sum of ask sizes priced at most `best_ask + 0.01`. |
+| `bid_depth_within_2c` | string | Sum of bid sizes priced at least `best_bid - 0.02`. |
+| `ask_depth_within_2c` | string | Sum of ask sizes priced at most `best_ask + 0.02`. |
+| `bid_depth_within_5c` | string | Sum of bid sizes priced at least `best_bid - 0.05`. |
+| `ask_depth_within_5c` | string | Sum of ask sizes priced at most `best_ask + 0.05`. |
+| `raw_file` | string | Repository-relative raw gzip JSONL path. |
+| `raw_line_number` | number | 1-based source line number in `raw_file`. |
+
+## Numeric Encoding
+
+Prices and sizes are parsed with Python `Decimal`. Derived numeric values are
+emitted as exact decimal strings rather than JSON numbers. This keeps precision
+visible and avoids binary floating-point rounding.
+
+Missing price-derived values are emitted as `null`. Depth totals and depth bands
+are emitted as decimal strings and use `"0"` when the relevant side is empty.
+
+## Calculation Rules
+
+- `best_bid`: maximum bid price.
+- `best_ask`: minimum ask price.
+- `spread`: `best_ask - best_bid` when both sides exist.
+- `midpoint`: `(best_bid + best_ask) / 2` when both sides exist.
+- `bid_depth_total`: sum of all bid sizes.
+- `ask_depth_total`: sum of all ask sizes.
+- `bid_depth_within_1c`: sum bid sizes with price greater than or equal to
+  `best_bid - 0.01`.
+- `ask_depth_within_1c`: sum ask sizes with price less than or equal to
+  `best_ask + 0.01`.
+- The same band rule is used for `0.02` and `0.05`.
+
+## Sanity Rules
+
+A normalized file should pass these checks:
+
+- Output row count equals raw input row count unless skipped rows are recorded.
+- Every row has `raw_file` and `raw_line_number`.
+- Every referenced raw file exists.
+- `spread` is non-negative whenever both sides exist.
+- `midpoint` is between `best_bid` and `best_ask` whenever both sides exist.
+- Depth totals and band depths are non-negative.
+- At least one `Up` row and one `Down` row exist in the sample.
+- The gzip JSONL file decompresses and every line parses as JSON.
+- The manifest checksum matches the normalized output file.
+
+## Current Known Gaps
+
+- This schema covers a derived sample extract only.
+- It does not define sustained daily normalized partitions.
+- It does not include upload, daemon runtime, dashboards, databases, strategy
+  code, backtests, trading behavior, or wallet behavior.
+- Long-run schema stability still depends on future collection and soak-test
+  evidence.
diff --git a/docs/POLYMARKET_COLLECTOR.md b/docs/POLYMARKET_COLLECTOR.md
new file mode 100644
index 0000000..e39c750
--- /dev/null
+++ b/docs/POLYMARKET_COLLECTOR.md
@@ -0,0 +1,149 @@
+# Polymarket Collector
+
+Artifact status: `valid`
+
+## Scope
+
+This document covers the Checkpoint 4 bounded raw order-book sample collector.
+
+It does not describe a production service. It does not include normalization, upload, systemd, dashboards, databases, strategies, trading, wallet logic, private keys, API keys, or private endpoints.
+
+## Inputs
+
+The collector reads active BTC markets from:
+
+```text
+data/discovery/polymarket_btc_markets_latest.json
+```
+
+Checkpoint 3 writes normalized market records with `condition_id` and `tokens` preserving the `Up` and `Down` outcome-token mapping. The collector uses only those records and does not perform market discovery itself.
+
+If the discovery file is stale or contains no usable active markets, run:
+
+```sh
+python3 scripts/discover_polymarket_btc_markets.py
+```
+
+## Endpoint
+
+The sample uses the public CLOB batch order-book endpoint:
+
+```text
+POST https://clob.polymarket.com/books
+```
+
+Request body shape:
+
+```json
+[
+  {"token_id": "<up_token_id>"},
+  {"token_id": "<down_token_id>"}
+]
+```
+
+No authentication is used.
+
+## Running A Bounded Sample
+
+Default sample command:
+
+```sh
+python3 scripts/collect_polymarket_orderbooks.py
+```
+
+The default config is:
+
+```text
+config/polymarket_collector.example.yaml
+```
+
+The example config is deliberately small:
+
+- `market_limit: 2`
+- `interval_seconds: 30`
+- `duration_seconds: 300`
+- `market_end_safety_seconds: 420`
+
+This produces a 5-minute sample for at most 2 markets, fetching both `Up` and `Down` outcome tokens by batch request.
+
+## Outputs
+
+Raw gzip JSONL snapshots are written under:
+
+```text
+data/live_sample/polymarket/orderbooks/<run_id>/
+```
+
+The sample manifest is written to:
+
+```text
+data/manifests/orderbook_collector_sample_manifest.json
+```
+
+Files rotate by run for this checkpoint. Hourly rotation is intentionally left for a later sustained runtime checkpoint.
+
+## Raw JSONL Envelope
+
+Each gzip JSONL line is a raw-first envelope:
+
+```json
+{
+  "schema_name": "raw_orderbook_snapshot",
+  "schema_version": 1,
+  "collector": {
+    "name": "polymarket_orderbook_collector",
+    "version": "0.1.0"
+  },
+  "market": {
+    "market_name": "polymarket",
+    "market_slug": "example",
+    "condition_id": "0x...",
+    "token_id": "123",
+    "outcome": "Up",
+    "market_end_time_utc": "2026-04-14T22:00:00Z"
+  },
+  "collection": {
+    "collected_at_utc": "2026-04-14T21:00:00Z",
+    "sequence": 1,
+    "response_index": 0
+  },
+  "request": {
+    "method": "POST",
+    "url": "https://clob.polymarket.com/books",
+    "params": null,
+    "json_body": [{"token_id": "123"}],
+    "status_code": 200,
+    "duration_ms": 123,
+    "attempts": []
+  },
+  "raw": {}
+}
+```
+
+The `raw` object is the unmodified order-book object returned by CLOB for that token.
+
+## Rate-Limit Handling
+
+The sample is conservative:
+
+- Uses a small market cap by default.
+- Uses a fixed interval between batch requests.
+- Applies request timeout.
+- Retries `429` and `5xx` responses with exponential backoff.
+- Does not use concurrent requests.
+
+## Shutdown
+
+`SIGINT` and `SIGTERM` set a stop flag. The current request, if any, finishes or times out, the gzip file closes, and the manifest is written with a shutdown warning.
+
+## Known Gaps
+
+- This is a short run-rotated sample, not a daemon.
+- It does not prove 24/7 reliability.
+- It does not implement hourly rotation.
+- It does not refresh discovery during a run.
+- It does not normalize snapshots.
+- It does not upload files.
+- It does not use websockets.
+
+The project must not claim production readiness until the later 24h soak test passes with documented quality metrics.
diff --git a/docs/PRODUCTION_DEFINITION_OF_DONE.md b/docs/PRODUCTION_DEFINITION_OF_DONE.md
new file mode 100644
index 0000000..583e340
--- /dev/null
+++ b/docs/PRODUCTION_DEFINITION_OF_DONE.md
@@ -0,0 +1,54 @@
+# Production Definition Of Done
+
+Status: ACTIVE
+
+Defined at UTC: 2026-04-17T09:12:02Z
+
+This project is done for the first production milestone only when it is reliably
+collecting Polymarket BTC order-book data on a small VPS with evidence on disk.
+Packaging, docs, local samples, and local soak tests are useful evidence, but
+not the finish line.
+
+## Done Means
+
+1. The collector runs on the VPS under systemd using `/opt/orderbooks` for code
+   and `/var/lib/orderbooks` for data.
+2. Raw gzip JSONL order-book snapshots are written for active BTC up/down
+   markets, with manifests beside them.
+3. The service survives a forced restart: after restart, a later collection
+   cycle writes valid raw rows without corrupting prior files.
+4. Temporary network/API failure is handled as an operational failure, not data
+   loss: failures are visible in logs/manifests, and the next successful cycle
+   resumes writing new files.
+5. Google Drive upload runs from the VPS through `rclone`, verifies success, and
+   leaves local files in place until upload is confirmed.
+6. A final production report and machine-readable manifest record exact commands,
+   timestamps, files, checksums, restart result, upload result, and remaining
+   risks.
+
+## Not Required For This Milestone
+
+- No second market.
+- No dashboard.
+- No database.
+- No strategy or backtest code.
+- No websocket rewrite unless polling proves insufficient.
+- No generic multi-market abstraction.
+
+## Maximum Remaining Builder Turns
+
+The remaining work is capped at three builder turns:
+
+1. Accept deploy bundle and prepare the minimal VPS reliability gate.
+2. Execute or guide the VPS cutover and collect runtime evidence.
+3. Fix only blocking production issues found by the VPS gate, then write the
+   final pass/fail report.
+
+If actual VPS access is unavailable, the gate must be `BLOCKED_NEEDS_VPS_ACCESS`,
+not production ready.
+
+## Current Evidence
+
+- Deploy bundle gate: `DEPLOY_BUNDLE_READY`.
+- Local 24h soak final manifest exists but remains `NEEDS_REVIEW`.
+- Production readiness remains false until VPS runtime evidence exists.
diff --git a/docs/VPS_CUTOVER_RUNBOOK.md b/docs/VPS_CUTOVER_RUNBOOK.md
new file mode 100644
index 0000000..59d66c8
--- /dev/null
+++ b/docs/VPS_CUTOVER_RUNBOOK.md
@@ -0,0 +1,341 @@
+# VPS Cutover Runbook
+
+Status: valid
+
+Checkpoint 8 status is `WAIVED_BY_USER`, not `PASS`. This runbook prepares a
+VPS cutover for the existing Polymarket raw order-book collector only. It does
+not claim production readiness, second-market support, dashboards, databases,
+strategies, or trading.
+
+## Scope
+
+Included:
+
+- VPS prerequisite checks.
+- Repository copy/update steps.
+- Public Polymarket collector service install.
+- Google Drive offload timer install with rclone.
+- Liveness, cycle health, and upload verification commands.
+- Rollback and stop commands.
+
+Excluded:
+
+- Private API access.
+- Wallets, keys, mnemonics, signing, order placement, or trading.
+- Database, dashboard, strategy, or second-market work.
+
+## Recommended VPS Layout
+
+Use the existing package paths unless the VPS has a reason to differ:
+
+```text
+repository: /opt/orderbooks
+python virtualenv: /opt/orderbooks/.venv
+config: /etc/orderbooks/polymarket_collector.vps.yaml
+collector env: /etc/orderbooks/polymarket-orderbook-collector.env
+uploader env: /etc/orderbooks/orderbook-uploader.env
+data root: /var/lib/orderbooks
+raw files: /var/lib/orderbooks/raw_orderbooks
+manifests: /var/lib/orderbooks/manifests
+discovery: /var/lib/orderbooks/discovery
+```
+
+The `orderbooks` system user should own `/var/lib/orderbooks`. The repository
+under `/opt/orderbooks` can be root-owned and world-readable.
+
+## VPS Prerequisites
+
+On Ubuntu or Debian:
+
+```sh
+sudo apt-get update
+sudo apt-get install -y git python3 python3-venv rclone
+sudo useradd --system --home /var/lib/orderbooks --shell /usr/sbin/nologin orderbooks || true
+sudo mkdir -p /opt /etc/orderbooks /var/lib/orderbooks/discovery /var/lib/orderbooks/raw_orderbooks /var/lib/orderbooks/manifests /var/log/orderbooks
+sudo chown -R orderbooks:orderbooks /var/lib/orderbooks /var/log/orderbooks
+```
+
+No API keys, private keys, mnemonics, wallets, or trading credentials are
+required by this project. rclone credentials are the only machine-local
+credential material expected for Google Drive offload, and they must stay
+outside the repository.
+
+## Copy Or Update The Repository
+
+First install:
+
+```sh
+cd /opt
+sudo git clone <repo-url> orderbooks
+```
+
+Update an existing checkout:
+
+```sh
+cd /opt/orderbooks
+sudo git fetch --all --prune
+sudo git pull --ff-only
+```
+
+Prepare repository permissions and the Python virtualenv:
+
+```sh
+cd /opt/orderbooks
+sudo chmod +x scripts/run_polymarket_collector_cycle.sh scripts/upload_archive_rclone.sh scripts/vps_preflight_check.sh scripts/vps_runtime_smoke_check.sh
+sudo python3 -m venv .venv
+sudo .venv/bin/python -m pip install --upgrade pip
+sudo chown -R root:root /opt/orderbooks
+sudo chmod -R a+rX /opt/orderbooks
+```
+
+The current collector scripts use only the Python standard library, so nothing beyond pip itself is installed into the virtualenv.
+
+## Configure Public Collector Runtime
+
+Install the example config, then review it:
+
+```sh
+sudo install -o root -g root -m 0644 /opt/orderbooks/config/polymarket_collector.vps.example.yaml /etc/orderbooks/polymarket_collector.vps.yaml
+sudo editor /etc/orderbooks/polymarket_collector.vps.yaml
+```
+
+Optional collector env overrides:
+
+```sh
+sudo install -o root -g orderbooks -m 0640 /dev/null /etc/orderbooks/polymarket-orderbook-collector.env
+sudo editor /etc/orderbooks/polymarket-orderbook-collector.env
+```
+
+Example values:
+
+```text
+ORDERBOOKS_DATA_DIR=/var/lib/orderbooks
+ORDERBOOKS_OUTPUT_DIR=/var/lib/orderbooks/raw_orderbooks
+ORDERBOOKS_DISCOVERY_MAX_PAGES=3
+```
+
+## Configure Rclone
+
+Configure rclone as the `orderbooks` user. Do not print or commit
+`rclone.conf`.
+
+```sh
+sudo -u orderbooks rclone config
+sudo -u orderbooks rclone listremotes
+sudo -u orderbooks rclone lsf gdrive: --max-depth 1
+```
+
+Create the uploader env file:
+
+```sh
+sudo install -o root -g orderbooks -m 0640 /dev/null /etc/orderbooks/orderbook-uploader.env
+sudo editor /etc/orderbooks/orderbook-uploader.env
+```
+
+Example:
+
+```text
+ORDERBOOKS_RCLONE_DEST=gdrive:orderbooks/polymarket
+ORDERBOOKS_RCLONE_BIN=/usr/bin/rclone
+ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS=600
+```
+
+The uploader verifies uploads with `rclone check`. Dry runs do not prove remote
+write access.
+
+## Run VPS Preflight
+
+Run the preflight before installing or starting services:
+
+```sh
+cd /opt/orderbooks
+sudo -u orderbooks /opt/orderbooks/scripts/vps_preflight_check.sh \
+  --app-dir /opt/orderbooks \
+  --python-bin /opt/orderbooks/.venv/bin/python \
+  --rclone-bin /usr/bin/rclone \
+  --rclone-remote gdrive:orderbooks/polymarket \
+  --data-dir /var/lib/orderbooks \
+  --manifest-dir /var/lib/orderbooks/manifests \
+  --log-dir /var/log/orderbooks \
+  --min-free-gib 5
+```
+
+The preflight does not print rclone configuration. It checks repository files,
+Python compilation, shell syntax, systemd unit parsing when available, rclone
+availability, optional remote readability, target directory writability, disk
+space, and the absence of required project secrets.
+
+## Install Systemd Units
+
+Install collector and uploader units:
+
+```sh
+sudo install -o root -g root -m 0644 /opt/orderbooks/systemd/polymarket-orderbook-collector.service /etc/systemd/system/polymarket-orderbook-collector.service
+sudo install -o root -g root -m 0644 /opt/orderbooks/systemd/polymarket-orderbook-uploader.service /etc/systemd/system/polymarket-orderbook-uploader.service
+sudo install -o root -g root -m 0644 /opt/orderbooks/systemd/polymarket-orderbook-uploader.timer /etc/systemd/system/polymarket-orderbook-uploader.timer
+sudo systemctl daemon-reload
+sudo systemd-analyze verify /etc/systemd/system/polymarket-orderbook-collector.service /etc/systemd/system/polymarket-orderbook-uploader.service /etc/systemd/system/polymarket-orderbook-uploader.timer
+```
+
+Enable and start:
+
+```sh
+sudo systemctl enable --now polymarket-orderbook-collector.service
+sudo systemctl enable --now polymarket-orderbook-uploader.timer
+```
+
+Run one uploader cycle immediately after the collector has produced closed raw
+files:
+
+```sh
+sudo systemctl start polymarket-orderbook-uploader.service
+```
+
+Run the minimal runtime reliability smoke gate after both units are installed,
+rclone is configured, and at least one closed raw file is older than the
+uploader minimum age (default: 600 seconds):
+
+```sh
+sudo /opt/orderbooks/scripts/vps_runtime_smoke_check.sh \
+  --app-dir /opt/orderbooks \
+  --data-dir /var/lib/orderbooks \
+  --raw-dir /var/lib/orderbooks/raw_orderbooks \
+  --manifest-dir /var/lib/orderbooks/manifests \
+  --collector-service polymarket-orderbook-collector.service \
+  --uploader-service polymarket-orderbook-uploader.service \
+  --wait-seconds 900
+```
+
+This command is the minimal production reliability gate. It records a JSON
+evidence manifest under `/var/lib/orderbooks/manifests/`, verifies a valid
+collector cycle, forces one collector service restart, verifies the prior raw
+gzip file still parses with the same checksum, waits for a later valid cycle,
+starts the uploader, and records upload success or failure evidence. Preserve
+failed smoke manifests and journal logs for review.
+
+## Check Liveness
+
+Collector service:
+
+```sh
+sudo systemctl status polymarket-orderbook-collector.service
+sudo journalctl -u polymarket-orderbook-collector.service --since "30 minutes ago"
+```
+
+Uploader timer and service:
+
+```sh
+sudo systemctl list-timers polymarket-orderbook-uploader.timer
+sudo systemctl status polymarket-orderbook-uploader.service
+sudo journalctl -u polymarket-orderbook-uploader.service --since "2 hours ago"
+```
+
+Recent artifacts:
+
+```sh
+find /var/lib/orderbooks/raw_orderbooks -type f -name '*.jsonl.gz' -printf '%TY-%Tm-%TdT%TH:%TM:%TS %s %p\n' | sort | tail
+find /var/lib/orderbooks/manifests -type f -name '*.json' -printf '%TY-%Tm-%TdT%TH:%TM:%TS %s %p\n' | sort | tail
+```
+
+## Check Latest Cycle Health
+
+Inspect the newest collector manifest:
+
+```sh
+latest_collector="$(find /var/lib/orderbooks/manifests -type f -name 'polymarket_orderbook_collector_*.json' | sort | tail -n 1)"
+python3 -m json.tool "$latest_collector" | sed -n '1,180p'
+```
+
+Minimum healthy signs:
+
+```text
+gate_status: PASS
+rows_written: greater than 0
+failure_count: 0
+failures: []
+```
+
+Verify the latest raw gzip parses and row count matches its manifest:
+
+```sh
+python3 - "$latest_collector" <<'PY'
+import gzip
+import json
+import sys
+from pathlib import Path
+
+manifest = json.loads(Path(sys.argv[1]).read_text())
+for item in manifest.get("output_files", []):
+    path = Path(item["path"])
+    rows = 0
+    with gzip.open(path, "rt", encoding="utf-8") as handle:
+        for line in handle:
+            if line.strip():
+                json.loads(line)
+                rows += 1
+    print({"path": str(path), "rows": rows, "manifest_rows": item.get("rows"), "matches": rows == item.get("rows")})
+PY
+```
+
+## Verify Uploads
+
+Inspect the newest upload manifest:
+
+```sh
+latest_upload="$(find /var/lib/orderbooks/manifests -type f -name 'upload_archive_*.json' | sort | tail -n 1)"
+python3 -m json.tool "$latest_upload" | sed -n '1,220p'
+```
+
+Minimum healthy signs:
+
+```text
+operation_status: UPLOAD_VERIFIED
+gate_status: PASS
+rclone.copy_exit_code: 0
+rclone.check_exit_code: 0
+counts.uploaded equals counts.verified
+```
+
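+Manual remote spot-check without printing config. `ORDERBOOKS_RCLONE_DEST` is only defined in `/etc/orderbooks/orderbook-uploader.env`, not in an interactive shell, so pass the configured destination explicitly:
+
+```sh
+sudo -u orderbooks rclone lsf gdrive:orderbooks/polymarket --max-depth 2 | head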
+```
+
+## Rollback Or Stop
+
+Stop uploader timer first:
+
+```sh
+sudo systemctl disable --now polymarket-orderbook-uploader.timer
+sudo systemctl stop polymarket-orderbook-uploader.service
+```
+
+Stop collector:
+
+```sh
+sudo systemctl stop polymarket-orderbook-collector.service
+```
+
+Disable collector if needed:
+
+```sh
+sudo systemctl disable polymarket-orderbook-collector.service
+```
+
+Preserve `/var/lib/orderbooks`, including `/var/lib/orderbooks/manifests`, for evidence.
+If an artifact is wrong, label it as invalid or deprecated in a sibling note
+rather than deleting it.
+
+## Still Not Production Proven
+
+Because the local 24h soak wait was waived by the user, the following remain
+unproven:
+
+- A completed 24h collector run with reviewed final metrics.
+- 24h interaction between collector rotation and uploader timer.
+- VPS-specific long-run disk, network, rclone, and systemd behavior.
+- Retention cleanup behavior under verified upload load.
+
+Treat this as cutover preparation. The VPS is not deployed until the commands
+are run on the VPS and evidence is written.
diff --git a/docs/VPS_DEPLOYMENT.md b/docs/VPS_DEPLOYMENT.md
new file mode 100644
index 0000000..e4ac708
--- /dev/null
+++ b/docs/VPS_DEPLOYMENT.md
@@ -0,0 +1,298 @@
+# VPS Deployment
+
+Status: valid
+
+This document covers the Checkpoint 6 systemd runtime package for the raw
+Polymarket order-book collector.
+
+It does not claim production readiness or 24/7 reliability. That remains gated
+on the later 24h soak test.
+
+## Scope
+
+Included:
+
+- systemd service for the raw collector cycle
+- Python virtualenv setup
+- service user and directory permissions
+- configurable data directory
+- discovery refresh before each collector cycle
+- journal-based logs
+- safe restart model for finite collector runs
+
+Excluded:
+
+- Google Drive offload
+- `rclone`
+- uploader scripts, services, or timers
+- normalization changes
+- dashboards
+- databases
+- strategies or backtests
+- trading, order placement, signing, or wallet logic
+
+Uploader service and timer units are intentionally deferred to Checkpoint 7.
+
+## Runtime Model
+
+The systemd service runs:
+
+```text
+/opt/orderbooks/scripts/run_polymarket_collector_cycle.sh
+```
+
+Each cycle:
+
+1. Refreshes BTC market discovery into the configured data directory.
+2. Runs `scripts/collect_polymarket_orderbooks.py` once.
+3. Writes run-rotated raw gzip JSONL files.
+4. Writes a per-cycle collector manifest.
+5. Exits after the configured finite duration.
+
+The unit uses `Restart=always`, so systemd starts the next cycle after the prior
+cycle exits or fails.
+
+The example config uses a 300 second collection cycle. This is deliberately
+short because current BTC up/down markets are short-lived and the collector
+refreshes discovery only before a cycle starts. Do not increase the cycle beyond
+the practical market horizon unless the collector later learns to refresh market
+selection during a run.
+
+## Paths
+
+Default VPS paths:
+
+| Purpose | Path |
+| --- | --- |
+| Application checkout | `/opt/orderbooks` |
+| Python virtualenv | `/opt/orderbooks/.venv` |
+| Service config | `/etc/orderbooks/polymarket_collector.vps.yaml` |
+| Optional env override file | `/etc/orderbooks/polymarket-orderbook-collector.env` |
+| Data directory | `/var/lib/orderbooks` |
+| Discovery artifacts | `/var/lib/orderbooks/discovery` |
+| Raw order-book output base | `/var/lib/orderbooks/raw_orderbooks` |
+| Per-cycle manifests | `/var/lib/orderbooks/manifests` |
+
+Adjust these paths if the repository is installed somewhere other than
+`/opt/orderbooks`.
+
+## Environment Variables
+
+The service defines safe defaults and can load overrides from:
+
+```text
+/etc/orderbooks/polymarket-orderbook-collector.env
+```
+
+Supported variables:
+
+| Variable | Default | Meaning |
+| --- | --- | --- |
+| `ORDERBOOKS_APP_DIR` | `/opt/orderbooks` | Repository checkout path. |
+| `ORDERBOOKS_DATA_DIR` | `/var/lib/orderbooks` | Base directory for data files. |
+| `ORDERBOOKS_PYTHON` | `/opt/orderbooks/.venv/bin/python` | Python interpreter. |
+| `ORDERBOOKS_COLLECTOR_CONFIG` | `/etc/orderbooks/polymarket_collector.vps.yaml` | Collector config path. |
+| `ORDERBOOKS_DISCOVERY_DIR` | `$ORDERBOOKS_DATA_DIR/discovery` | Discovery artifact directory. |
+| `ORDERBOOKS_OUTPUT_DIR` | `$ORDERBOOKS_DATA_DIR/raw_orderbooks` | Collector output base directory. |
+| `ORDERBOOKS_MANIFEST_DIR` | `$ORDERBOOKS_DATA_DIR/manifests` | Per-cycle manifest directory. |
+| `ORDERBOOKS_DISCOVERY_LIMIT` | `100` | Gamma event page limit per discovery page. |
+| `ORDERBOOKS_DISCOVERY_MAX_PAGES` | `3` | Discovery page cap per cycle. |
+| `ORDERBOOKS_DISCOVERY_TIMEOUT` | `15` | Discovery request timeout in seconds. |
+
+Example override file:
+
+```text
+ORDERBOOKS_DATA_DIR=/var/lib/orderbooks
+ORDERBOOKS_DISCOVERY_MAX_PAGES=3
+```
+
+No API keys are required for this checkpoint.
+
+## Install On Ubuntu Or Debian
+
+Run package and account setup as root or with `sudo`:
+
+```sh
+sudo apt-get update
+sudo apt-get install -y git python3 python3-venv
+sudo useradd --system --home /var/lib/orderbooks --shell /usr/sbin/nologin orderbooks
+sudo mkdir -p /opt /etc/orderbooks /var/lib/orderbooks/discovery /var/lib/orderbooks/raw_orderbooks /var/lib/orderbooks/manifests
+```
+
+Install or update the repository under `/opt/orderbooks`. One option is:
+
+```sh
+cd /opt
+sudo git clone <repo-url> orderbooks
+```
+
+If the checkout already exists:
+
+```sh
+cd /opt/orderbooks
+sudo git pull --ff-only
+```
+
+Prepare permissions:
+
+```sh
+sudo chown -R root:root /opt/orderbooks
+sudo chmod -R a+rX /opt/orderbooks
+sudo chmod +x /opt/orderbooks/scripts/run_polymarket_collector_cycle.sh
+sudo chown -R orderbooks:orderbooks /var/lib/orderbooks
+```
+
+Create the virtualenv:
+
+```sh
+cd /opt/orderbooks
+sudo python3 -m venv .venv
+sudo .venv/bin/python -m pip install --upgrade pip
+sudo chown -R root:root .venv
+sudo chmod -R a+rX .venv
+```
+
+The current Checkpoint 6 scripts use only the Python standard library.
+
+Install the VPS config and service unit:
+
+```sh
+sudo install -o root -g root -m 0644 /opt/orderbooks/config/polymarket_collector.vps.example.yaml /etc/orderbooks/polymarket_collector.vps.yaml
+sudo install -o root -g root -m 0644 /opt/orderbooks/systemd/polymarket-orderbook-collector.service /etc/systemd/system/polymarket-orderbook-collector.service
+```
+
+Review `/etc/orderbooks/polymarket_collector.vps.yaml` before starting the
+service. The example writes under `/var/lib/orderbooks`.
+
+Enable and start:
+
+```sh
+sudo systemctl daemon-reload
+sudo systemctl enable --now polymarket-orderbook-collector.service
+```
+
+## Logs And Status
+
+Use the systemd journal:
+
+```sh
+sudo systemctl status polymarket-orderbook-collector.service
+sudo journalctl -u polymarket-orderbook-collector.service -f
+```
+
+Recent logs without following:
+
+```sh
+sudo journalctl -u polymarket-orderbook-collector.service --since "1 hour ago"
+```
+
+## Output Files
+
+Raw gzip JSONL files are written under:
+
+```text
+/var/lib/orderbooks/raw_orderbooks/polymarket/orderbooks/<run_id>/
+```
+
+Per-cycle manifests are written under:
+
+```text
+/var/lib/orderbooks/manifests/polymarket_orderbook_collector_<cycle_id>.json
+```
+
+Discovery artifacts are refreshed under:
+
+```text
+/var/lib/orderbooks/discovery/
+```
+
+## Restart And Stop Behavior
+
+The unit uses:
+
+```text
+Restart=always
+RestartSec=30s
+TimeoutStopSec=90s
+KillSignal=SIGTERM
+KillMode=control-group
+```
+
+The collector handles `SIGTERM` by finishing or timing out the current request,
+closing the gzip output, and writing the manifest. Every cycle writes to a new
+run directory, so closed files are not reopened by the next cycle.
+
+Stop the service with:
+
+```sh
+sudo systemctl stop polymarket-orderbook-collector.service
+```
+
+Start it again with:
+
+```sh
+sudo systemctl start polymarket-orderbook-collector.service
+```
+
+## Local Validation Without Starting The Service
+
+These checks do not require root:
+
+```sh
+python3 -m py_compile scripts/discover_polymarket_btc_markets.py scripts/collect_polymarket_orderbooks.py
+bash -n scripts/run_polymarket_collector_cycle.sh
+python3 - <<'PY'
+from pathlib import Path
+from scripts.collect_polymarket_orderbooks import load_flat_yaml
+cfg = load_flat_yaml(Path('config/polymarket_collector.vps.example.yaml'))
+required = {
+    'discovery_path',
+    'output_dir',
+    'manifest_path',
+    'market_limit',
+    'interval_seconds',
+    'duration_seconds',
+}
+missing = sorted(required - set(cfg))
+assert not missing, missing
+assert cfg['duration_seconds'] > 0
+print('config parse ok')
+PY
+```
+
+If systemd tools are available locally:
+
+```sh
+systemd-analyze verify systemd/polymarket-orderbook-collector.service
+```
+
+The local machine may not have `/opt/orderbooks` or the `orderbooks` service
+user. Treat missing VPS path or user messages as deployment-environment warnings,
+not collector syntax failures.
+
+## Safe Upgrade
+
+Stop the service, update files, rerun validation, then start the service:
+
+```sh
+sudo systemctl stop polymarket-orderbook-collector.service
+cd /opt/orderbooks
+sudo git pull --ff-only
+sudo .venv/bin/python -m py_compile scripts/discover_polymarket_btc_markets.py scripts/collect_polymarket_orderbooks.py
+sudo systemctl daemon-reload
+sudo systemctl start polymarket-orderbook-collector.service
+```
+
+Do not remove existing data files during an upgrade. If a bad artifact is found,
+preserve it and label it invalid or deprecated with a replacement path when one
+exists.
+
+## Current Limits
+
+- This package runs the existing raw collector; it does not add a daemon inside
+  Python.
+- The systemd loop is a restart model around finite collector cycles.
+- It does not upload files.
+- It does not prove long-run reliability.
+- Production readiness remains blocked until discovery, raw collection, offload,
+  and a documented 24h soak test all pass.
diff --git a/scripts/build_vps_deploy_bundle.sh b/scripts/build_vps_deploy_bundle.sh
new file mode 100755
index 0000000..b0a9cab
--- /dev/null
+++ b/scripts/build_vps_deploy_bundle.sh
@@ -0,0 +1,366 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+APP_DIR="${ORDERBOOKS_APP_DIR:-$(pwd)}"  # source working tree; --app-dir overrides this
+OUTPUT_DIR="${ORDERBOOKS_VPS_BUNDLE_OUTPUT_DIR:-artifacts/vps}"  # directory that receives the tarball and manifest
+TIMESTAMP="${ORDERBOOKS_VPS_BUNDLE_TIMESTAMP:-$(date -u +%Y%m%dT%H%M%SZ)}"  # UTC stamp embedded in artifact names
+BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"
+TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"  # %/ drops one trailing slash so the join never doubles it
+MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
+
+usage() {  # Print the CLI help text to stdout; the unknown-argument path redirects it to stderr.
+  cat <<'EOF'
+Usage: scripts/build_vps_deploy_bundle.sh [options]
+
+Build a deployable VPS bundle from the current working tree. The bundle is
+intended to be copied to a VPS and unpacked under /opt/orderbooks.
+
+Options:
+  --app-dir DIR       Source working tree. Default: ORDERBOOKS_APP_DIR or current directory.
+  --output-dir DIR    Bundle output directory. Default: artifacts/vps.
+  --timestamp TS      Override UTC timestamp used in artifact names.
+  --help              Show this help.
+
+The bundle uses a narrow allowlist and excludes live data, caches, git metadata,
+virtualenvs, rclone config, private keys, wallets, mnemonics, and generated
+artifacts. It does not print secrets and does not write Python bytecode.
+EOF
+}
+
+while [[ $# -gt 0 ]]; do  # consume CLI options left to right; they override the environment-derived defaults
+  case "$1" in
+    --app-dir)
+      APP_DIR="$2"  # under `set -u`, a missing value aborts with an unbound-variable error
+      shift 2
+      ;;
+    --output-dir)
+      OUTPUT_DIR="$2"
+      TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"  # derived paths were built from the old OUTPUT_DIR, so rebuild them
+      MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
+      shift 2
+      ;;
+    --timestamp)
+      TIMESTAMP="$2"
+      BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"  # the basename embeds the timestamp, so everything derived from it is rebuilt
+      TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"
+      MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
+      shift 2
+      ;;
+    --help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      usage >&2
+      exit 2  # exit status 2 is used only for this unknown-argument path
+      ;;
+  esac
+done
+
+APP_DIR="${APP_DIR%/}"  # drop one trailing slash
+if [[ ! -d "${APP_DIR}" ]]; then
+  echo "Source app directory does not exist: ${APP_DIR}" >&2
+  exit 1
+fi
+
+cd "${APP_DIR}"  # enter the source tree first: OUTPUT_DIR, TARBALL and MANIFEST are all resolved from here when relative
+mkdir -p "${OUTPUT_DIR}"  # created after cd so a relative OUTPUT_DIR exists where tar and the manifest writer will look for it
+
+if [[ -e "${TARBALL}" || -e "${MANIFEST}" ]]; then  # never clobber an earlier bundle with the same timestamp
+  echo "Refusing to overwrite existing bundle artifact: ${TARBALL} or ${MANIFEST}" >&2
+  exit 1
+fi
+
+FILELIST="$(mktemp)"  # NUL-delimited member list handed from the selection step to tar
+trap 'rm -f "${FILELIST}"' EXIT
+
+PYTHONDONTWRITEBYTECODE=1 python3 - "${FILELIST}" "${MANIFEST}" "${TARBALL}" "${TIMESTAMP}" <<'PY_BUNDLE_SELECT'
+import datetime as dt
+import fnmatch
+import hashlib
+import json
+import os
+import sys
+from pathlib import Path
+
+filelist_path = Path(sys.argv[1])
+manifest_path = Path(sys.argv[2])
+tarball_path = Path(sys.argv[3])
+timestamp = sys.argv[4]
+root = Path.cwd()
+
+allowed_files = [
+    Path("AGENTS.md"),
+    Path("ROADMAP.md"),
+]
+allowed_dirs = [
+    Path("config"),
+    Path("docs"),
+    Path("scripts"),
+    Path("systemd"),
+    Path("reports/checkpoints"),
+]
+allowed_globs = [
+    "data/manifests/checkpoint_*.json",
+]
+excluded_patterns = [  # recorded verbatim in the manifest only; the actual filtering is done by is_forbidden() using the forbidden_* lists
+    ".git/",
+    ".venv/",
+    "artifacts/",
+    "data/soak_test/",
+    "data/live_sample/",
+    "data/normalized_sample/",
+    "**/__pycache__/",
+    "**/*.pyc",
+    "**/*.pyo",
+    "**/.pytest_cache/",
+    "**/.mypy_cache/",
+    "**/.ruff_cache/",
+    "**/rclone.conf",
+    "**/.env",
+    "**/*.pem",
+    "**/*.key",
+    "**/*.p12",
+    "**/*.pfx",
+    "**/id_rsa*",
+    "**/id_ed25519*",
+    "**/*mnemonic*",
+    "**/*wallet*",
+    "**/*credential*",
+    "**/*secret*",
+]
+required_files = [
+    "AGENTS.md",
+    "ROADMAP.md",
+    "config/polymarket_collector.vps.example.yaml",
+    "config/rclone.example.md",
+    "docs/VPS_CUTOVER_RUNBOOK.md",
+    "docs/VPS_DEPLOYMENT.md",
+    "docs/GOOGLE_DRIVE_OFFLOAD.md",
+    "scripts/build_vps_deploy_bundle.sh",
+    "scripts/vps_preflight_check.sh",
+    "scripts/vps_runtime_smoke_check.sh",
+    "scripts/run_polymarket_collector_cycle.sh",
+    "scripts/upload_archive_rclone.sh",
+    "scripts/discover_polymarket_btc_markets.py",
+    "scripts/collect_polymarket_orderbooks.py",
+    "scripts/normalize_polymarket_orderbooks.py",
+    "systemd/polymarket-orderbook-collector.service",
+    "systemd/polymarket-orderbook-uploader.service",
+    "systemd/polymarket-orderbook-uploader.timer",
+]
+
+forbidden_path_fragments = [
+    "/.git/",
+    "/.venv/",
+    "/__pycache__/",
+    "/data/soak_test/",
+    "/data/live_sample/",
+    "/data/normalized_sample/",
+    "/artifacts/",
+]
+forbidden_names = {
+    "rclone.conf",
+    ".env",
+    "id_rsa",
+    "id_ed25519",
+}
+forbidden_suffixes = {
+    ".pyc",
+    ".pyo",
+    ".pem",
+    ".key",
+    ".p12",
+    ".pfx",
+}
+secretish_name_tokens = [
+    "mnemonic",
+    "wallet",
+    "credential",
+    "secret",
+]
+
+def as_posix(path: Path) -> str:
+    return path.as_posix()
+
+def is_forbidden(path: Path) -> tuple[bool, str | None]:  # returns (excluded?, reason); expects a path relative to the source root
+    rel = as_posix(path)
+    wrapped = f"/{rel}/" if path.is_dir() else f"/{rel}"  # slash-wrap so fragments such as "/.git/" match at any depth
+    if path.is_absolute() or ".." in path.parts:
+        return True, "absolute_or_parent_path"
+    for fragment in forbidden_path_fragments:
+        if fragment in wrapped:
+            return True, f"forbidden_fragment:{fragment}"
+    if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in path.parts):
+        return True, "forbidden_cache_or_metadata_dir"
+    lower_name = path.name.lower()
+    if lower_name in forbidden_names or lower_name.startswith(("id_rsa", "id_ed25519")):  # prefix match honours the recorded id_rsa*/id_ed25519* patterns (e.g. id_rsa.bak)
+        return True, f"forbidden_name:{path.name}"
+    if path.suffix.lower() in forbidden_suffixes:
+        return True, f"forbidden_suffix:{path.suffix}"
+    if any(token in lower_name for token in secretish_name_tokens):
+        return True, f"secretish_name:{path.name}"
+    if rel.startswith(("data/soak_test/", "data/live_sample/", "data/normalized_sample/", "artifacts/")):
+        return True, "forbidden_prefix"
+    return False, None
+
+def iter_allowed_files():  # yield each allowlisted regular file once, as a path relative to the current directory
+    seen = set()
+    for path in allowed_files:
+        if path.is_file() and path not in seen:
+            seen.add(path)
+            yield path
+    for directory in allowed_dirs:
+        if not directory.exists():
+            continue
+        for path in sorted(directory.rglob("*")):  # sorted for a deterministic bundle order
+            if path.is_file() and path not in seen:
+                seen.add(path)
+                yield path
+    for pattern in allowed_globs:
+        for path in sorted(Path().glob(pattern)):  # relative base: root.glob() yields absolute paths, which is_forbidden() always rejects
+            if path.is_file() and path not in seen:
+                seen.add(path)
+                yield path
+
+def sha256_file(path: Path) -> str:  # stream the file in 1 MiB reads and return its hex SHA-256
+    hasher = hashlib.sha256()
+    with open(path, "rb") as stream:
+        while block := stream.read(1024 * 1024):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+included = []  # manifest entries (path, bytes, sha256) for files that go into the tarball
+excluded = []  # allowlisted files rejected by is_forbidden(), each with its reason
+for path in iter_allowed_files():
+    forbidden, reason = is_forbidden(path)
+    if forbidden:
+        excluded.append({"path": as_posix(path), "reason": reason})
+        continue
+    stat = path.stat()
+    included.append({
+        "path": as_posix(path),
+        "bytes": stat.st_size,
+        "sha256": sha256_file(path),
+    })
+
+included_paths = sorted(item["path"] for item in included)
+missing_required = sorted(path for path in required_files if path not in included_paths)
+if missing_required:  # abort before the file list or manifest is written
+    raise SystemExit(f"missing required bundle files: {missing_required}")
+if not included:
+    raise SystemExit("bundle file list is empty")
+
+filelist_path.write_bytes(b"".join(path.encode("utf-8") + b"\0" for path in included_paths))  # NUL-delimited, read back by `tar --null --files-from`
+created_at = dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")  # timezone.utc also works before Python 3.11; dt.UTC does not
+manifest = {
+    "schema_name": "vps_deploy_bundle_manifest",
+    "schema_version": 1,
+    "created_at_utc": created_at,
+    "timestamp": timestamp,
+    "tarball_path": as_posix(tarball_path),
+    "manifest_path": as_posix(manifest_path),
+    "source_root": str(root),
+    "bundle_intent": "Copy to a VPS and unpack under /opt/orderbooks; VPS execution remains pending.",
+    "production_ready": False,
+    "vps_deployed": False,
+    "included_roots": [str(path) for path in allowed_files + allowed_dirs] + allowed_globs,
+    "excluded_patterns": excluded_patterns,
+    "required_files": required_files,
+    "included_file_count": len(included),
+    "included_files": included,
+    "excluded_selected_files": excluded,
+    "missing_required_files": missing_required,
+    "validation": {
+        "required_files_present_before_tar": not missing_required,
+        "forbidden_paths_absent_before_tar": True,  # forbidden files were already filtered out of `included` above
+        "tarball_validation_completed": False,  # flipped to True by the validation step that runs after tar
+    },
+}
+manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+PY_BUNDLE_SELECT
+
+tar --create --gzip --file "${TARBALL}" --null --files-from "${FILELIST}" --owner=0 --group=0 --numeric-owner  # members come from the NUL-delimited list; ownership is recorded as 0:0
+
+PYTHONDONTWRITEBYTECODE=1 python3 - "${TARBALL}" "${MANIFEST}" <<'PY_BUNDLE_VALIDATE'
+import hashlib
+import json
+import sys
+import tarfile
+from pathlib import Path
+
+tarball_path = Path(sys.argv[1])
+manifest_path = Path(sys.argv[2])
+manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+required_files = set(manifest["required_files"])
+
+def sha256_file(path: Path) -> str:  # hex SHA-256 of a file, read in 1 MiB pieces
+    hasher = hashlib.sha256()
+    with open(path, "rb") as stream:
+        while block := stream.read(1024 * 1024):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+def forbidden_reason(name: str) -> str | None:  # reason a tar member name is disallowed, or None when it is acceptable
+    parts = name.split("/")
+    lower_name = parts[-1].lower()
+    if name.startswith("/") or any(part == ".." for part in parts):
+        return "absolute_or_parent_path"
+    if parts[0] in {".git", ".venv", "artifacts"}:
+        return f"forbidden_top_level:{parts[0]}"
+    if len(parts) >= 2 and parts[0] == "data" and parts[1] in {"soak_test", "live_sample", "normalized_sample"}:
+        return f"forbidden_data_dir:data/{parts[1]}"
+    if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in parts):
+        return "forbidden_cache_or_metadata_dir"
+    if lower_name in {"rclone.conf", ".env", "id_rsa", "id_ed25519"} or lower_name.startswith(("id_rsa", "id_ed25519")):  # prefix match honours the recorded id_rsa*/id_ed25519* patterns
+        return f"forbidden_name:{lower_name}"
+    if any(lower_name.endswith(suffix) for suffix in (".pyc", ".pyo", ".pem", ".key", ".p12", ".pfx")):
+        return "forbidden_suffix"
+    if any(token in lower_name for token in ("mnemonic", "wallet", "credential", "secret")):
+        return "secretish_name"
+    return None
+
+with tarfile.open(tarball_path, "r:gz") as archive:
+    members = [member for member in archive.getmembers() if member.isfile()]  # regular files only; other member types are not inspected
+    names = sorted(member.name for member in members)
+
+forbidden = [{"path": name, "reason": forbidden_reason(name)} for name in names if forbidden_reason(name)]
+missing_required = sorted(required_files - set(names))
+if forbidden or missing_required:  # record the failure details in the manifest, then exit non-zero
+    manifest["validation"].update({
+        "tarball_validation_completed": True,
+        "forbidden_paths_absent_in_tarball": not forbidden,
+        "required_files_present_in_tarball": not missing_required,
+        "forbidden_paths_in_tarball": forbidden,
+        "missing_required_files_in_tarball": missing_required,
+    })
+    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+    raise SystemExit(f"bundle validation failed forbidden={forbidden} missing_required={missing_required}")
+
+manifest["tarball_bytes"] = tarball_path.stat().st_size  # success path: enrich the manifest with the tarball facts
+manifest["tarball_sha256"] = sha256_file(tarball_path)
+manifest["tarball_content_count"] = len(names)
+manifest["tarball_contents"] = names
+manifest["validation"].update({
+    "tarball_validation_completed": True,
+    "forbidden_paths_absent_in_tarball": True,
+    "required_files_present_in_tarball": True,
+    "forbidden_paths_in_tarball": [],
+    "missing_required_files_in_tarball": [],
+})
+manifest["gate_status"] = "PASS"  # only written on the success path; a failed validation leaves the key absent
+manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+PY_BUNDLE_VALIDATE
+
+printf 'BUNDLE_TARBALL=%s\n' "${TARBALL}"  # KEY=value summary lines on stdout
+printf 'BUNDLE_MANIFEST=%s\n' "${MANIFEST}"
+python3 - <<'PY_PRINT' "${MANIFEST}"  # the word after the heredoc operator is still passed to python as argv[1]
+import json
+import sys
+from pathlib import Path
+m = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
+print(f"BUNDLE_SHA256={m['tarball_sha256']}")
+print(f"BUNDLE_BYTES={m['tarball_bytes']}")
+print(f"BUNDLE_FILE_COUNT={m['tarball_content_count']}")
+PY_PRINT
diff --git a/scripts/collect_polymarket_orderbooks.py b/scripts/collect_polymarket_orderbooks.py
new file mode 100755
index 0000000..c37727c
--- /dev/null
+++ b/scripts/collect_polymarket_orderbooks.py
@@ -0,0 +1,668 @@
+#!/usr/bin/env python3
+"""Minimal raw Polymarket order-book snapshot sample collector.
+
+Checkpoint 4 scope: finite sample run only. This script reads the BTC discovery
+artifact, fetches public CLOB batch order books for a small market set, writes
+raw gzip JSONL envelopes, and closes with a manifest. It is not a daemon and it
+does not trade.
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import gzip
+import hashlib
+import json
+import signal
+import sys
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+
+# Collector identity stamped into every snapshot envelope and the run manifest.
+COLLECTOR_NAME = "polymarket_orderbook_collector"
+COLLECTOR_VERSION = "0.1.0"
+# Schema tag written on each snapshot envelope.
+SCHEMA_NAME = "raw_orderbook_snapshot"
+SCHEMA_VERSION = 1
+# Default batch order-book endpoint; overridable via config or --clob-books-url.
+CLOB_BOOKS_URL = "https://clob.polymarket.com/books"
+
+# Fallback locations used when neither the CLI nor the config file sets them.
+DEFAULT_CONFIG_PATH = Path("config/polymarket_collector.example.yaml")
+DEFAULT_DISCOVERY_PATH = Path("data/discovery/polymarket_btc_markets_latest.json")
+DEFAULT_OUTPUT_DIR = Path("data/live_sample")
+DEFAULT_MANIFEST_PATH = Path("data/manifests/orderbook_collector_sample_manifest.json")
+
+# Lowercase allowlist of response header names that filter_headers() keeps.
+SAFE_RESPONSE_HEADERS = {
+    "cache-control",
+    "cf-cache-status",
+    "cf-ray",
+    "content-length",
+    "content-type",
+    "date",
+    "retry-after",
+    "server",
+    "x-ratelimit-limit",
+    "x-ratelimit-remaining",
+    "x-ratelimit-reset",
+    "ratelimit-limit",
+    "ratelimit-remaining",
+    "ratelimit-reset",
+}
+
+# Shutdown state written by handle_stop() and polled by the retry and
+# collection loops.
+STOP_REQUESTED = False
+STOP_SIGNAL: str | None = None
+
+
+def handle_stop(signum: int, _frame: Any) -> None:
+    """Signal handler: flag a stop request and remember which signal asked.
+
+    Only module-level state is updated; the loops that poll STOP_REQUESTED
+    decide when to actually stop.
+    """
+    global STOP_REQUESTED, STOP_SIGNAL
+    STOP_REQUESTED = True
+    # Store the symbolic name (e.g. "SIGTERM") rather than the raw number.
+    STOP_SIGNAL = signal.Signals(signum).name
+
+
+def utc_now() -> dt.datetime:
+    return dt.datetime.now(dt.UTC)
+
+
+def iso_z(value: dt.datetime | None = None) -> str:
+    value = value or utc_now()
+    return value.astimezone(dt.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def compact_timestamp(value: dt.datetime | None = None) -> str:
+    value = value or utc_now()
+    return value.astimezone(dt.UTC).strftime("%Y%m%dT%H%M%SZ")
+
+
+def parse_iso(value: Any) -> dt.datetime | None:
+    if not isinstance(value, str) or not value.strip():
+        return None
+    text = value.strip()
+    if text.endswith("Z"):
+        text = text[:-1] + "+00:00"
+    try:
+        parsed = dt.datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    if parsed.tzinfo is None:
+        parsed = parsed.replace(tzinfo=dt.UTC)
+    return parsed.astimezone(dt.UTC)
+
+
+def sha256_file(path: Path) -> str:
+    digest = hashlib.sha256()
+    with path.open("rb") as handle:
+        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def parse_scalar(value: str) -> Any:
+    value = value.strip()
+    if not value:
+        return ""
+    if value[0] in {"'", '"'} and value[-1:] == value[0]:
+        return value[1:-1]
+    lower = value.lower()
+    if lower in {"true", "false"}:
+        return lower == "true"
+    if lower in {"null", "none"}:
+        return None
+    try:
+        return int(value)
+    except ValueError:
+        pass
+    try:
+        return float(value)
+    except ValueError:
+        return value
+
+
+def load_flat_yaml(path: Path) -> dict[str, Any]:
+    """Parse the flat YAML subset used by the example config."""
+    config: dict[str, Any] = {}
+    if not path.exists():
+        return config
+    for line_number, raw_line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
+        line = raw_line.split("#", 1)[0].strip()
+        if not line:
+            continue
+        if ":" not in line:
+            raise ValueError(f"Unsupported config line {line_number}: {raw_line}")
+        key, value = line.split(":", 1)
+        key = key.strip()
+        if not key:
+            raise ValueError(f"Missing config key on line {line_number}")
+        config[key] = parse_scalar(value)
+    return config
+
+
+def config_digest(path: Path | None) -> str | None:
+    if path is None or not path.exists():
+        return None
+    return sha256_file(path)
+
+
+def filter_headers(headers: Any) -> dict[str, str]:
+    safe: dict[str, str] = {}
+    for key, value in dict(headers).items():
+        if key.lower() in SAFE_RESPONSE_HEADERS:
+            safe[key] = value
+    return safe
+
+
+def http_post_json(
+    *,
+    url: str,
+    json_body: Any,
+    timeout_seconds: float,
+    max_retries: int,
+    backoff_seconds: float,
+) -> dict[str, Any]:
+    body_bytes = json.dumps(json_body, separators=(",", ":")).encode("utf-8")
+    attempts: list[dict[str, Any]] = []
+    final_json: Any | None = None
+    final_text_preview: str | None = None
+    final_json_error: str | None = None
+    final_status_code: int | None = None
+    final_headers: dict[str, str] = {}
+
+    for attempt_index in range(max_retries + 1):
+        started_at = iso_z()
+        started_monotonic = time.monotonic()
+        status_code: int | None = None
+        response_headers: dict[str, str] = {}
+        response_text = ""
+        error: str | None = None
+        try:
+            request = urllib.request.Request(
+                url,
+                data=body_bytes,
+                headers={
+                    "Accept": "application/json",
+                    "Content-Type": "application/json",
+                    "User-Agent": "orderbooks-checkpoint-4-sample/0.1.0",
+                },
+                method="POST",
+            )
+            with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
+                status_code = response.status
+                response_headers = filter_headers(response.headers)
+                response_text = response.read().decode("utf-8", errors="replace")
+        except urllib.error.HTTPError as exc:
+            status_code = exc.code
+            response_headers = filter_headers(exc.headers)
+            response_text = exc.read().decode("utf-8", errors="replace")
+            error = f"HTTPError: {exc}"
+        except Exception as exc:  # noqa: BLE001 - preserve request failure evidence
+            error = f"{type(exc).__name__}: {exc}"
+
+        duration_ms = round((time.monotonic() - started_monotonic) * 1000, 3)
+        parsed_json = None
+        json_error = None
+        if response_text:
+            try:
+                parsed_json = json.loads(response_text)
+            except json.JSONDecodeError as exc:
+                json_error = str(exc)
+
+        attempts.append(
+            {
+                "attempt": attempt_index + 1,
+                "started_at_utc": started_at,
+                "ended_at_utc": iso_z(),
+                "duration_ms": duration_ms,
+                "status_code": status_code,
+                "headers": response_headers,
+                "error": error,
+                "json_error": json_error,
+            }
+        )
+        final_json = parsed_json
+        final_json_error = json_error
+        final_text_preview = response_text[:1000] if parsed_json is None else None
+        final_status_code = status_code
+        final_headers = response_headers
+
+        retryable = status_code == 429 or (status_code is not None and 500 <= status_code <= 599)
+        if error is None and status_code is not None and 200 <= status_code < 300:
+            break
+        if not retryable or attempt_index >= max_retries or STOP_REQUESTED:
+            break
+        retry_after = response_headers.get("Retry-After") or response_headers.get("retry-after")
+        sleep_seconds = backoff_seconds * (2**attempt_index)
+        if retry_after:
+            try:
+                sleep_seconds = max(sleep_seconds, float(retry_after))
+            except ValueError:
+                pass
+        time.sleep(sleep_seconds)
+
+    return {
+        "request": {
+            "method": "POST",
+            "url": url,
+            "json_body": json_body,
+        },
+        "response": {
+            "status_code": final_status_code,
+            "headers": final_headers,
+            "json": final_json,
+            "json_error": final_json_error,
+            "text_preview": final_text_preview,
+        },
+        "attempts": attempts,
+        "duration_ms": round(sum(attempt["duration_ms"] for attempt in attempts), 3),
+        "ok": final_status_code is not None and 200 <= final_status_code < 300 and final_json_error is None,
+    }
+
+
+def load_discovery(path: Path) -> dict[str, Any]:
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def market_is_usable(market: dict[str, Any], now: dt.datetime, safety_seconds: int) -> tuple[bool, list[str]]:
+    reasons: list[str] = []
+    if market.get("active") is not True:
+        reasons.append("not_active")
+    if market.get("closed") is not False:
+        reasons.append("closed")
+    if market.get("accepting_orders") is not True:
+        reasons.append("not_accepting_orders")
+    if market.get("enable_order_book") is not True:
+        reasons.append("order_book_not_enabled")
+    end_time = parse_iso(market.get("end_time_utc"))
+    if end_time is None:
+        reasons.append("missing_end_time")
+    elif end_time <= now + dt.timedelta(seconds=safety_seconds):
+        reasons.append("too_close_to_end_or_expired")
+    tokens = market.get("tokens")
+    if not isinstance(tokens, list) or len(tokens) < 2:
+        reasons.append("missing_two_tokens")
+    else:
+        outcomes = [token.get("outcome") for token in tokens if isinstance(token, dict)]
+        token_ids = [token.get("token_id") for token in tokens if isinstance(token, dict)]
+        if outcomes[:2] != ["Up", "Down"] or not all(token_ids[:2]):
+            reasons.append("bad_up_down_token_mapping")
+    return not reasons, reasons
+
+
+def select_markets(
+    discovery: dict[str, Any],
+    *,
+    market_limit: int,
+    market_end_safety_seconds: int,
+) -> tuple[list[dict[str, Any]], dict[str, int]]:
+    now = utc_now()
+    selected: list[dict[str, Any]] = []
+    rejection_counts: dict[str, int] = {}
+    markets = discovery.get("normalized_markets") or []
+    for market in markets:
+        if not isinstance(market, dict):
+            rejection_counts["not_object"] = rejection_counts.get("not_object", 0) + 1
+            continue
+        usable, reasons = market_is_usable(market, now, market_end_safety_seconds)
+        if not usable:
+            for reason in reasons:
+                rejection_counts[reason] = rejection_counts.get(reason, 0) + 1
+            continue
+        selected.append(market)
+        if len(selected) >= market_limit:
+            break
+    return selected, dict(sorted(rejection_counts.items()))
+
+
+def flatten_tokens(markets: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    tokens: list[dict[str, Any]] = []
+    for market in markets:
+        for token in market.get("tokens", [])[:2]:
+            tokens.append(
+                {
+                    "market_name": market.get("market_name"),
+                    "market_slug": market.get("market_slug"),
+                    "condition_id": market.get("condition_id"),
+                    "token_id": str(token.get("token_id")),
+                    "outcome": token.get("outcome"),
+                    "market_end_time_utc": market.get("end_time_utc"),
+                }
+            )
+    return tokens
+
+
+def build_snapshot_envelope(
+    *,
+    raw_book: dict[str, Any],
+    token_meta: dict[str, Any],
+    collected_at_utc: str,
+    sequence: int,
+    request_record: dict[str, Any],
+    response_index: int,
+) -> dict[str, Any]:
+    return {
+        "schema_name": SCHEMA_NAME,
+        "schema_version": SCHEMA_VERSION,
+        "collector": {
+            "name": COLLECTOR_NAME,
+            "version": COLLECTOR_VERSION,
+        },
+        "market": {
+            "market_name": token_meta.get("market_name"),
+            "market_slug": token_meta.get("market_slug"),
+            "condition_id": token_meta.get("condition_id"),
+            "token_id": token_meta.get("token_id"),
+            "outcome": token_meta.get("outcome"),
+            "market_end_time_utc": token_meta.get("market_end_time_utc"),
+        },
+        "collection": {
+            "collected_at_utc": collected_at_utc,
+            "sequence": sequence,
+            "response_index": response_index,
+        },
+        "request": {
+            "method": request_record["request"]["method"],
+            "url": request_record["request"]["url"],
+            "params": None,
+            "json_body": request_record["request"]["json_body"],
+            "status_code": request_record["response"]["status_code"],
+            "duration_ms": request_record["duration_ms"],
+            "attempts": request_record["attempts"],
+        },
+        "raw": raw_book,
+    }
+
+
+def summarize_output_file(path: Path, rows_written: int) -> dict[str, Any]:
+    return {
+        "path": path.as_posix(),
+        "status": "valid" if path.exists() and path.stat().st_size > 0 else "missing",
+        "bytes": path.stat().st_size if path.exists() else 0,
+        "rows": rows_written,
+        "sha256": sha256_file(path) if path.exists() else None,
+    }
+
+
+def write_manifest(path: Path, manifest: dict[str, Any]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+
+def config_value(config: dict[str, Any], args: argparse.Namespace, key: str, default: Any) -> Any:
+    cli_value = getattr(args, key)
+    if cli_value is not None:
+        return cli_value
+    return config.get(key, default)
+
+
+def build_runtime_config(args: argparse.Namespace) -> dict[str, Any]:
+    config_path = args.config
+    file_config = load_flat_yaml(config_path) if config_path else {}
+    runtime = {
+        "discovery_path": Path(config_value(file_config, args, "discovery_path", DEFAULT_DISCOVERY_PATH)),
+        "output_dir": Path(config_value(file_config, args, "output_dir", DEFAULT_OUTPUT_DIR)),
+        "manifest_path": Path(config_value(file_config, args, "manifest_path", DEFAULT_MANIFEST_PATH)),
+        "market_limit": int(config_value(file_config, args, "market_limit", 2)),
+        "interval_seconds": float(config_value(file_config, args, "interval_seconds", 30.0)),
+        "duration_seconds": float(config_value(file_config, args, "duration_seconds", 300.0)),
+        "request_timeout_seconds": float(config_value(file_config, args, "request_timeout_seconds", 15.0)),
+        "max_retries": int(config_value(file_config, args, "max_retries", 2)),
+        "backoff_seconds": float(config_value(file_config, args, "backoff_seconds", 2.0)),
+        "market_end_safety_seconds": int(config_value(file_config, args, "market_end_safety_seconds", 420)),
+        "clob_books_url": str(config_value(file_config, args, "clob_books_url", CLOB_BOOKS_URL)),
+        "config_path": config_path,
+        "config_sha256": config_digest(config_path),
+        "config_snapshot": file_config,
+    }
+    if runtime["market_limit"] < 1:
+        raise ValueError("market_limit must be >= 1")
+    if runtime["interval_seconds"] <= 0:
+        raise ValueError("interval_seconds must be > 0")
+    if runtime["duration_seconds"] <= 0:
+        raise ValueError("duration_seconds must be > 0")
+    return runtime
+
+
+def run_collection(runtime: dict[str, Any], command: str) -> tuple[dict[str, Any], Path]:
+    signal.signal(signal.SIGINT, handle_stop)
+    signal.signal(signal.SIGTERM, handle_stop)
+
+    started = utc_now()
+    started_at_utc = iso_z(started)
+    discovery_path: Path = runtime["discovery_path"]
+    discovery = load_discovery(discovery_path)
+    selected_markets, rejection_counts = select_markets(
+        discovery,
+        market_limit=runtime["market_limit"],
+        market_end_safety_seconds=runtime["market_end_safety_seconds"],
+    )
+    warnings: list[str] = []
+    failures: list[dict[str, Any]] = []
+    if not selected_markets:
+        warnings.append("No usable active BTC markets found in discovery input.")
+
+    tokens = flatten_tokens(selected_markets)
+    run_id = compact_timestamp(started)
+    output_dir = runtime["output_dir"] / "polymarket" / "orderbooks" / run_id
+    output_dir.mkdir(parents=True, exist_ok=True)
+    output_file = output_dir / f"polymarket_orderbooks_{run_id}.jsonl.gz"
+
+    request_count = 0
+    success_count = 0
+    failure_count = 0
+    status_code_counts: dict[str, int] = {}
+    rows_written = 0
+    sequence = 0
+    token_row_counts = {token["token_id"]: 0 for token in tokens}
+
+    deadline = time.monotonic() + runtime["duration_seconds"]
+    token_by_id = {token["token_id"]: token for token in tokens}
+    request_body = [{"token_id": token["token_id"]} for token in tokens]
+
+    with gzip.open(output_file, "wt", encoding="utf-8") as handle:
+        while tokens and not STOP_REQUESTED and time.monotonic() < deadline:
+            loop_started = time.monotonic()
+            collected_at_utc = iso_z()
+            request_count += 1
+            request_record = http_post_json(
+                url=runtime["clob_books_url"],
+                json_body=request_body,
+                timeout_seconds=runtime["request_timeout_seconds"],
+                max_retries=runtime["max_retries"],
+                backoff_seconds=runtime["backoff_seconds"],
+            )
+            status_code = request_record["response"]["status_code"]
+            status_key = str(status_code)
+            status_code_counts[status_key] = status_code_counts.get(status_key, 0) + 1
+            if request_record["ok"] and isinstance(request_record["response"]["json"], list):
+                success_count += 1
+                for response_index, raw_book in enumerate(request_record["response"]["json"]):
+                    if not isinstance(raw_book, dict):
+                        failure_count += 1
+                        failures.append(
+                            {
+                                "collected_at_utc": collected_at_utc,
+                                "reason": "book_response_item_not_object",
+                                "response_index": response_index,
+                            }
+                        )
+                        continue
+                    asset_id = str(raw_book.get("asset_id") or "")
+                    token_meta = token_by_id.get(asset_id)
+                    if token_meta is None:
+                        failure_count += 1
+                        failures.append(
+                            {
+                                "collected_at_utc": collected_at_utc,
+                                "reason": "unknown_asset_id_in_book_response",
+                                "asset_id": asset_id,
+                            }
+                        )
+                        continue
+                    sequence += 1
+                    envelope = build_snapshot_envelope(
+                        raw_book=raw_book,
+                        token_meta=token_meta,
+                        collected_at_utc=collected_at_utc,
+                        sequence=sequence,
+                        request_record=request_record,
+                        response_index=response_index,
+                    )
+                    handle.write(json.dumps(envelope, separators=(",", ":"), sort_keys=True) + "\n")
+                    rows_written += 1
+                    token_row_counts[asset_id] = token_row_counts.get(asset_id, 0) + 1
+                handle.flush()
+            else:
+                failure_count += 1
+                failures.append(
+                    {
+                        "collected_at_utc": collected_at_utc,
+                        "reason": "request_failed_or_non_json_list",
+                        "status_code": status_code,
+                        "attempts": request_record["attempts"],
+                        "json_error": request_record["response"]["json_error"],
+                        "text_preview": request_record["response"]["text_preview"],
+                    }
+                )
+
+            remaining_interval = runtime["interval_seconds"] - (time.monotonic() - loop_started)
+            while remaining_interval > 0 and not STOP_REQUESTED and time.monotonic() < deadline:
+                sleep_for = min(remaining_interval, deadline - time.monotonic(), 1.0)
+                if sleep_for <= 0:
+                    break
+                time.sleep(sleep_for)
+                remaining_interval = runtime["interval_seconds"] - (time.monotonic() - loop_started)
+
+    ended = utc_now()
+    ended_at_utc = iso_z(ended)
+    duration_seconds_actual = round((ended - started).total_seconds(), 3)
+    if STOP_REQUESTED:
+        warnings.append(f"Graceful shutdown requested by {STOP_SIGNAL}.")
+    if runtime["duration_seconds"] < 300:
+        warnings.append("Configured run duration was shorter than the roadmap 5-minute sample target.")
+    if not failures and request_count > 0:
+        failures = []
+    output_summary = summarize_output_file(output_file, rows_written)
+    gate_status = "PASS" if rows_written > 0 and all(count > 0 for count in token_row_counts.values()) else "FAIL"
+    if not tokens:
+        gate_status = "BLOCKED"
+    if request_count == 0:
+        gate_status = "FAIL" if tokens else "BLOCKED"
+    manifest = {
+        "schema_name": "orderbook_collector_sample_manifest",
+        "schema_version": 1,
+        "checkpoint_id": 4,
+        "checkpoint_name": "Minimal Orderbook Snapshot Collector",
+        "gate_status": gate_status,
+        "collector": {
+            "name": COLLECTOR_NAME,
+            "version": COLLECTOR_VERSION,
+        },
+        "started_at_utc": started_at_utc,
+        "ended_at_utc": ended_at_utc,
+        "run_duration_seconds": duration_seconds_actual,
+        "configured_duration_seconds": runtime["duration_seconds"],
+        "interval_seconds": runtime["interval_seconds"],
+        "command": command,
+        "config": {
+            "path": runtime["config_path"].as_posix() if runtime["config_path"] else None,
+            "sha256": runtime["config_sha256"],
+            "snapshot": runtime["config_snapshot"],
+            "effective": {
+                "discovery_path": discovery_path.as_posix(),
+                "output_dir": runtime["output_dir"].as_posix(),
+                "manifest_path": runtime["manifest_path"].as_posix(),
+                "market_limit": runtime["market_limit"],
+                "interval_seconds": runtime["interval_seconds"],
+                "duration_seconds": runtime["duration_seconds"],
+                "request_timeout_seconds": runtime["request_timeout_seconds"],
+                "max_retries": runtime["max_retries"],
+                "backoff_seconds": runtime["backoff_seconds"],
+                "market_end_safety_seconds": runtime["market_end_safety_seconds"],
+                "clob_books_url": runtime["clob_books_url"],
+            },
+        },
+        "discovery": {
+            "path": discovery_path.as_posix(),
+            "fetched_at_utc": discovery.get("fetched_at_utc"),
+            "source_summary": discovery.get("summary"),
+            "rejection_counts_before_selection": rejection_counts,
+        },
+        "markets_tracked": [
+            {
+                "market_name": market.get("market_name"),
+                "market_slug": market.get("market_slug"),
+                "condition_id": market.get("condition_id"),
+                "end_time_utc": market.get("end_time_utc"),
+            }
+            for market in selected_markets
+        ],
+        "tokens_tracked": tokens,
+        "request_count": request_count,
+        "success_count": success_count,
+        "failure_count": failure_count,
+        "status_code_counts": dict(sorted(status_code_counts.items())),
+        "rows_written": rows_written,
+        "token_row_counts": token_row_counts,
+        "output_files": [output_summary],
+        "failures": failures,
+        "warnings": warnings,
+        "known_gaps": [
+            "This is a short run-rotated sample, not a daemon.",
+            "Hourly rotation is documented but not implemented in this checkpoint.",
+            "No websocket capture, normalization, upload, systemd unit, dashboard, database, or trading behavior is included.",
+            "A 5-minute sample proves file-writing behavior only; it does not prove 24/7 reliability.",
+        ],
+        "fake_progress_risk": "A small successful sample can still hide long-run gaps, stale discovery, endpoint schema drift, and missed intervals. Reliability remains gated on the future 24h soak test.",
+        "next_step": "Checkpoint 5 should normalize this raw sample while preserving raw file references, or rerun a fresh short sample if the orchestrator wants more raw evidence first.",
+    }
+    return manifest, output_file
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Collect a bounded raw gzip JSONL sample of Polymarket BTC order books."
+    )
+    parser.add_argument("--config", type=Path, default=DEFAULT_CONFIG_PATH)
+    parser.add_argument("--discovery-path", type=Path, default=None)
+    parser.add_argument("--output-dir", type=Path, default=None)
+    parser.add_argument("--manifest-path", type=Path, default=None)
+    parser.add_argument("--market-limit", type=int, default=None)
+    parser.add_argument("--interval-seconds", type=float, default=None)
+    parser.add_argument("--duration-seconds", type=float, default=None)
+    parser.add_argument("--request-timeout-seconds", type=float, default=None)
+    parser.add_argument("--max-retries", type=int, default=None)
+    parser.add_argument("--backoff-seconds", type=float, default=None)
+    parser.add_argument("--market-end-safety-seconds", type=int, default=None)
+    parser.add_argument("--clob-books-url", type=str, default=None)
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+    command = " ".join([Path(sys.argv[0]).as_posix(), *sys.argv[1:]])
+    runtime = build_runtime_config(args)
+    manifest, output_file = run_collection(runtime, command)
+    write_manifest(runtime["manifest_path"], manifest)
+    print(
+        json.dumps(
+            {
+                "gate_status": manifest["gate_status"],
+                "manifest_path": runtime["manifest_path"].as_posix(),
+                "output_file": output_file.as_posix(),
+                "markets_tracked": manifest["markets_tracked"],
+                "tokens_tracked": len(manifest["tokens_tracked"]),
+                "request_count": manifest["request_count"],
+                "success_count": manifest["success_count"],
+                "failure_count": manifest["failure_count"],
+                "rows_written": manifest["rows_written"],
+                "warnings": manifest["warnings"],
+            },
+            indent=2,
+            sort_keys=True,
+        )
+    )
+    return 0 if manifest["gate_status"] == "PASS" else 1
+
+
+# Script entry point: exit with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/deploy/bootstrap_orderbooks_k8s.sh b/scripts/deploy/bootstrap_orderbooks_k8s.sh
new file mode 100755
index 0000000..a807022
--- /dev/null
+++ b/scripts/deploy/bootstrap_orderbooks_k8s.sh
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
+PLATFORM_REPO_DIR="${PLATFORM_REPO_DIR:-/home/philipp/dev/ae/nuri/unrip3}"
+PLATFORM_ENV_FILE="${PLATFORM_ENV_FILE:-$PLATFORM_REPO_DIR/scripts/hetzner/bootstrap-secrets.env}"
+PLATFORM_RESOLVED_ENV_FILE="${PLATFORM_RESOLVED_ENV_FILE:-$PLATFORM_REPO_DIR/.state/hetzner/bootstrap-secrets.resolved.env}"
+KUBECONFIG_PATH="${KUBECONFIG_PATH:-$PLATFORM_REPO_DIR/.state/hetzner/kubeconfig.yaml}"
+CI_KUBECONFIG_PATH="${CI_KUBECONFIG_PATH:-$PLATFORM_REPO_DIR/.state/hetzner/kubeconfig.incluster.yaml}"
+
+PROJECT_NAME="${PROJECT_NAME:-orderbooks}"
+PROJECT_NAMESPACE="${PROJECT_NAMESPACE:-orderbooks}"
+PROJECT_DEPLOYMENTS="${PROJECT_DEPLOYMENTS:-orderbooks-collector}"
+PROJECT_REGISTRY_SECRET_NAME="${PROJECT_REGISTRY_SECRET_NAME:-orderbooks-registry-creds}"
+RCLONE_SECRET_NAME="${RCLONE_SECRET_NAME:-orderbooks-rclone-config}"
+RCLONE_SECRET_KEY="${RCLONE_SECRET_KEY:-rclone.conf}"
+FORGEJO_REPO_OWNER="${FORGEJO_REPO_OWNER:-philipp}"
+FORGEJO_REPO_NAME="${FORGEJO_REPO_NAME:-orderbooks}"
+FORGEJO_REPO_PRIVATE="${FORGEJO_REPO_PRIVATE:-0}"
+
+require() {
+  command -v "$1" >/dev/null 2>&1 || {
+    echo "missing required command: $1" >&2
+    exit 1
+  }
+}
+
+# load_env_defaults FILE: export KEY=VALUE pairs from FILE as defaults.
+#
+# A missing FILE is ignored. Blank lines, "#" comment lines, and lines without
+# "=" are skipped; a leading "export " and one pair of matching quotes around
+# the value are removed. Keys already present in the environment are left
+# untouched. Keys that are not plain shell identifiers are skipped with a
+# warning, because the generated text is passed to eval and an unchecked key
+# could otherwise inject shell syntax.
+load_env_defaults() {
+  local file="$1"
+  [[ -f "$file" ]] || return 0
+  eval "$(
+    python3 - "$file" <<'PY_LOAD_ENV'
+import os
+import re
+import shlex
+import sys
+
+VALID_KEY = re.compile(r'[A-Za-z_][A-Za-z0-9_]*')
+
+with open(sys.argv[1], 'r', encoding='utf-8') as handle:
+    for line_number, raw in enumerate(handle, 1):
+        line = raw.strip()
+        if not line or line.startswith('#'):
+            continue
+        if line.startswith('export '):
+            line = line[len('export '):]
+        if '=' not in line:
+            continue
+        key, value = line.split('=', 1)
+        key = key.strip()
+        value = value.strip()
+        if not VALID_KEY.fullmatch(key):
+            # Report only the location; the line may hold secret material.
+            print(f'skipping invalid env key on line {line_number} of {sys.argv[1]}', file=sys.stderr)
+            continue
+        if len(value) >= 2 and value[0] == value[-1] and value[0] in {'\"', "'"}:
+            value = value[1:-1]
+        if key in os.environ:
+            continue
+        print(f'export {key}={shlex.quote(value)}')
+PY_LOAD_ENV
+  )"
+}
+
+# Fail fast when an external tool is missing.
+# NOTE(review): no direct base64 call is visible in this script; confirm
+# whether the requirement is still needed.
+require kubectl
+require python3
+require base64
+
+# Import platform settings. Values already in the environment win, and a key
+# set by the first file is not replaced by the second (load_env_defaults
+# skips keys that are already exported).
+load_env_defaults "$PLATFORM_ENV_FILE"
+load_env_defaults "$PLATFORM_RESOLVED_ENV_FILE"
+
+# Force orderbooks app identity after loading platform defaults. The platform
+# env file may describe the platform repo itself, not this app repo.
+PROJECT_NAME="${ORDERBOOKS_PROJECT_NAME:-orderbooks}"
+PROJECT_NAMESPACE="${ORDERBOOKS_PROJECT_NAMESPACE:-orderbooks}"
+PROJECT_DEPLOYMENTS="${ORDERBOOKS_PROJECT_DEPLOYMENTS:-orderbooks-collector}"
+PROJECT_REGISTRY_SECRET_NAME="${ORDERBOOKS_PROJECT_REGISTRY_SECRET_NAME:-orderbooks-registry-creds}"
+RCLONE_SECRET_NAME="${ORDERBOOKS_RCLONE_SECRET_NAME:-orderbooks-rclone-config}"
+RCLONE_SECRET_KEY="${ORDERBOOKS_RCLONE_SECRET_KEY:-rclone.conf}"
+FORGEJO_REPO_OWNER="${ORDERBOOKS_FORGEJO_REPO_OWNER:-philipp}"
+FORGEJO_REPO_NAME="${ORDERBOOKS_FORGEJO_REPO_NAME:-orderbooks}"
+FORGEJO_REPO_PRIVATE="${ORDERBOOKS_FORGEJO_REPO_PRIVATE:-0}"
+
+# Both kubeconfig files must exist: the first drives kubectl in this script,
+# the second is handed to the Forgejo bootstrap as --ci-kubeconfig.
+: "${KUBECONFIG_PATH:?missing kubeconfig path}"
+: "${CI_KUBECONFIG_PATH:?missing CI kubeconfig path}"
+[[ -f "$KUBECONFIG_PATH" ]] || { echo "missing kubeconfig file" >&2; exit 1; }
+[[ -f "$CI_KUBECONFIG_PATH" ]] || { echo "missing in-cluster kubeconfig file" >&2; exit 1; }
+export KUBECONFIG="$KUBECONFIG_PATH"
+
+# Resolve the Forgejo URL: FORGEJO_URL, else FORGEJO_ROOT_URL, else
+# https://FORGEJO_DOMAIN.
+if [[ -z "${FORGEJO_URL:-}" ]]; then
+  if [[ -n "${FORGEJO_ROOT_URL:-}" ]]; then
+    FORGEJO_URL="$FORGEJO_ROOT_URL"
+  elif [[ -n "${FORGEJO_DOMAIN:-}" ]]; then
+    FORGEJO_URL="https://${FORGEJO_DOMAIN}"
+  else
+    echo "missing Forgejo URL" >&2
+    exit 1
+  fi
+fi
+
+# The admin username is always required; the password only when no token is set.
+: "${FORGEJO_ADMIN_USERNAME:?missing Forgejo admin username}"
+if [[ -z "${FORGEJO_TOKEN:-}" ]]; then
+  : "${FORGEJO_ADMIN_PASSWORD:?missing Forgejo password or token}"
+fi
+
+# Resolve the registry host: REGISTRY_HOST, else REGISTRY_DOMAIN.
+if [[ -z "${REGISTRY_HOST:-}" ]]; then
+  if [[ -n "${REGISTRY_DOMAIN:-}" ]]; then
+    REGISTRY_HOST="$REGISTRY_DOMAIN"
+  else
+    echo "missing registry host" >&2
+    exit 1
+  fi
+fi
+: "${REGISTRY_USERNAME:?missing registry username}"
+: "${REGISTRY_PASSWORD:?missing registry password}"
+
+# Render each object client-side and pipe it to "kubectl apply".
+echo "ensuring namespace ${PROJECT_NAMESPACE}"
+kubectl create namespace "$PROJECT_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
+
+# NOTE(review): the registry password is passed as a command-line argument.
+echo "upserting registry secret ${PROJECT_REGISTRY_SECRET_NAME}"
+kubectl -n "$PROJECT_NAMESPACE" create secret docker-registry "$PROJECT_REGISTRY_SECRET_NAME" \
+  --docker-server="$REGISTRY_HOST" \
+  --docker-username="$REGISTRY_USERNAME" \
+  --docker-password="$REGISTRY_PASSWORD" \
+  --dry-run=client -o yaml | kubectl apply -f -
+
+# Prints rclone_secret_key_present or rclone_secret_key_missing; the secret
+# value itself is never printed. A missing key is only reported here.
+# NOTE(review): kubectl presumably exits non-zero when the secret itself does
+# not exist, which would abort the script under "set -e" - confirm.
+echo "checking rclone secret key presence"
+kubectl -n "$PROJECT_NAMESPACE" get secret "$RCLONE_SECRET_NAME" \
+  -o "go-template={{if index .data \"${RCLONE_SECRET_KEY}\"}}rclone_secret_key_present{{else}}rclone_secret_key_missing{{end}}{{\"\\n\"}}"
+
+# Authenticate with the token when one is set, otherwise with the admin
+# username and password.
+# NOTE(review): these credentials are passed as command-line arguments.
+echo "upserting Forgejo repo and Actions settings"
+forgejo_args=()
+if [[ -n "${FORGEJO_TOKEN:-}" ]]; then
+  forgejo_args+=(--token "$FORGEJO_TOKEN")
+else
+  forgejo_args+=(--admin-username "$FORGEJO_ADMIN_USERNAME" --admin-password "$FORGEJO_ADMIN_PASSWORD")
+fi
+# Only the exact values "1" and "true" make the repository private.
+if [[ "$FORGEJO_REPO_PRIVATE" == "1" || "$FORGEJO_REPO_PRIVATE" == "true" ]]; then
+  forgejo_args+=(--repo-private)
+fi
+
+python3 "$ROOT_DIR/scripts/deploy/forgejo_repo_bootstrap.py" \
+  --forgejo-url "$FORGEJO_URL" \
+  --repo-owner "$FORGEJO_REPO_OWNER" \
+  --repo-name "$FORGEJO_REPO_NAME" \
+  --ci-kubeconfig "$CI_KUBECONFIG_PATH" \
+  --registry-host "$REGISTRY_HOST" \
+  --project-name "$PROJECT_NAME" \
+  --project-namespace "$PROJECT_NAMESPACE" \
+  --project-deployments "$PROJECT_DEPLOYMENTS" \
+  --project-registry-secret-name "$PROJECT_REGISTRY_SECRET_NAME" \
+  "${forgejo_args[@]}"
+
+echo "bootstrap complete for ${FORGEJO_REPO_OWNER}/${FORGEJO_REPO_NAME} in namespace ${PROJECT_NAMESPACE}"
diff --git a/scripts/deploy/forgejo_repo_bootstrap.py b/scripts/deploy/forgejo_repo_bootstrap.py
new file mode 100755
index 0000000..7c67c9b
--- /dev/null
+++ b/scripts/deploy/forgejo_repo_bootstrap.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+"""Orderbooks-specific Forgejo repo bootstrap.
+
+Creates/updates the Forgejo repository plus Actions settings for the Kubernetes
+orderbooks deployment. This script deliberately does not print secret values.
+"""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import json
+import ssl
+import urllib.error
+import urllib.parse
+import urllib.request
+from pathlib import Path
+
+
+class ForgejoClient:
+    """Minimal stdlib-only Forgejo REST client: repo lookup/creation plus Actions secret and variable upserts."""
+    def __init__(self, base_url: str, username: str | None = None, password: str | None = None, token: str | None = None, timeout: float = 30.0):
+        self.base_url = base_url.rstrip('/')
+        self.username = username or ''  # stays '' under token-only auth until create_repo resolves the login
+        self.headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
+        if token:
+            self.headers['Authorization'] = f'token {token}'
+        elif username is not None and password is not None:
+            credentials = base64.b64encode(f'{username}:{password}'.encode()).decode()
+            self.headers['Authorization'] = f'Basic {credentials}'
+        else:
+            raise ValueError('ForgejoClient requires either token auth or username/password auth')
+        self.ssl_context, self.timeout = ssl.create_default_context(), timeout  # timeout (seconds) bounds every request
+
+    def request(self, method: str, path: str, payload=None, expected=(200, 201, 204)):  # returns parsed JSON or None; raises RuntimeError on a status outside `expected`
+        data = json.dumps(payload).encode() if payload is not None else None
+        req = urllib.request.Request(f'{self.base_url}{path}', data=data, method=method, headers=self.headers)
+        try:
+            with urllib.request.urlopen(req, context=self.ssl_context, timeout=self.timeout) as response:
+                status, body, cause = response.status, (response.read().decode() if response.length != 0 else ''), None
+        except urllib.error.HTTPError as exc:  # urllib raises for 4xx/5xx; handle them like any other status
+            status, body, cause = exc.code, exc.read().decode(), exc
+        if status not in expected:
+            raise RuntimeError(f'{method} {path} returned {status}: {body[:500]}') from cause
+        return json.loads(body) if body else None
+
+    def get_repo(self, owner: str, repo: str):  # repo JSON, or None when the API answers 404
+        try:
+            return self.request('GET', f'/api/v1/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(repo)}')
+        except RuntimeError as exc:
+            if ' returned 404:' in str(exc):  # matches the message format produced by request()
+                return None
+            raise
+
+    def create_repo(self, owner: str, name: str, private: bool):  # creates under the user or org endpoint; expects 201
+        payload = {'name': name, 'private': private, 'auto_init': False, 'default_branch': 'main'}
+        if not self.username:  # token-only auth: learn the token owner's login so a user-owned repo is routed correctly
+            try:
+                self.username = (self.request('GET', '/api/v1/user', expected=(200,)) or {}).get('login') or ''
+            except RuntimeError:
+                pass  # lookup refused (e.g. token scope); keep the previous behavior and use the org endpoint
+        path = '/api/v1/user/repos' if owner == self.username else f'/api/v1/orgs/{urllib.parse.quote(owner)}/repos'
+        return self.request('POST', path, payload, expected=(201,))
+
+    def upsert_variable(self, owner: str, repo: str, name: str, value: str):  # POST to create, PUT to update on 409/422
+        path = f'/api/v1/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(repo)}/actions/variables/{urllib.parse.quote(name)}'
+        try:
+            self.request('POST', path, {'value': value}, expected=(201, 204))
+        except RuntimeError as exc:
+            if ' returned 409:' not in str(exc) and ' returned 422:' not in str(exc):
+                raise
+            self.request('PUT', path, {'value': value}, expected=(201, 204))
+
+    def upsert_secret(self, owner: str, repo: str, name: str, value: str):  # single PUT; the value is never logged here
+        path = f'/api/v1/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(repo)}/actions/secrets/{urllib.parse.quote(name)}'
+        self.request('PUT', path, {'data': value}, expected=(201, 204))
+
+
+def main() -> None:  # CLI entry point: ensure the repo exists, then upsert its Actions secret and variables
+    parser = argparse.ArgumentParser(description='Bootstrap Forgejo Actions settings for orderbooks')
+    parser.add_argument('--forgejo-url', required=True)
+    parser.add_argument('--admin-username')  # optional: basic-auth user, passed to ForgejoClient as `username`
+    parser.add_argument('--admin-password')
+    parser.add_argument('--token')  # when set, ForgejoClient uses it in preference to username/password
+    parser.add_argument('--repo-owner', required=True)
+    parser.add_argument('--repo-name', required=True)
+    parser.add_argument('--repo-private', action='store_true')
+    parser.add_argument('--ci-kubeconfig', required=True)  # path to the kubeconfig file to upload as a secret
+    parser.add_argument('--registry-host', required=True)
+    parser.add_argument('--project-name', required=True)
+    parser.add_argument('--project-namespace', required=True)
+    parser.add_argument('--project-deployments', required=True)
+    parser.add_argument('--project-registry-secret-name', required=True)
+    args = parser.parse_args()
+
+    client = ForgejoClient(args.forgejo_url, args.admin_username, args.admin_password, args.token)  # raises ValueError when neither auth mode is usable
+    repo = client.get_repo(args.repo_owner, args.repo_name)  # None means the API returned 404
+    if repo is None:
+        created = client.create_repo(args.repo_owner, args.repo_name, args.repo_private)
+        print(f'created repo {created["full_name"]}')
+    else:
+        print(f'repo already exists: {repo["full_name"]}')
+
+    kubeconfig_b64 = base64.b64encode(Path(args.ci_kubeconfig).read_bytes()).decode()  # base64 of the raw file bytes
+    client.upsert_secret(args.repo_owner, args.repo_name, 'KUBECONFIG_B64', kubeconfig_b64)
+    print('upserted repo action secret KUBECONFIG_B64')  # only the secret's name is printed, never its value
+
+    variables = {  # Actions variable name -> value taken from the CLI
+        'REGISTRY_HOST': args.registry_host,
+        'PROJECT_NAME': args.project_name,
+        'PROJECT_NAMESPACE': args.project_namespace,
+        'PROJECT_DEPLOYMENTS': args.project_deployments,
+        'PROJECT_REGISTRY_SECRET_NAME': args.project_registry_secret_name,
+    }
+    for name, value in variables.items():
+        client.upsert_variable(args.repo_owner, args.repo_name, name, value)
+    print('upserted repo action variables')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/discover_polymarket_btc_markets.py b/scripts/discover_polymarket_btc_markets.py
new file mode 100755
index 0000000..7d3afa5
--- /dev/null
+++ b/scripts/discover_polymarket_btc_markets.py
@@ -0,0 +1,752 @@
+#!/usr/bin/env python3
+"""Discover active Polymarket BTC up/down markets.
+
+Checkpoint 3 scope: fetch bounded public Gamma metadata, preserve raw responses,
+and write normalized market records with outcome-token mappings. This is not an
+order-book collector.
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import hashlib
+import json
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+
+GAMMA_EVENTS_URL = "https://gamma-api.polymarket.com/events"  # endpoint queried by discover()
+BTC_TAG_ID = 235  # sent as the `tag_id` query parameter
+
+DEFAULT_OUTPUT_JSON = Path("data/discovery/polymarket_btc_markets_latest.json")  # default for --output-json
+DEFAULT_MANIFEST = Path("data/discovery/polymarket_btc_markets_manifest.json")  # default for --manifest
+DEFAULT_MARKDOWN = Path("data/discovery/polymarket_btc_markets.md")  # default for --markdown
+
+SAFE_RESPONSE_HEADERS = {  # lowercase allow-list; filter_headers() drops every header not named here
+    "age",
+    "cache-control",
+    "cf-cache-status",
+    "cf-ray",
+    "content-encoding",
+    "content-length",
+    "content-type",
+    "date",
+    "expires",
+    "last-modified",
+    "ratelimit-limit",
+    "ratelimit-remaining",
+    "ratelimit-reset",
+    "retry-after",
+    "server",
+    "strict-transport-security",
+    "x-ratelimit-limit",
+    "x-ratelimit-remaining",
+    "x-ratelimit-reset",
+}
+
+FILTER_RULES = [  # human-readable rule list copied verbatim into the discovery artifact as `filter_rules`
+    "Use public Gamma /events with tag_id=235, related_tags=true, active=true, closed=false.",
+    "Require event.active=true and event.closed=false.",
+    "Require market.active=true and market.closed=false.",
+    "Require market.enableOrderBook=true.",
+    "Require market.acceptingOrders=true unless --allow-non-accepting-orders is used.",
+    "Require market end time to be after the fetch time unless --allow-expired is used.",
+    "Require outcomes to resolve to exactly Up and Down.",
+    "Require clobTokenIds to resolve to exactly two token IDs.",
+    "Require BTC/up-down evidence from seriesSlug, title/slug text, or tags.",
+]
+
+
+def utc_now() -> dt.datetime:  # current moment as a timezone-aware UTC datetime
+    return dt.datetime.now(tz=dt.UTC)
+
+
+def iso_z(value: dt.datetime | None = None) -> str:  # UTC ISO-8601 text with whole seconds and a "Z" suffix
+    moment = utc_now() if not value else value
+    return moment.astimezone(dt.UTC).isoformat(timespec="seconds").replace("+00:00", "Z")
+
+
+def parse_iso(value: Any) -> dt.datetime | None:  # ISO-8601 text -> aware UTC datetime, or None when unusable
+    text = value.strip() if isinstance(value, str) else ""
+    if not text:
+        return None  # non-strings and blank strings are not timestamps
+    if text[-1] == "Z":
+        text = f"{text[:-1]}+00:00"  # spell the UTC designator as an explicit offset
+    try:
+        moment = dt.datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    if moment.tzinfo is None:
+        moment = moment.replace(tzinfo=dt.UTC)  # naive values are taken to be UTC
+    return moment.astimezone(dt.UTC)
+
+
+def sha256_file(path: Path) -> str:  # hex SHA-256 of the file, read in 1 MiB blocks
+    hasher = hashlib.sha256()
+    with path.open("rb") as stream:
+        while block := stream.read(1024 * 1024):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def filter_headers(headers: Any) -> dict[str, str]:  # keep only allow-listed headers (case-insensitive match)
+    return {
+        name: text
+        for name, text in dict(headers).items()
+        if name.lower() in SAFE_RESPONSE_HEADERS
+    }
+
+
+def normalize_params(params: dict[str, Any]) -> dict[str, Any]:
+    # Booleans are rendered as the lowercase text "true"/"false";
+    # every other value is passed through untouched.
+    # Keys keep their original insertion order.
+    def render(item: Any) -> Any:
+        return ("true" if item else "false") if isinstance(item, bool) else item
+
+    return {key: render(item) for key, item in params.items()}
+
+
+def build_url(url: str, params: dict[str, Any]) -> str:  # url + "?" + urlencoded, normalized params
+    encoded = urllib.parse.urlencode(normalize_params(params), doseq=True)
+    return url + "?" + encoded
+
+
+def fetch_json_page(  # one GET request; returns an evidence envelope (request, response, timing) and does not raise on HTTP/transport errors
+    *,
+    name: str,
+    url: str,
+    params: dict[str, Any],
+    timeout_seconds: float,
+) -> dict[str, Any]:
+    started_monotonic = time.monotonic()  # monotonic clock is used only for duration_ms
+    started_at_utc = iso_z()
+    full_url = build_url(url, params)
+    request = urllib.request.Request(
+        full_url,
+        headers={
+            "Accept": "application/json",
+            "User-Agent": "orderbooks-checkpoint-3-discovery/1.0",
+        },
+        method="GET",
+    )
+    status_code: int | None = None  # stays None when no HTTP response was received
+    response_headers: dict[str, str] = {}
+    response_text = ""
+    error: str | None = None
+    try:
+        with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
+            status_code = response.status
+            response_headers = filter_headers(response.headers)
+            response_text = response.read().decode("utf-8", errors="replace")
+    except urllib.error.HTTPError as exc:  # error status: still record its code, headers and body
+        status_code = exc.code
+        response_headers = filter_headers(exc.headers)
+        response_text = exc.read().decode("utf-8", errors="replace")
+        error = f"HTTPError: {exc}"
+    except Exception as exc:  # noqa: BLE001 - preserve probe failure evidence
+        error = f"{type(exc).__name__}: {exc}"
+
+    response_json: Any | None = None
+    json_error: str | None = None
+    if response_text:  # an empty body is left as json=None with no json_error
+        try:
+            response_json = json.loads(response_text)
+        except json.JSONDecodeError as exc:
+            json_error = str(exc)
+
+    return {
+        "name": name,
+        "started_at_utc": started_at_utc,
+        "ended_at_utc": iso_z(),
+        "duration_ms": round((time.monotonic() - started_monotonic) * 1000, 3),
+        "request": {
+            "method": "GET",
+            "url": url,
+            "full_url": full_url,
+            "params": normalize_params(params),
+        },
+        "response": {
+            "status_code": status_code,
+            "headers": response_headers,
+            "json": response_json,
+            "json_error": json_error,
+            "text_preview": response_text[:1000] if response_json is None else None,  # first 1000 chars, only when no JSON value is available
+        },
+        "ok": error is None and status_code is not None and 200 <= status_code < 300,  # 2xx and no recorded error
+        "error": error,
+    }
+
+
+def coerce_json_array(value: Any) -> list[Any]:
+    # Accept a list as-is, or a string holding a JSON-encoded list.
+    # Anything else (other types, invalid JSON, non-list JSON) yields [].
+    candidate = value
+    if isinstance(candidate, str):
+        try:
+            candidate = json.loads(candidate)
+        except json.JSONDecodeError:
+            candidate = None
+    return candidate if isinstance(candidate, list) else []
+
+
+def lower_text(value: Any) -> str:  # lowercase str(value); falsy values (None, "", 0) become ""
+    return (str(value) if value else "").lower()
+
+
+def event_tag_text(event: dict[str, Any]) -> str:
+    # Flatten every tag's slug and label (in that order, tag by tag) into
+    # one lowercase, space-separated string. Entries under "tags" that are
+    # not dicts are ignored; a missing slug or label contributes "".
+    tag_dicts = [tag for tag in event.get("tags") or [] if isinstance(tag, dict)]
+    pieces = [str(tag.get(field) or "") for tag in tag_dicts for field in ("slug", "label")]
+    return " ".join(pieces).lower()
+
+
+def has_btc_up_down_evidence(event: dict[str, Any], market: dict[str, Any]) -> bool:  # True if series slug, free text, or tags indicate a BTC up/down market
+    series_slug = lower_text(event.get("seriesSlug"))
+    text = " ".join(  # lowercase event-level text fields
+        lower_text(event.get(key))
+        for key in ("title", "slug", "ticker", "description")
+    )
+    text += " " + " ".join(  # plus lowercase market-level text fields
+        lower_text(market.get(key))
+        for key in ("question", "slug", "description")
+    )
+    tags = event_tag_text(event)
+    series_match = series_slug.startswith("btc-up-or-down")
+    text_match = ("bitcoin" in text or "btc" in text) and "up" in text and "down" in text  # NOTE(review): plain substring tests, so "up" also matches e.g. "update" — confirm this looseness is acceptable
+    tag_match = ("bitcoin" in tags or "btc" in tags) and "up-or-down" in tags
+    return bool(series_match or text_match or tag_match)  # any single kind of evidence is sufficient
+
+
+def is_up_down_outcomes(outcomes: list[str]) -> bool:  # exactly one "up" and one "down", compared case-insensitively
+    return len(outcomes) == 2 and sorted(label.lower() for label in outcomes) == ["down", "up"]
+
+
+def normalize_market(  # build the normalized record for one accepted market, including a pointer back to its raw JSON
+    *,
+    event: dict[str, Any],
+    market: dict[str, Any],
+    page_index: int,
+    event_index: int,
+    market_index: int,
+    fetched_at_utc: str,
+    output_json_path: Path,
+) -> dict[str, Any]:
+    outcomes = [str(item) for item in coerce_json_array(market.get("outcomes"))]
+    token_ids = [str(item) for item in coerce_json_array(market.get("clobTokenIds"))]
+    tokens = [  # outcomes and token IDs are paired by position
+        {
+            "outcome": outcomes[index],
+            "token_id": token_ids[index],
+            "outcome_index": index,
+        }
+        for index in range(min(len(outcomes), len(token_ids)))  # surplus entries of the longer list are dropped
+    ]
+    start_time = (  # first truthy candidate wins, market fields before event fields
+        market.get("startDate")
+        or market.get("startDateIso")
+        or event.get("startDate")
+        or event.get("creationDate")
+    )
+    end_time = market.get("endDate") or market.get("endDateIso") or event.get("endDate")
+    event_slug = event.get("slug")
+    market_slug = market.get("slug") or event_slug  # fall back to the event slug
+    return {
+        "market_name": "polymarket",
+        "market_slug": market_slug,
+        "event_slug": event_slug,
+        "title": event.get("title") or market.get("question"),
+        "question": market.get("question") or event.get("title"),
+        "condition_id": market.get("conditionId"),
+        "tokens": tokens,
+        "outcomes": outcomes,
+        "start_time_utc": iso_z(parse_iso(start_time)) if parse_iso(start_time) else start_time,  # normalized to "Z" form when parseable, else the raw value
+        "end_time_utc": iso_z(parse_iso(end_time)) if parse_iso(end_time) else end_time,  # same rule as start_time_utc
+        "active": market.get("active"),
+        "closed": market.get("closed"),
+        "event_active": event.get("active"),
+        "event_closed": event.get("closed"),
+        "accepting_orders": market.get("acceptingOrders"),
+        "enable_order_book": market.get("enableOrderBook"),
+        "endpoint_source": {
+            "name": "gamma_events_bitcoin_tag",
+            "method": "GET",
+            "url": GAMMA_EVENTS_URL,
+            "params_basis": {  # fixed query parameters; limit/offset are not recorded here
+                "tag_id": BTC_TAG_ID,
+                "related_tags": "true",
+                "active": "true",
+                "closed": "false",
+                "order": "endDate",
+                "ascending": "true",
+            },
+        },
+        "fetched_at_utc": fetched_at_utc,
+        "raw_ref": {  # where this market sits inside the JSON artifact's raw section
+            "artifact_path": output_json_path.as_posix(),
+            "section": "raw.gamma_events_pages",
+            "page_index": page_index,
+            "event_index": event_index,
+            "market_index": market_index,
+            "json_path": f"raw.gamma_events_pages[{page_index}].response.json[{event_index}].markets[{market_index}]",
+        },
+    }
+
+
+def rejection_reasons(
+    *,
+    event: dict[str, Any],
+    market: dict[str, Any],
+    fetched_at: dt.datetime,
+    require_accepting_orders: bool,
+    require_future_end: bool,
+) -> list[str]:
+    """Return every filter rule the event/market pair violates; an empty list means it is accepted.
+
+    ``fetched_at`` should be timezone-aware: it is compared with the UTC-normalized end time
+    returned by ``parse_iso`` whenever ``require_future_end`` is set.
+    A market without a ``conditionId`` is rejected with ``missing_condition_id``, so a record
+    can never be emitted with an empty condition ID.
+    """
+    outcomes = [str(item) for item in coerce_json_array(market.get("outcomes"))]
+    token_ids = [str(item) for item in coerce_json_array(market.get("clobTokenIds"))]
+    end_time = parse_iso(market.get("endDate") or event.get("endDate"))
+
+    # (violated, reason) pairs in report order; each condition is independent of the others.
+    checks = [
+        (event.get("active") is not True, "event_not_active"),
+        (event.get("closed") is not False, "event_closed"),
+        (market.get("active") is not True, "market_not_active"),
+        (market.get("closed") is not False, "market_closed"),
+        (market.get("enableOrderBook") is not True, "order_book_not_enabled"),
+        (require_accepting_orders and market.get("acceptingOrders") is not True, "not_accepting_orders"),
+        (require_future_end and (end_time is None or end_time <= fetched_at), "not_future_end"),
+        (not is_up_down_outcomes(outcomes), "not_up_down_outcomes"),
+        (len(token_ids) != 2, "missing_two_clob_token_ids"),
+        (not market.get("conditionId"), "missing_condition_id"),
+        (not has_btc_up_down_evidence(event, market), "missing_btc_up_down_evidence"),
+    ]
+    return [reason for violated, reason in checks if violated]
+
+
+def discover(args: argparse.Namespace) -> dict[str, Any]:  # fetch up to args.max_pages pages, filter markets, return the full discovery artifact
+    started_at_utc = iso_z()
+    fetched_at = utc_now()  # single reference time used for the future-end filter on every market
+    fetched_at_utc = iso_z(fetched_at)
+    raw_pages: list[dict[str, Any]] = []  # every response envelope, including a failed one
+    normalized: list[dict[str, Any]] = []
+    rejected_counts: dict[str, int] = {}  # reason -> number of times it was hit
+    warnings: list[str] = []
+    seen_conditions: set[str] = set()  # condition IDs already emitted, for de-duplication
+
+    for page_index in range(args.max_pages):
+        offset = page_index * args.limit
+        params = {
+            "tag_id": BTC_TAG_ID,
+            "related_tags": True,
+            "active": True,
+            "closed": False,
+            "limit": args.limit,
+            "offset": offset,
+            "order": "endDate",
+            "ascending": True,
+        }
+        page = fetch_json_page(
+            name=f"gamma_events_bitcoin_tag_page_{page_index}",
+            url=GAMMA_EVENTS_URL,
+            params=params,
+            timeout_seconds=args.timeout,
+        )
+        raw_pages.append(page)  # recorded before validation so failures are preserved too
+        payload = page["response"]["json"]
+        if not page["ok"]:
+            warnings.append(
+                f"Page {page_index} request failed with status {page['response']['status_code']}: {page['error']}"
+            )
+            break  # stop paging on the first failed request; earlier pages are kept
+        if not isinstance(payload, list):
+            warnings.append(f"Page {page_index} response was not a JSON list.")
+            break
+
+        for event_index, event in enumerate(payload):
+            if not isinstance(event, dict):
+                rejected_counts["event_not_object"] = rejected_counts.get("event_not_object", 0) + 1
+                continue
+            markets = event.get("markets") or []
+            if not isinstance(markets, list) or not markets:
+                rejected_counts["missing_markets"] = rejected_counts.get("missing_markets", 0) + 1
+                continue
+            for market_index, market in enumerate(markets):
+                if not isinstance(market, dict):
+                    rejected_counts["market_not_object"] = rejected_counts.get("market_not_object", 0) + 1
+                    continue
+                reasons = rejection_reasons(
+                    event=event,
+                    market=market,
+                    fetched_at=fetched_at,
+                    require_accepting_orders=not args.allow_non_accepting_orders,
+                    require_future_end=not args.allow_expired,
+                )
+                if reasons:
+                    for reason in reasons:  # a market can count towards several reasons at once
+                        rejected_counts[reason] = rejected_counts.get(reason, 0) + 1
+                    continue
+                condition_id = str(market.get("conditionId") or "")  # a missing conditionId is normalized to ""
+                if condition_id in seen_conditions:
+                    rejected_counts["duplicate_condition_id"] = rejected_counts.get(
+                        "duplicate_condition_id", 0
+                    ) + 1
+                    continue
+                seen_conditions.add(condition_id)
+                normalized.append(
+                    normalize_market(
+                        event=event,
+                        market=market,
+                        page_index=page_index,
+                        event_index=event_index,
+                        market_index=market_index,
+                        fetched_at_utc=fetched_at_utc,
+                        output_json_path=args.output_json,
+                    )
+                )
+
+        if len(payload) < args.limit:  # a short page is treated as the last page
+            break
+
+    normalized.sort(key=lambda item: (item.get("end_time_utc") or "", item.get("market_slug") or ""))  # by end time, then slug
+    if raw_pages:
+        last_payload = raw_pages[-1]["response"].get("json")
+        if isinstance(last_payload, list) and len(last_payload) == args.limit and len(raw_pages) >= args.max_pages:  # last allowed page was full
+            warnings.append(
+                "Discovery stopped at max_pages before exhausting Gamma pagination; output is bounded to the fetched pages."
+            )
+    if len(normalized) < args.min_markets:
+        warnings.append(
+            f"Only {len(normalized)} markets passed filters; min_markets={args.min_markets}."
+        )
+
+    status = "PASS" if len(normalized) >= args.min_markets else "FAIL"  # the gate depends only on the accepted-market count
+    status_reason = (
+        f"Discovered {len(normalized)} active BTC up/down markets with condition IDs and two token IDs."
+        if status == "PASS"
+        else "Did not discover enough active BTC up/down markets with condition IDs and two token IDs."
+    )
+    return {
+        "schema_name": "polymarket_btc_market_discovery",
+        "schema_version": 1,
+        "artifact_status": "valid" if status == "PASS" else "partial",
+        "checkpoint_id": 3,
+        "checkpoint_name": "Minimal BTC Market Discovery",
+        "started_at_utc": started_at_utc,
+        "ended_at_utc": iso_z(),
+        "fetched_at_utc": fetched_at_utc,
+        "scope": "Bounded public Gamma metadata discovery only; no order-book collector.",
+        "endpoint_basis": {
+            "source_checkpoint": "Checkpoint 2",
+            "source_report": "reports/checkpoints/checkpoint_002_polymarket_public_sources.md",
+            "endpoint": GAMMA_EVENTS_URL,
+            "method": "GET",
+            "base_params": {  # per-page `offset` is intentionally not part of the base parameters
+                "tag_id": BTC_TAG_ID,
+                "related_tags": True,
+                "active": True,
+                "closed": False,
+                "limit": args.limit,
+                "order": "endDate",
+                "ascending": True,
+            },
+        },
+        "filter_rules": FILTER_RULES,
+        "normalized_markets": normalized,
+        "raw": {
+            "gamma_events_pages": raw_pages,
+        },
+        "summary": {
+            "status": status,
+            "status_reason": status_reason,
+            "raw_pages_fetched": len(raw_pages),
+            "raw_events_fetched": sum(  # counts list entries only; non-list payloads contribute nothing
+                len(page["response"].get("json") or [])
+                for page in raw_pages
+                if isinstance(page["response"].get("json"), list)
+            ),
+            "normalized_market_count": len(normalized),
+            "rejected_counts": dict(sorted(rejected_counts.items())),
+            "warnings": warnings,
+        },
+        "fake_progress_risk": "Discovery can appear successful while silently missing markets if filters rely on stale text assumptions or bounded pagination. Raw pages and rejection counts are preserved so missed-market risk can be audited.",
+        "next_step": "Checkpoint 4 should use this discovery output as input for a short, raw-first order-book snapshot sample; do not claim reliability until the later 24h soak test.",
+    }
+
+
+def markdown_table_row(values: list[Any]) -> str:  # one table row; newlines are flattened and "|" is escaped so a cell cannot split the row
+    return "| " + " | ".join(str(value).replace("\n", " ").replace("|", "\\|") for value in values) + " |"
+
+
+def write_markdown(discovery: dict[str, Any], path: Path) -> None:  # render the discovery artifact as a Markdown report at `path`
+    summary = discovery["summary"]
+    rows = discovery["normalized_markets"]
+    lines = [  # fixed header sections; market rows and trailing sections are appended below
+        "# Polymarket BTC Markets Discovery",
+        "",
+        f"Artifact status: `{discovery['artifact_status']}`",
+        "",
+        "## Gate",
+        "",
+        f"Status: `{summary['status']}`",
+        "",
+        summary["status_reason"],
+        "",
+        "## Scope",
+        "",
+        "Bounded public Gamma metadata discovery only. No order-book collection, no trading, no private endpoints, no secrets.",
+        "",
+        "## Endpoint",
+        "",
+        f"- `GET {GAMMA_EVENTS_URL}`",
+        "- Params: `tag_id=235`, `related_tags=true`, `active=true`, `closed=false`, `order=endDate`, `ascending=true`, bounded by `limit` and `max_pages`.",
+        "",
+        "## Summary",
+        "",
+        markdown_table_row(["Metric", "Value"]),
+        markdown_table_row(["---", "---"]),
+        markdown_table_row(["fetched_at_utc", discovery["fetched_at_utc"]]),
+        markdown_table_row(["raw_pages_fetched", summary["raw_pages_fetched"]]),
+        markdown_table_row(["raw_events_fetched", summary["raw_events_fetched"]]),
+        markdown_table_row(["normalized_market_count", summary["normalized_market_count"]]),
+        "",
+        "## Markets",
+        "",
+        markdown_table_row(
+            [
+                "market_slug",
+                "end_time_utc",
+                "condition_id",
+                "outcomes",
+                "token_ids",
+                "accepting_orders",
+            ]
+        ),
+        markdown_table_row(["---", "---", "---", "---", "---", "---"]),
+    ]
+    for row in rows:  # one table row per normalized market
+        token_ids = [token["token_id"] for token in row["tokens"]]
+        lines.append(
+            markdown_table_row(
+                [
+                    row.get("market_slug"),
+                    row.get("end_time_utc"),
+                    row.get("condition_id"),
+                    json.dumps(row.get("outcomes")),
+                    json.dumps(token_ids),
+                    row.get("accepting_orders"),
+                ]
+            )
+        )
+    lines.extend(
+        [
+            "",
+            "## Warnings",
+            "",
+        ]
+    )
+    if summary["warnings"]:
+        for warning in summary["warnings"]:
+            lines.append(f"- {warning}")
+    else:
+        lines.append("- None.")  # keep the section non-empty when there is nothing to report
+    lines.extend(
+        [
+            "",
+            "## Rejection Counts",
+            "",
+            "```json",
+            json.dumps(summary["rejected_counts"], indent=2, sort_keys=True),
+            "```",
+            "",
+            "## Raw Preservation",
+            "",
+            "The latest JSON artifact stores raw Gamma response envelopes under `raw.gamma_events_pages`. Each normalized record has a `raw_ref` pointing back to the source event market.",
+            "",
+            "## Strongest Fake-Progress Risk",
+            "",
+            discovery["fake_progress_risk"],
+            "",
+            "## Next Smallest Step",
+            "",
+            discovery["next_step"],
+            "",
+        ]
+    )
+    path.parent.mkdir(parents=True, exist_ok=True)  # create the output directory if needed
+    path.write_text("\n".join(lines), encoding="utf-8")
+
+
+def write_manifest(  # write the manifest JSON: run metadata, counts, market IDs and hashes of the output files
+    *,
+    discovery: dict[str, Any],
+    manifest_path: Path,
+    output_json: Path,
+    markdown_path: Path,
+    command: str,
+) -> None:
+    status = discovery["summary"]["status"]
+    output_files = [  # the files are hashed as they exist on disk at this moment
+        {
+            "path": output_json.as_posix(),
+            "kind": "latest_discovery_json",
+            "status": "valid" if output_json.exists() and output_json.stat().st_size else "missing",  # an empty file also counts as "missing"
+            "sha256": sha256_file(output_json) if output_json.exists() else None,
+        },
+        {
+            "path": markdown_path.as_posix(),
+            "kind": "discovery_markdown",
+            "status": "valid" if markdown_path.exists() and markdown_path.stat().st_size else "missing",
+            "sha256": sha256_file(markdown_path) if markdown_path.exists() else None,
+        },
+    ]
+    script_path = Path("scripts/discover_polymarket_btc_markets.py")  # relative path, so resolved against the current working directory
+    if script_path.exists():
+        output_files.append(
+            {
+                "path": script_path.as_posix(),
+                "kind": "discovery_script",
+                "status": "valid",
+                "sha256": sha256_file(script_path),
+            }
+        )
+    status_codes: dict[str, int] = {}  # str(status_code) -> number of pages; a missing status is keyed "None"
+    for page in discovery["raw"]["gamma_events_pages"]:
+        code = str(page["response"].get("status_code"))
+        status_codes[code] = status_codes.get(code, 0) + 1
+
+    manifest = {
+        "schema_name": "polymarket_btc_markets_manifest",
+        "schema_version": 1,
+        "checkpoint_id": 3,
+        "checkpoint_name": "Minimal BTC Market Discovery",
+        "status": status,
+        "started_at_utc": discovery["started_at_utc"],
+        "ended_at_utc": discovery["ended_at_utc"],
+        "scope": discovery["scope"],
+        "command": command,
+        "endpoint": discovery["endpoint_basis"],
+        "request_counts": {
+            "gamma_events_pages": discovery["summary"]["raw_pages_fetched"],
+            "status_code_counts": dict(sorted(status_codes.items())),
+        },
+        "row_counts": {
+            "raw_events_fetched": discovery["summary"]["raw_events_fetched"],
+            "normalized_markets": discovery["summary"]["normalized_market_count"],
+        },
+        "market_ids": [  # compact identity list, one entry per normalized market
+            {
+                "market_slug": row.get("market_slug"),
+                "condition_id": row.get("condition_id"),
+                "token_ids": [token.get("token_id") for token in row.get("tokens", [])],
+            }
+            for row in discovery["normalized_markets"]
+        ],
+        "output_files": output_files,
+        "warnings": discovery["summary"]["warnings"],
+        "validation": {
+            "summary": discovery["summary"]["status_reason"],
+            "required_record_fields": [  # a static list; this function does not check records against it
+                "market_name",
+                "market_slug",
+                "question",
+                "condition_id",
+                "tokens",
+                "outcomes",
+                "start_time_utc",
+                "end_time_utc",
+                "active",
+                "closed",
+                "accepting_orders",
+                "enable_order_book",
+                "endpoint_source",
+                "fetched_at_utc",
+                "raw_ref",
+            ],
+        },
+        "fake_progress_risk": discovery["fake_progress_risk"],
+        "next_step": discovery["next_step"],
+    }
+    manifest_path.parent.mkdir(parents=True, exist_ok=True)
+    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+
+def write_outputs(args: argparse.Namespace, discovery: dict[str, Any]) -> None:
+    """Write the JSON artifact and Markdown report first, then the manifest, which hashes both files."""
+    import shlex  # function-scope import: only this function needs it
+    args.output_json.parent.mkdir(parents=True, exist_ok=True)
+    args.output_json.write_text(json.dumps(discovery, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+    write_markdown(discovery, args.markdown)
+    # shlex.join quotes arguments containing spaces or shell metacharacters, so the recorded command can be replayed.
+    command = shlex.join([Path(sys.argv[0]).as_posix(), *sys.argv[1:]])
+    write_manifest(
+        discovery=discovery,
+        manifest_path=args.manifest,
+        output_json=args.output_json,
+        markdown_path=args.markdown,
+        command=command,
+    )
+
+
+def parse_args() -> argparse.Namespace:  # command-line options for one discovery run
+    parser = argparse.ArgumentParser(
+        description="Discover active BTC up/down Polymarket markets from public Gamma metadata."
+    )
+    parser.add_argument("--output-json", type=Path, default=DEFAULT_OUTPUT_JSON)
+    parser.add_argument("--manifest", type=Path, default=DEFAULT_MANIFEST)
+    parser.add_argument("--markdown", type=Path, default=DEFAULT_MARKDOWN)
+    parser.add_argument("--limit", type=int, default=100)  # page size, sent as the `limit` query parameter; NOTE(review): not validated as positive
+    parser.add_argument("--max-pages", type=int, default=3)  # upper bound on the number of pages requested
+    parser.add_argument("--timeout", type=float, default=15.0)  # per-request timeout in seconds
+    parser.add_argument("--min-markets", type=int, default=1)  # accepted markets needed for status PASS
+    parser.add_argument("--allow-expired", action="store_true")  # disables the future-end-time filter
+    parser.add_argument("--allow-non-accepting-orders", action="store_true")  # disables the acceptingOrders filter
+    return parser.parse_args()
+
+
+def main() -> int:  # run discovery, write all artifacts, print a JSON summary; returns the process exit code
+    args = parse_args()
+    discovery = discover(args)
+    write_outputs(args, discovery)  # artifacts are written for FAIL runs as well
+    print(
+        json.dumps(
+            {
+                "status": discovery["summary"]["status"],
+                "status_reason": discovery["summary"]["status_reason"],
+                "output_json": args.output_json.as_posix(),
+                "manifest": args.manifest.as_posix(),
+                "markdown": args.markdown.as_posix(),
+                "normalized_market_count": discovery["summary"]["normalized_market_count"],
+                "markets": [  # compact per-market summary; full records live in the JSON artifact
+                    {
+                        "market_slug": row.get("market_slug"),
+                        "condition_id": row.get("condition_id"),
+                        "token_ids": [token.get("token_id") for token in row.get("tokens", [])],
+                        "end_time_utc": row.get("end_time_utc"),
+                    }
+                    for row in discovery["normalized_markets"]
+                ],
+                "warnings": discovery["summary"]["warnings"],
+            },
+            indent=2,
+            sort_keys=True,
+        )
+    )
+    return 0 if discovery["summary"]["status"] == "PASS" else 1  # 0 on PASS, 1 otherwise
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/k8s_runtime_smoke_check.sh b/scripts/k8s_runtime_smoke_check.sh
new file mode 100755
index 0000000..9bff5d5
--- /dev/null
+++ b/scripts/k8s_runtime_smoke_check.sh
@@ -0,0 +1,466 @@
+#!/usr/bin/env bash
+# Treat unset variables as errors and propagate failures through pipelines.
+# Note that errexit (-e) is not enabled here.
+set -uo pipefail
+
+# Defaults. Each one can be overridden by the named environment variable and
+# then by the command-line options parsed further down.
+NAMESPACE="${ORDERBOOKS_K8S_NAMESPACE:-orderbooks}"
+DEPLOYMENT="${ORDERBOOKS_K8S_COLLECTOR_DEPLOYMENT:-orderbooks-collector}"
+CRONJOB="${ORDERBOOKS_K8S_UPLOADER_CRONJOB:-orderbooks-uploader}"
+RAW_DIR="${ORDERBOOKS_K8S_RAW_DIR:-/var/lib/orderbooks/raw_orderbooks}"
+MANIFEST_DIR="${ORDERBOOKS_K8S_MANIFEST_DIR:-/var/lib/orderbooks/manifests}"
+WAIT_SECONDS="${ORDERBOOKS_K8S_SMOKE_WAIT_SECONDS:-1200}"
+UPLOAD_MIN_AGE_SECONDS="${ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS:-600}"
+KUBECTL_BIN="${ORDERBOOKS_KUBECTL:-kubectl}"
+# UTC timestamp that makes the default evidence file name unique per run.
+RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"
+EVIDENCE_PATH="${ORDERBOOKS_K8S_SMOKE_EVIDENCE_PATH:-data/manifests/k8s_runtime_smoke_${RUN_ID}.json}"
+
+# Print the help text to stdout. The argument parser redirects it to stderr
+# when it reports an unknown argument.
+usage() {
+  cat <<'EOF'
+Usage: scripts/k8s_runtime_smoke_check.sh [options]
+
+Run after the orderbooks Kubernetes workload is deployed. The script uses
+kubectl, writes local JSON evidence, deletes one collector pod to force a
+Deployment restart, verifies raw gzip JSONL files and manifests on the PVC,
+then triggers the uploader CronJob and requires a verified upload manifest.
+
+Options:
+  --namespace NAME       Namespace. Default: orderbooks.
+  --deployment NAME      Collector deployment. Default: orderbooks-collector.
+  --cronjob NAME         Uploader CronJob. Default: orderbooks-uploader.
+  --raw-dir PATH         Raw path inside collector pod. Default: /var/lib/orderbooks/raw_orderbooks.
+  --manifest-dir PATH    Manifest path inside collector pod. Default: /var/lib/orderbooks/manifests.
+  --wait-seconds N       Max wait for collector/upload evidence. Default: 1200.
+  --upload-min-age-seconds N
+                         Wait for at least one raw/manifest file to be this old before upload. Default: 600.
+  --evidence-path PATH   Local JSON evidence path.
+  --kubectl PATH         kubectl binary. Default: kubectl.
+  --help                 Show this help.
+
+This script does not read or print rclone config contents.
+EOF
+}
+
+# Parse command-line options. Each value option overrides the matching
+# default/environment setting; unknown arguments print usage and exit 2.
+while [[ $# -gt 0 ]]; do
+  # A value-taking option given as the last argument has no "$2". Report that
+  # explicitly instead of letting `set -u` abort with "unbound variable".
+  case "$1" in
+    --namespace|--deployment|--cronjob|--raw-dir|--manifest-dir|--wait-seconds|--upload-min-age-seconds|--evidence-path|--kubectl)
+      if [[ $# -lt 2 ]]; then
+        echo "Missing value for argument: $1" >&2
+        usage >&2
+        exit 2
+      fi
+      ;;
+  esac
+  case "$1" in
+    --namespace) NAMESPACE="$2"; shift 2 ;;
+    --deployment) DEPLOYMENT="$2"; shift 2 ;;
+    --cronjob) CRONJOB="$2"; shift 2 ;;
+    --raw-dir) RAW_DIR="$2"; shift 2 ;;
+    --manifest-dir) MANIFEST_DIR="$2"; shift 2 ;;
+    --wait-seconds) WAIT_SECONDS="$2"; shift 2 ;;
+    --upload-min-age-seconds) UPLOAD_MIN_AGE_SECONDS="$2"; shift 2 ;;
+    --evidence-path) EVIDENCE_PATH="$2"; shift 2 ;;
+    --kubectl) KUBECTL_BIN="$2"; shift 2 ;;
+    --help) usage; exit 0 ;;
+    *) echo "Unknown argument: $1" >&2; usage >&2; exit 2 ;;
+  esac
+done
+
+# Make sure the directory for the evidence file exists.
+mkdir -p "$(dirname "${EVIDENCE_PATH}")"
+
+# Run the smoke check as an inline Python program read from the heredoc on
+# stdin. The settings are handed over as positional arguments (sys.argv[1..9]).
+PYTHONDONTWRITEBYTECODE=1 python3 - "$KUBECTL_BIN" "$NAMESPACE" "$DEPLOYMENT" "$CRONJOB" "$RAW_DIR" "$MANIFEST_DIR" "$WAIT_SECONDS" "$UPLOAD_MIN_AGE_SECONDS" "$EVIDENCE_PATH" <<'PY_SMOKE'
+import datetime as dt
+import json
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+# Positional arguments supplied by the surrounding shell script, in order.
+kubectl = sys.argv[1]
+namespace = sys.argv[2]
+deployment = sys.argv[3]
+cronjob = sys.argv[4]
+raw_dir = sys.argv[5]
+manifest_dir = sys.argv[6]
+# Both durations must be integer seconds; int() raises ValueError otherwise.
+wait_seconds = int(sys.argv[7])
+upload_min_age_seconds = int(sys.argv[8])
+evidence_path = Path(sys.argv[9])
+# Run start as a second-resolution UTC timestamp with a "Z" suffix.
+started_at = dt.datetime.now(dt.UTC).replace(microsecond=0).isoformat().replace('+00:00', 'Z')
+# One record per command executed through capture(); written to the evidence file.
+checks = []
+# Failure messages; written to the evidence file.
+failures = []
+
+def iso_now():
+    """Return the current UTC time, truncated to seconds, as ISO-8601 ending in "Z"."""
+    moment = dt.datetime.now(dt.UTC).replace(microsecond=0)
+    return moment.isoformat().replace('+00:00', 'Z')
+
+
+def capture(command, input_text=None, timeout=None):
+    """Run *command*, record the outcome in ``checks``, and return ``(proc, record)``.
+
+    stdout and stderr are captured as text. The record keeps the command, its
+    exit code, the last 6000 characters of each stream, and a UTC timestamp.
+    """
+    proc = subprocess.run(
+        command,
+        input=input_text,
+        text=True,
+        capture_output=True,
+        timeout=timeout,
+    )
+    record = dict(
+        command=command,
+        exit_code=proc.returncode,
+        stdout_tail=proc.stdout[-6000:],
+        stderr_tail=proc.stderr[-6000:],
+        ran_at_utc=iso_now(),
+    )
+    checks.append(record)
+    return proc, record
+
+
+def run(command, input_text=None, timeout=None):
+    """Run *command* through capture() and return only its recorded check entry."""
+    return capture(command, input_text=input_text, timeout=timeout)[1]
+
+
+def run_json(command, input_text=None, timeout=None):
+    """Run *command* and return its stdout parsed as JSON.
+
+    Raises RuntimeError when the command exits with a non-zero status.
+    """
+    proc, record = capture(command, input_text=input_text, timeout=timeout)
+    if record['exit_code'] == 0:
+        return json.loads(proc.stdout)
+    raise RuntimeError(f"command failed: {' '.join(command)}")
+
+
+def pod_ready(pod):
+    """Return True when *pod* is Running, not being deleted, and fully ready.
+
+    *pod* is one item of ``kubectl get pods -o json``. A pod counts as ready
+    only if it reports at least one container status and every reported
+    container is ready.
+    """
+    # A pod that is being deleted keeps phase "Running" (and may still report
+    # ready containers) until its containers stop. Without this check the
+    # restart test could pick the pod it has just deleted.
+    if pod.get('metadata', {}).get('deletionTimestamp'):
+        return False
+    status = pod.get('status', {})
+    if status.get('phase') != 'Running':
+        return False
+    statuses = status.get('containerStatuses') or []
+    return bool(statuses) and all(entry.get('ready') for entry in statuses)
+
+
+def get_collector_pod():
+    """Poll until a ready collector pod exists and return ``(name, pod)``.
+
+    Pods are selected by the orderbooks collector labels. When several are
+    ready, the one with the latest creationTimestamp is returned. The cluster
+    is queried every 10 seconds; TimeoutError is raised once ``wait_seconds``
+    have elapsed without a ready pod.
+    """
+    label_selector = 'app.kubernetes.io/name=orderbooks,app.kubernetes.io/component=collector'
+    query = [kubectl, '-n', namespace, 'get', 'pods', '-l', label_selector, '-o', 'json']
+    give_up_at = time.time() + wait_seconds
+    last_seen = None
+    while time.time() <= give_up_at:
+        listed = run_json(query).get('items', [])
+        ready_pods = sorted(
+            (pod for pod in listed if pod_ready(pod)),
+            key=lambda pod: pod.get('metadata', {}).get('creationTimestamp', ''),
+        )
+        if ready_pods:
+            newest = ready_pods[-1]
+            return newest['metadata']['name'], newest
+        last_seen = listed
+        time.sleep(10)
+    raise TimeoutError(f'no ready collector pod found; last pods={last_seen}')
+
+
+def exec_python(pod, code, args):
+    """Run *code* with ``python3 -`` inside *pod* and return its JSON stdout.
+
+    The program text is sent on stdin through ``kubectl exec -i``; *args* are
+    passed to it as command-line arguments.
+
+    The validation programs report a negative verdict by printing a JSON
+    object with ``"valid": false`` and exiting non-zero. That object is
+    returned so callers can inspect and report it. Any other non-zero exit
+    raises RuntimeError.
+    """
+    command = [kubectl, '-n', namespace, 'exec', '-i', pod, '--', 'python3', '-', *args]
+    proc, item = capture(command, input_text=code, timeout=wait_seconds + 60)
+    if item['exit_code'] == 0:
+        return json.loads(proc.stdout)
+    # Non-zero exit: keep an explicit "not valid" verdict instead of discarding
+    # its diagnostics; treat everything else as a failed command.
+    try:
+        verdict = json.loads(proc.stdout)
+    except ValueError:
+        verdict = None
+    if isinstance(verdict, dict) and verdict.get('valid') is False:
+        return verdict
+    raise RuntimeError(f"pod python command failed in {pod}: {item['stderr_tail']}")
+
+
+def wait_for_valid_collector(pod, after_mtime, label):
+    """Poll *pod* until a valid collector manifest newer than *after_mtime* exists.
+
+    Runs the collector validation program in the pod every 15 seconds. Returns
+    the validation result with ``wait_label`` set to *label*. Raises
+    TimeoutError carrying the last result or error once ``wait_seconds`` have
+    elapsed.
+    """
+    script_args = [manifest_dir, raw_dir, str(after_mtime)]
+    give_up_at = time.time() + wait_seconds
+    last_error = None
+    while time.time() <= give_up_at:
+        try:
+            outcome = exec_python(pod, collector_validation_code, script_args)
+            if not outcome.get('valid'):
+                last_error = outcome
+            else:
+                outcome['wait_label'] = label
+                return outcome
+        except Exception as exc:
+            # Any failure here is retried until the deadline.
+            last_error = repr(exc)
+        time.sleep(15)
+    raise TimeoutError(f'no valid {label} collector manifest found before timeout: {last_error}')
+
+
+def wait_for_upload_eligible_files(pod):
+    """Poll *pod* until raw and manifest files old enough to upload exist.
+
+    Runs the upload eligibility program in the pod every 15 seconds and
+    returns its result once it reports ``eligible``. Raises TimeoutError with
+    the last result once ``wait_seconds`` have elapsed. Errors from the pod
+    command are not retried; they propagate to the caller.
+    """
+    script_args = [raw_dir, manifest_dir, str(upload_min_age_seconds)]
+    give_up_at = time.time() + wait_seconds
+    latest = None
+    while time.time() <= give_up_at:
+        latest = exec_python(pod, upload_eligibility_code, script_args)
+        if latest.get('eligible'):
+            return latest
+        time.sleep(15)
+    raise TimeoutError(f'no upload-eligible raw/manifest files before timeout: {latest}')
+
+# Program executed inside the collector pod (argv: manifest dir, raw dir,
+# minimum mtime). It examines collector manifests modified after the given
+# mtime from newest to oldest, re-parses and re-hashes every raw file each
+# manifest lists, and prints the first valid result as JSON with exit code 0.
+# If none is valid it prints the last result examined (or a "no candidates"
+# error) and exits with code 2.
+collector_validation_code = r'''
+import gzip
+import hashlib
+import json
+import sys
+from pathlib import Path
+
+manifest_dir = Path(sys.argv[1])
+raw_dir = Path(sys.argv[2])
+after_mtime = float(sys.argv[3])
+
+def sha256(path):
+    digest = hashlib.sha256()
+    with path.open('rb') as handle:
+        for chunk in iter(lambda: handle.read(1024 * 1024), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def parse_raw(path):
+    rows = 0
+    first_keys = []
+    with gzip.open(path, 'rt', encoding='utf-8') as handle:
+        for line in handle:
+            if not line.strip():
+                continue
+            obj = json.loads(line)
+            if rows == 0:
+                first_keys = sorted(obj.keys())
+            rows += 1
+    return rows, first_keys
+
+
+def validate(path):
+    manifest = json.loads(path.read_text(encoding='utf-8'))
+    output_files = []
+    for item in manifest.get('output_files', []):
+        raw_path = Path(item['path'])
+        rows, first_keys = parse_raw(raw_path)
+        actual_sha = sha256(raw_path)
+        output_files.append({
+            'path': str(raw_path),
+            'bytes': raw_path.stat().st_size,
+            'mtime': raw_path.stat().st_mtime,
+            'manifest_rows': item.get('rows'),
+            'rows_parsed': rows,
+            'row_count_matches_manifest': rows == item.get('rows'),
+            'manifest_sha256': item.get('sha256'),
+            'actual_sha256': actual_sha,
+            'sha256_matches_manifest': actual_sha == item.get('sha256'),
+            'under_raw_dir': raw_path.resolve().is_relative_to(raw_dir.resolve()),
+            'first_row_keys': first_keys,
+        })
+    valid = (
+        manifest.get('gate_status') == 'PASS'
+        and manifest.get('rows_written', 0) > 0
+        and manifest.get('failure_count') == 0
+        and not manifest.get('failures')
+        and bool(output_files)
+        and all(item['rows_parsed'] > 0 and item['row_count_matches_manifest'] and item['sha256_matches_manifest'] and item['under_raw_dir'] for item in output_files)
+    )
+    return {
+        'path': str(path),
+        'mtime': path.stat().st_mtime,
+        'manifest_summary': {
+            'gate_status': manifest.get('gate_status'),
+            'rows_written': manifest.get('rows_written'),
+            'failure_count': manifest.get('failure_count'),
+            'failures_present': bool(manifest.get('failures')),
+            'output_file_count': len(manifest.get('output_files', [])),
+            'started_at_utc': manifest.get('started_at_utc'),
+            'ended_at_utc': manifest.get('ended_at_utc'),
+        },
+        'output_files': output_files,
+        'valid': valid,
+    }
+
+candidates = sorted(manifest_dir.glob('polymarket_orderbook_collector_*.json'), key=lambda p: p.stat().st_mtime)
+candidates = [path for path in candidates if path.stat().st_mtime > after_mtime]
+latest = None
+for path in reversed(candidates):
+    try:
+        result = validate(path)
+    except Exception as exc:
+        latest = {'path': str(path), 'valid': False, 'error': repr(exc)}
+        continue
+    latest = result
+    if result['valid']:
+        print(json.dumps(result, sort_keys=True))
+        sys.exit(0)
+print(json.dumps(latest or {'valid': False, 'error': 'no collector manifest candidates'}, sort_keys=True))
+sys.exit(2)
+'''
+
+# Program executed inside a collector pod (argv: raw file path, expected
+# SHA-256, expected row count). It re-parses the gzip JSONL file, counts its
+# non-blank rows, re-hashes it, and prints a JSON comparison against the
+# expected values. It does not set an exit code itself; mismatches are
+# reported only through the printed fields.
+raw_check_code = r'''
+import gzip
+import hashlib
+import json
+import sys
+from pathlib import Path
+
+path = Path(sys.argv[1])
+expected_sha = sys.argv[2]
+expected_rows = int(sys.argv[3])
+
+def sha256(path):
+    digest = hashlib.sha256()
+    with path.open('rb') as handle:
+        for chunk in iter(lambda: handle.read(1024 * 1024), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+rows = 0
+with gzip.open(path, 'rt', encoding='utf-8') as handle:
+    for line in handle:
+        if line.strip():
+            json.loads(line)
+            rows += 1
+actual_sha = sha256(path)
+print(json.dumps({
+    'path': str(path),
+    'expected_sha256': expected_sha,
+    'actual_sha256': actual_sha,
+    'sha256_matches': actual_sha == expected_sha,
+    'expected_rows': expected_rows,
+    'actual_rows': rows,
+    'row_count_matches': rows == expected_rows,
+}, sort_keys=True))
+'''
+
+# Program executed inside a collector pod (argv: manifest dir, minimum mtime).
+# It inspects only the most recently modified upload_archive_*.json manifest
+# at or after the given mtime and prints a JSON summary. The manifest is valid
+# when it reports UPLOAD_VERIFIED, gate PASS, zero rclone copy/check exit
+# codes, and at least one verified file. Exit code is 2 when there is no
+# candidate or the manifest is not valid.
+upload_validation_code = r'''
+import json
+import sys
+from pathlib import Path
+
+manifest_dir = Path(sys.argv[1])
+after_mtime = float(sys.argv[2])
+candidates = sorted(manifest_dir.glob('upload_archive_*.json'), key=lambda p: p.stat().st_mtime)
+candidates = [path for path in candidates if path.stat().st_mtime >= after_mtime]
+if not candidates:
+    print(json.dumps({'valid': False, 'error': 'no upload manifest candidates'}, sort_keys=True))
+    sys.exit(2)
+path = candidates[-1]
+manifest = json.loads(path.read_text(encoding='utf-8'))
+verified_count = manifest.get('counts', {}).get('verified', len(manifest.get('verified_files', [])))
+valid = (
+    manifest.get('operation_status') == 'UPLOAD_VERIFIED'
+    and manifest.get('gate_status') == 'PASS'
+    and manifest.get('rclone', {}).get('copy_exit_code') == 0
+    and manifest.get('rclone', {}).get('check_exit_code') == 0
+    and verified_count > 0
+)
+verified_files = manifest.get('verified_files', [])
+print(json.dumps({
+    'path': str(path),
+    'mtime': path.stat().st_mtime,
+    'manifest_summary': {
+        'operation_status': manifest.get('operation_status'),
+        'gate_status': manifest.get('gate_status'),
+        'counts': manifest.get('counts', {}),
+        'planned_file_count': len(manifest.get('planned_files', [])),
+        'attempted_file_count': len(manifest.get('attempted_files', [])),
+        'uploaded_file_count': len(manifest.get('uploaded_files', [])),
+        'verified_file_count': verified_count,
+        'rclone_copy_exit_code': manifest.get('rclone', {}).get('copy_exit_code'),
+        'rclone_check_exit_code': manifest.get('rclone', {}).get('check_exit_code'),
+        'started_at_utc': manifest.get('started_at_utc'),
+        'ended_at_utc': manifest.get('ended_at_utc'),
+    },
+    'verified_count': verified_count,
+    'verified_file_samples': [
+        {
+            'relative_path': item.get('relative_path'),
+            'bytes': item.get('bytes'),
+            'sha256': item.get('sha256'),
+            'kind': item.get('kind'),
+        }
+        for item in verified_files[:5]
+    ],
+    'valid': valid,
+}, sort_keys=True))
+if not valid:
+    sys.exit(2)
+'''
+
+# Program executed inside a collector pod (argv: raw dir, manifest dir,
+# minimum age in seconds). It lists raw *.jsonl.gz files and collector
+# manifests whose mtime is at least that old and prints a JSON report.
+# "eligible" is true only when both lists are non-empty.
+upload_eligibility_code = r'''
+import json
+import sys
+import time
+from pathlib import Path
+
+raw_dir = Path(sys.argv[1])
+manifest_dir = Path(sys.argv[2])
+min_age_seconds = int(sys.argv[3])
+now = time.time()
+
+def eligible_files(root, pattern):
+    if not root.exists():
+        return []
+    items = []
+    for path in sorted(root.rglob(pattern)):
+        if not path.is_file():
+            continue
+        age = max(0, int(now - path.stat().st_mtime))
+        if age >= min_age_seconds:
+            items.append({'path': str(path), 'bytes': path.stat().st_size, 'age_seconds': age})
+    return items
+
+raw_files = eligible_files(raw_dir, '*.jsonl.gz')
+manifest_files = eligible_files(manifest_dir, 'polymarket_orderbook_collector_*.json')
+print(json.dumps({
+    'eligible': bool(raw_files) and bool(manifest_files),
+    'min_age_seconds': min_age_seconds,
+    'raw_eligible_count': len(raw_files),
+    'manifest_eligible_count': len(manifest_files),
+    'raw_sample': raw_files[:3],
+    'manifest_sample': manifest_files[:3],
+}, sort_keys=True))
+'''
+
+# Evidence document written at the end of the run. It starts out as
+# ERROR / not production ready; the main flow sets gate_status to PASS or FAIL.
+summary = {
+    'schema_name': 'k8s_runtime_smoke_result',
+    'schema_version': 1,
+    'started_at_utc': started_at,
+    'ended_at_utc': None,
+    'gate_status': 'ERROR',
+    'production_ready': False,
+    'namespace': namespace,
+    'deployment': deployment,
+    'cronjob': cronjob,
+    'raw_dir': raw_dir,
+    'manifest_dir': manifest_dir,
+    'upload_min_age_seconds': upload_min_age_seconds,
+    # These are the same list objects that capture() and the main flow append
+    # to, so the written evidence contains everything recorded up to then.
+    'checks': checks,
+    'failures': failures,
+}
+
+# Main flow. Any exception marks the run as FAIL; the evidence file is written
+# in every case and the process exits 1 unless the gate is PASS.
+try:
+    # Step 1: the collector Deployment must be rolled out, have a ready pod,
+    # and that pod must already hold a valid collector manifest.
+    rollout = run([kubectl, '-n', namespace, 'rollout', 'status', f'deployment/{deployment}', f'--timeout={wait_seconds}s'])
+    if rollout['exit_code'] != 0:
+        raise RuntimeError('collector deployment rollout is not healthy')
+    pod_name, pod_obj = get_collector_pod()
+    before = wait_for_valid_collector(pod_name, 0, 'initial')
+    before_mtime = before['mtime']
+    # First raw file of that manifest; it is re-checked after the restart.
+    old_file = before['output_files'][0]
+
+    # Step 2: delete the pod without waiting and let the Deployment replace it.
+    delete_pod = run([kubectl, '-n', namespace, 'delete', 'pod', pod_name, '--wait=false'])
+    if delete_pod['exit_code'] != 0:
+        raise RuntimeError('failed to delete collector pod for restart test')
+    rollout_after = run([kubectl, '-n', namespace, 'rollout', 'status', f'deployment/{deployment}', f'--timeout={wait_seconds}s'])
+    if rollout_after['exit_code'] != 0:
+        raise RuntimeError('collector deployment did not recover after pod delete')
+    new_pod, new_pod_obj = get_collector_pod()
+    # The pre-restart raw file must still parse and hash to the same values.
+    old_check = exec_python(new_pod, raw_check_code, [old_file['path'], old_file['actual_sha256'], str(old_file['rows_parsed'])])
+    if not old_check.get('sha256_matches') or not old_check.get('row_count_matches'):
+        raise RuntimeError('old raw file changed or stopped parsing after pod restart')
+
+    # Step 3: the new pod must write a valid manifest newer than the initial
+    # one, and files old enough for the uploader must exist.
+    after = wait_for_valid_collector(new_pod, before_mtime, 'post_restart')
+    upload_eligibility = wait_for_upload_eligible_files(new_pod)
+
+    # Step 4: run the uploader once as a Job created from the CronJob.
+    # Lower bound for upload manifest mtimes, with two seconds of slack. It is
+    # taken from this machine's clock and compared with file mtimes in the pod.
+    upload_start_mtime = time.time() - 2
+    job_name = 'orderbooks-uploader-smoke-' + dt.datetime.now(dt.UTC).strftime('%Y%m%dt%H%M%Sz').lower()
+    # Remove any leftover job with the same name; the result is not checked.
+    run([kubectl, '-n', namespace, 'delete', 'job', job_name, '--ignore-not-found=true'])
+    create_job = run([kubectl, '-n', namespace, 'create', 'job', job_name, f'--from=cronjob/{cronjob}'])
+    if create_job['exit_code'] != 0:
+        raise RuntimeError('failed to create uploader smoke job from CronJob')
+    wait_upload = run([kubectl, '-n', namespace, 'wait', '--for=condition=Complete', f'--timeout={wait_seconds}s', f'job/{job_name}'])
+    # Fetch the logs before checking the wait result so they are recorded in
+    # `checks` even when the job did not complete.
+    logs = run([kubectl, '-n', namespace, 'logs', f'job/{job_name}'])
+    if wait_upload['exit_code'] != 0:
+        raise RuntimeError('uploader smoke job did not complete')
+    upload = exec_python(new_pod, upload_validation_code, [manifest_dir, str(upload_start_mtime)])
+    if not upload.get('valid'):
+        raise RuntimeError('upload manifest did not verify at least one file')
+
+    # The detailed results are attached only when every step succeeded.
+    summary.update({
+        'initial_collector_pod': pod_name,
+        'post_restart_collector_pod': new_pod,
+        'before_restart_collector': before,
+        'old_raw_file_after_restart': old_check,
+        'after_restart_collector': after,
+        'upload_eligibility': upload_eligibility,
+        'uploader_job': job_name,
+        'upload_result': upload,
+        'uploader_log_check_exit_code': logs['exit_code'],
+    })
+    summary['gate_status'] = 'PASS'
+except Exception as exc:
+    failures.append(str(exc))
+    summary['exception'] = repr(exc)
+    summary['gate_status'] = 'FAIL'
+finally:
+    # Always write the evidence file, whether the run passed or failed.
+    summary['ended_at_utc'] = iso_now()
+    evidence_path.parent.mkdir(parents=True, exist_ok=True)
+    evidence_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + '\n', encoding='utf-8')
+
+# Machine-readable result lines for the caller, then the process exit status.
+print(f'K8S_SMOKE_EVIDENCE={evidence_path}')
+print(f'K8S_SMOKE_GATE={summary["gate_status"]}')
+if summary['gate_status'] != 'PASS':
+    sys.exit(1)
+PY_SMOKE
diff --git a/scripts/normalize_polymarket_orderbooks.py b/scripts/normalize_polymarket_orderbooks.py
new file mode 100644
index 0000000..5af88eb
--- /dev/null
+++ b/scripts/normalize_polymarket_orderbooks.py
@@ -0,0 +1,496 @@
+#!/usr/bin/env python3
+"""Normalize raw Polymarket order-book snapshots from the sample collector.
+
+Checkpoint 5 scope: derive a bounded normalized gzip JSONL sample from the raw
+Checkpoint 4 sample. Raw files remain the source of truth; every normalized row
+keeps the raw file path and gzip JSONL line number.
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime as dt
+import gzip
+import hashlib
+import json
+import sys
+from decimal import Decimal, InvalidOperation, getcontext
+from pathlib import Path
+from typing import Any
+
+
+# Identity of this tool and of the row schema it emits.
+NORMALIZER_NAME = "polymarket_orderbook_normalizer"
+NORMALIZER_VERSION = "0.1.0"
+SCHEMA_NAME = "normalized_orderbook_snapshot"
+SCHEMA_VERSION = 1
+
+# Default command-line paths.
+DEFAULT_INPUT_MANIFEST = Path("data/manifests/orderbook_collector_sample_manifest.json")
+DEFAULT_OUTPUT_DIR = Path("data/normalized_sample")
+DEFAULT_MANIFEST_PATH = Path("data/manifests/orderbook_normalization_sample_manifest.json")
+
+# Price distances from the best bid/ask, keyed by the label used in the
+# "*_depth_within_<label>" output fields.
+CENT_OFFSETS = {
+    "1c": Decimal("0.01"),
+    "2c": Decimal("0.02"),
+    "5c": Decimal("0.05"),
+}
+
+# Terms searched for, case-insensitively, by scan_for_secret_terms(). Each term
+# is written as adjacent string literals, which Python joins into one string.
+# NOTE(review): presumably split so this source file does not contain the
+# terms verbatim - confirm.
+SECRET_PATTERNS = (
+    "set-" "coo" "kie",
+    "__cf" "_bm",
+    "cf" "_bm",
+    "author" "ization",
+    "private" "_key",
+    "api" "_secret",
+    "poly" "_signature",
+    "poly" "_passphrase",
+    "poly" "_address",
+    "bear" "er",
+    "coo" "kie",
+    "wallet" " material",
+)
+
+
+# Use 50 significant digits for Decimal arithmetic in the current context.
+getcontext().prec = 50
+
+
+def utc_now() -> dt.datetime:
+    """Return the current time as a timezone-aware UTC datetime."""
+    return dt.datetime.now(tz=dt.UTC)
+
+
+def iso_z(value: dt.datetime | None = None) -> str:
+    """Format *value* (default: now) as a second-resolution UTC time ending in "Z"."""
+    if not value:
+        value = utc_now()
+    in_utc = value.astimezone(dt.UTC).replace(microsecond=0)
+    return in_utc.isoformat().replace("+00:00", "Z")
+
+
+def compact_timestamp(value: dt.datetime | None = None) -> str:
+    """Format *value* (default: now) in UTC as ``YYYYMMDDTHHMMSSZ``."""
+    if not value:
+        value = utc_now()
+    in_utc = value.astimezone(dt.UTC)
+    return in_utc.strftime("%Y%m%dT%H%M%SZ")
+
+
+def sha256_file(path: Path) -> str:
+    """Return the hex SHA-256 digest of the file at *path*, read in 1 MiB blocks."""
+    hasher = hashlib.sha256()
+    with path.open("rb") as stream:
+        while block := stream.read(1024 * 1024):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def decimal_from_raw(value: Any, field_name: str) -> Decimal:
+    """Parse a raw string field into a finite Decimal.
+
+    *field_name* is used only in error messages.
+
+    Raises:
+        ValueError: if *value* is not a str, is not valid decimal syntax, or
+            parses to NaN or an infinity.
+    """
+    if not isinstance(value, str):
+        raise ValueError(f"{field_name} is not a string: {value!r}")
+    try:
+        number = Decimal(value)
+    except InvalidOperation as exc:
+        raise ValueError(f"{field_name} is not a decimal: {value!r}") from exc
+    if number.is_finite():
+        return number
+    raise ValueError(f"{field_name} is not finite: {value!r}")
+
+
+def decimal_to_json(value: Decimal | None) -> str | None:
+    """Render a Decimal as a plain fixed-point string for JSON output.
+
+    None stays None, any zero becomes "0", and other values are normalized
+    (trailing zeros dropped) and written without exponent notation.
+    """
+    if value is None:
+        return None
+    return "0" if value == 0 else format(value.normalize(), "f")
+
+
+def load_json(path: Path) -> dict[str, Any]:
+    """Read a UTF-8 JSON file and return its top-level object.
+
+    Raises:
+        ValueError: if the document's top level is not a JSON object.
+    """
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    if isinstance(payload, dict):
+        return payload
+    raise ValueError(f"{path} did not contain a JSON object")
+
+
+def resolve_repo_path(path_text: str) -> Path:
+    """Return *path_text* as a Path, anchoring relative paths at the working directory."""
+    candidate = Path(path_text)
+    return candidate if candidate.is_absolute() else Path.cwd() / candidate
+
+
+def normalize_side(levels: Any, side_name: str) -> list[tuple[Decimal, Decimal]]:
+    """Convert one side of a raw book into ``(price, size)`` Decimal pairs.
+
+    *side_name* ("bids" or "asks" at the call sites) is used only in error
+    messages. Input order is preserved.
+
+    Raises:
+        ValueError: if *levels* is not a list, a level is not an object, a
+            price or size fails decimal_from_raw(), or a size is negative.
+    """
+    if not isinstance(levels, list):
+        raise ValueError(f"raw.{side_name} is not a list")
+    pairs: list[tuple[Decimal, Decimal]] = []
+    for index, entry in enumerate(levels):
+        if not isinstance(entry, dict):
+            raise ValueError(f"raw.{side_name}[{index}] is not an object")
+        level_price = decimal_from_raw(entry.get("price"), f"raw.{side_name}[{index}].price")
+        level_size = decimal_from_raw(entry.get("size"), f"raw.{side_name}[{index}].size")
+        if level_size < 0:
+            raise ValueError(f"raw.{side_name}[{index}].size is negative")
+        pairs.append((level_price, level_size))
+    return pairs
+
+
+def sum_sizes(levels: list[tuple[Decimal, Decimal]]) -> Decimal:
+    """Return the total size across *levels*; Decimal("0") for an empty side."""
+    total = Decimal("0")
+    for _price, size in levels:
+        total = total + size
+    return total
+
+
+def normalize_raw_row(raw_row: dict[str, Any], raw_file: str, raw_line_number: int) -> dict[str, Any]:
+    """Derive one normalized snapshot row from one raw collector row.
+
+    The raw row must contain ``raw``, ``market`` and ``collection`` objects.
+    The result carries the market identity, best bid/ask, spread, midpoint,
+    total depth per side, depth within each CENT_OFFSETS distance of the best
+    price, and the raw file path and line number the row came from.
+
+    Decimal values are rendered with decimal_to_json(). Best bid/ask are None
+    for an empty side; spread and midpoint are None unless both sides have a
+    best price. Depth fields are "0" for an empty side.
+
+    Raises:
+        ValueError: if a required object is missing or a level is malformed.
+    """
+    book = raw_row.get("raw")
+    market = raw_row.get("market")
+    collection = raw_row.get("collection")
+    if not isinstance(book, dict):
+        raise ValueError("raw is not an object")
+    if not isinstance(market, dict):
+        raise ValueError("market is not an object")
+    if not isinstance(collection, dict):
+        raise ValueError("collection is not an object")
+
+    bids = normalize_side(book.get("bids"), "bids")
+    asks = normalize_side(book.get("asks"), "asks")
+
+    # Best prices are taken over all levels, whatever order they arrive in.
+    top_bid = max((price for price, _ in bids), default=None)
+    top_ask = min((price for price, _ in asks), default=None)
+    two_sided = top_bid is not None and top_ask is not None
+    spread = top_ask - top_bid if two_sided else None
+    midpoint = (top_bid + top_ask) / Decimal("2") if two_sided else None
+
+    row: dict[str, Any] = {
+        "schema_name": SCHEMA_NAME,
+        "schema_version": SCHEMA_VERSION,
+        "market_name": market.get("market_name"),
+        "market_slug": market.get("market_slug"),
+        "condition_id": market.get("condition_id"),
+        "token_id": market.get("token_id"),
+        "outcome": market.get("outcome"),
+        "collected_at_utc": collection.get("collected_at_utc"),
+        "best_bid": decimal_to_json(top_bid),
+        "best_ask": decimal_to_json(top_ask),
+        "spread": decimal_to_json(spread),
+        "midpoint": decimal_to_json(midpoint),
+        "bid_depth_total": decimal_to_json(sum_sizes(bids)),
+        "ask_depth_total": decimal_to_json(sum_sizes(asks)),
+        "raw_file": raw_file,
+        "raw_line_number": raw_line_number,
+    }
+
+    for label, offset in CENT_OFFSETS.items():
+        if top_bid is None:
+            near_bid = Decimal("0")
+        else:
+            floor = top_bid - offset
+            near_bid = sum((size for price, size in bids if price >= floor), Decimal("0"))
+        if top_ask is None:
+            near_ask = Decimal("0")
+        else:
+            ceiling = top_ask + offset
+            near_ask = sum((size for price, size in asks if price <= ceiling), Decimal("0"))
+        row[f"bid_depth_within_{label}"] = decimal_to_json(near_bid)
+        row[f"ask_depth_within_{label}"] = decimal_to_json(near_ask)
+
+    return row
+
+
+def summarize_output(path: Path, rows: int) -> dict[str, Any]:
+    """Describe a written output file for the manifest.
+
+    Returns the file's path, the given row count, its size in bytes, its
+    SHA-256 digest, and a fixed status of "valid". An absolute path is
+    reported relative to the working directory when it lies inside it, and
+    unchanged otherwise.
+    """
+    shown_path = path
+    if path.is_absolute():
+        try:
+            shown_path = path.relative_to(Path.cwd())
+        except ValueError:
+            # The output lives outside the working directory (for example an
+            # absolute --output-dir); keep the absolute path instead of failing.
+            shown_path = path
+    return {
+        "path": str(shown_path),
+        "rows": rows,
+        "bytes": path.stat().st_size,
+        "sha256": sha256_file(path),
+        "status": "valid",
+    }
+
+
+def build_input_file_summary(manifest: dict[str, Any]) -> list[dict[str, Any]]:
+    """Check the raw files listed in a collector manifest and summarize each.
+
+    Every ``output_files`` entry is re-hashed and compared with the checksum
+    recorded in the manifest. A mismatch does not raise; it is reported
+    through ``checksum_match`` and a status of "invalid".
+
+    Raises:
+        ValueError: if ``output_files`` is missing, empty, or malformed.
+        FileNotFoundError: if a listed file does not exist.
+    """
+    entries = manifest.get("output_files")
+    if not (isinstance(entries, list) and entries):
+        raise ValueError("input manifest has no output_files")
+
+    summaries: list[dict[str, Any]] = []
+    for entry in entries:
+        if not isinstance(entry, dict):
+            raise ValueError("input manifest output_files entry is not an object")
+        path_text = entry.get("path")
+        if not (isinstance(path_text, str) and path_text):
+            raise ValueError("input manifest output_files entry lacks path")
+        raw_path = resolve_repo_path(path_text)
+        if not raw_path.exists():
+            raise FileNotFoundError(raw_path)
+
+        digest = sha256_file(raw_path)
+        recorded = entry.get("sha256")
+        matches = recorded == digest
+        summary = {
+            "path": path_text,
+            "rows_expected": entry.get("rows"),
+            "bytes": raw_path.stat().st_size,
+            "sha256": digest,
+            "input_manifest_sha256": recorded,
+            "checksum_match": matches,
+            "status": "valid" if matches else "invalid",
+        }
+        summaries.append(summary)
+    return summaries
+
+
+def read_and_normalize(
+    input_files: list[dict[str, Any]],
+    output_path: Path,
+) -> tuple[int, int, list[dict[str, Any]], dict[str, Any]]:
+    """Normalize every row of the input raw files into one gzip JSONL output.
+
+    Each entry of *input_files* must have a ``path`` key. Rows that fail to
+    parse or normalize are recorded as errors instead of aborting the run.
+
+    Returns:
+        ``(raw_rows_read, normalized_rows_written, errors, sanity)`` where
+        *errors* lists per-row (or per-file) failures and *sanity* holds the
+        boolean checks accumulated over all written rows.
+    """
+    raw_rows_read = 0
+    normalized_rows_written = 0
+    errors: list[dict[str, Any]] = []
+    # Flags start optimistic and are flipped to False on the first violation.
+    # "gzip_jsonl_parseable" is never changed inside this function.
+    sanity = {
+        "raw_file_refs_present": True,
+        "raw_files_exist": True,
+        "spread_non_negative": True,
+        "midpoint_between_bid_ask": True,
+        "depth_totals_non_negative": True,
+        "outcomes_seen": [],
+        "gzip_jsonl_parseable": True,
+        "row_count_match": None,
+    }
+    outcomes_seen: set[str] = set()
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with gzip.open(output_path, "wt", encoding="utf-8", compresslevel=9) as output:
+        for file_entry in input_files:
+            raw_file = file_entry["path"]
+            raw_path = resolve_repo_path(raw_file)
+            if not raw_path.exists():
+                sanity["raw_files_exist"] = False
+                errors.append({"raw_file": raw_file, "error": "raw file missing"})
+                continue
+
+            with gzip.open(raw_path, "rt", encoding="utf-8") as raw_handle:
+                # Line numbers are 1-based and are stored in every output row.
+                for raw_line_number, line in enumerate(raw_handle, 1):
+                    raw_rows_read += 1
+                    try:
+                        raw_row = json.loads(line)
+                        normalized = normalize_raw_row(raw_row, raw_file, raw_line_number)
+                        # The row is written before the sanity checks below run.
+                        output.write(json.dumps(normalized, sort_keys=True, separators=(",", ":")) + "\n")
+                        normalized_rows_written += 1
+
+                        if not normalized.get("raw_file") or not normalized.get("raw_line_number"):
+                            sanity["raw_file_refs_present"] = False
+                        if not resolve_repo_path(str(normalized["raw_file"])).exists():
+                            sanity["raw_files_exist"] = False
+                        outcome = normalized.get("outcome")
+                        if isinstance(outcome, str):
+                            outcomes_seen.add(outcome)
+
+                        # Re-parse the serialized strings so the checks apply to
+                        # the values as they were written.
+                        best_bid = Decimal(normalized["best_bid"]) if normalized["best_bid"] is not None else None
+                        best_ask = Decimal(normalized["best_ask"]) if normalized["best_ask"] is not None else None
+                        spread = Decimal(normalized["spread"]) if normalized["spread"] is not None else None
+                        midpoint = Decimal(normalized["midpoint"]) if normalized["midpoint"] is not None else None
+                        # Spread and midpoint are only checked for two-sided books.
+                        if best_bid is not None and best_ask is not None:
+                            if spread is None or spread < 0:
+                                sanity["spread_non_negative"] = False
+                            if midpoint is None or midpoint < best_bid or midpoint > best_ask:
+                                sanity["midpoint_between_bid_ask"] = False
+                        depth_fields = [
+                            "bid_depth_total",
+                            "ask_depth_total",
+                            "bid_depth_within_1c",
+                            "ask_depth_within_1c",
+                            "bid_depth_within_2c",
+                            "ask_depth_within_2c",
+                            "bid_depth_within_5c",
+                            "ask_depth_within_5c",
+                        ]
+                        for field in depth_fields:
+                            if Decimal(normalized[field]) < 0:
+                                sanity["depth_totals_non_negative"] = False
+                    except Exception as exc:  # noqa: BLE001 - preserve row-level failure evidence.
+                        errors.append(
+                            {
+                                "raw_file": raw_file,
+                                "raw_line_number": raw_line_number,
+                                "error": str(exc),
+                            }
+                        )
+
+    sanity["outcomes_seen"] = sorted(outcomes_seen)
+    sanity["has_up_and_down"] = {"Up", "Down"}.issubset(outcomes_seen)
+    # NOTE(review): len(errors) also counts "raw file missing" entries, which
+    # have no corresponding row read - confirm this is intended.
+    sanity["row_count_match"] = raw_rows_read == normalized_rows_written + len(errors)
+    return raw_rows_read, normalized_rows_written, errors, sanity
+
+
+def validate_output_gzip_jsonl(path: Path) -> tuple[bool, int, list[str]]:
+    """Check that *path* is a readable gzip file of JSON lines.
+
+    Reading stops at the first failure. Returns ``(ok, parsed_rows, errors)``
+    where *parsed_rows* counts the lines parsed before any failure and
+    *errors* holds the failure message, if there was one.
+    """
+    problems: list[str] = []
+    row_total = 0
+    try:
+        with gzip.open(path, "rt", encoding="utf-8") as stream:
+            for text in stream:
+                json.loads(text)
+                row_total += 1
+    except Exception as exc:  # noqa: BLE001 - validation result belongs in manifest.
+        problems.append(str(exc))
+    return not problems, row_total, problems
+
+
+def scan_for_secret_terms(paths: list[Path]) -> dict[str, Any]:
+    """Scan text and gzip files line by line for any SECRET_PATTERNS term.
+
+    Matching is case-insensitive. Paths that do not exist are skipped, and
+    files ending in ".gz" are read through gzip. At most one match is recorded
+    per line; it holds the path, the 1-based line number and the 1-based index
+    of the matching term, never the matched text itself.
+
+    Returns:
+        A dict with ``passed`` (True when nothing matched),
+        ``checked_term_count`` and the list of ``matches``.
+    """
+    matches: list[dict[str, Any]] = []
+    lowered_patterns = tuple(pattern.lower() for pattern in SECRET_PATTERNS)
+    for path in paths:
+        if not path.exists():
+            continue
+        # Report absolute paths relative to the working directory when they lie
+        # inside it. relative_to() raises ValueError otherwise, in which case
+        # the absolute path is kept instead of failing the scan.
+        shown_path = path
+        if path.is_absolute():
+            try:
+                shown_path = path.relative_to(Path.cwd())
+            except ValueError:
+                shown_path = path
+        opener = gzip.open if path.suffix == ".gz" else open
+        with opener(path, "rt", encoding="utf-8", errors="replace") as handle:  # type: ignore[arg-type]
+            for line_number, line in enumerate(handle, 1):
+                lower = line.lower()
+                for pattern_index, pattern in enumerate(lowered_patterns, 1):
+                    if pattern in lower:
+                        matches.append(
+                            {
+                                "path": str(shown_path),
+                                "line_number": line_number,
+                                "term_index": pattern_index,
+                            }
+                        )
+                        # One recorded match per line is enough.
+                        break
+    return {
+        "passed": not matches,
+        "checked_term_count": len(SECRET_PATTERNS),
+        "matches": matches,
+    }
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    """Parse the normalizer's command-line options.
+
+    All three options take a filesystem path and fall back to the module-level
+    defaults when omitted.
+    """
+    parser = argparse.ArgumentParser(
+        description="Normalize Checkpoint 4 raw Polymarket order-book snapshots.",
+    )
+    # (flag, default value, help label) for each path-valued option.
+    path_options = (
+        ("--input-manifest", DEFAULT_INPUT_MANIFEST, "Raw collector manifest path"),
+        ("--output-dir", DEFAULT_OUTPUT_DIR, "Normalized sample base directory"),
+        ("--manifest-path", DEFAULT_MANIFEST_PATH, "Normalization manifest path"),
+    )
+    for flag, default_value, label in path_options:
+        parser.add_argument(
+            flag,
+            type=Path,
+            default=default_value,
+            help=f"{label}. Default: {default_value}",
+        )
+    return parser.parse_args(argv)
+
+
+def main(argv: list[str]) -> int:
+    """Run one normalization pass and write its manifest.
+
+    Loads the input manifest, normalizes the referenced raw files into a
+    run-stamped gzip JSONL file, re-reads that file to validate it, scans this
+    script and the output for secret terms, writes the normalization manifest
+    and prints a short JSON summary to stdout.
+
+    Returns:
+        0 when the gate status is ``PASS``, otherwise 1.
+    """
+    args = parse_args(argv)
+    started = utc_now()
+    input_manifest = load_json(args.input_manifest)
+    input_files = build_input_file_summary(input_manifest)
+
+    # The run id is derived from the start time and is used both as the output
+    # directory name and inside the output file name.
+    run_id = compact_timestamp(started)
+    output_path = (
+        args.output_dir
+        / "polymarket"
+        / "orderbooks"
+        / run_id
+        / f"polymarket_orderbooks_normalized_{run_id}.jsonl.gz"
+    )
+
+    raw_rows_read, normalized_rows_written, row_errors, sanity = read_and_normalize(input_files, output_path)
+    # Independent read-back of the file that was just written.
+    gzip_ok, gzip_rows, gzip_errors = validate_output_gzip_jsonl(output_path)
+    output_summary = summarize_output(output_path, normalized_rows_written)
+
+    sanity.update(
+        {
+            # Forced to False whenever any row failed, regardless of the counts.
+            "output_row_count_equals_raw_input_row_count": normalized_rows_written == raw_rows_read
+            if not row_errors
+            else False,
+            "gzip_jsonl_decompresses_and_parses": gzip_ok,
+            "gzip_jsonl_rows_parsed": gzip_rows,
+            "gzip_jsonl_errors": gzip_errors,
+            # Re-hash the output and compare with the checksum in the summary.
+            "manifest_checksum_matches_output": output_summary["sha256"] == sha256_file(output_path),
+            "all_input_file_checksums_match": all(file_entry["checksum_match"] for file_entry in input_files),
+        }
+    )
+
+    # The scan covers this script's own source as well as the generated output.
+    secret_scan = scan_for_secret_terms([Path(__file__), output_path])
+    sanity["checkpoint5_secret_scan_passed"] = secret_scan["passed"]
+
+    # Every entry must be truthy for the gate to pass.
+    # NOTE(review): gzip_rows is recorded in the manifest but is not compared
+    # with normalized_rows_written here - confirm whether that is intended.
+    gate_checks = [
+        normalized_rows_written == raw_rows_read,
+        not row_errors,
+        sanity["raw_file_refs_present"],
+        sanity["raw_files_exist"],
+        sanity["spread_non_negative"],
+        sanity["midpoint_between_bid_ask"],
+        sanity["depth_totals_non_negative"],
+        sanity["has_up_and_down"],
+        gzip_ok,
+        sanity["manifest_checksum_matches_output"],
+        secret_scan["passed"],
+        all(file_entry["checksum_match"] for file_entry in input_files),
+    ]
+    # An empty output can never pass, even if every individual check is true.
+    gate_status = "PASS" if all(gate_checks) and normalized_rows_written > 0 else "FAIL"
+    ended = utc_now()
+
+    manifest = {
+        "schema_name": "orderbook_normalization_sample_manifest",
+        "schema_version": 1,
+        "checkpoint_id": 5,
+        "checkpoint_name": "Normalized Snapshot Extract",
+        "normalizer": {
+            "name": NORMALIZER_NAME,
+            "version": NORMALIZER_VERSION,
+        },
+        "started_at_utc": iso_z(started),
+        "ended_at_utc": iso_z(ended),
+        "run_duration_seconds": round((ended - started).total_seconds(), 3),
+        "command": "scripts/normalize_polymarket_orderbooks.py",
+        "input_manifest": {
+            "path": str(args.input_manifest),
+            "sha256": sha256_file(args.input_manifest),
+            "collector_manifest_schema_name": input_manifest.get("schema_name"),
+            "collector_gate_status": input_manifest.get("gate_status"),
+        },
+        "input_files": input_files,
+        "output_files": [output_summary],
+        "raw_rows_read": raw_rows_read,
+        "normalized_rows_written": normalized_rows_written,
+        "skipped_rows": len(row_errors),
+        "error_rows": row_errors,
+        "numeric_encoding": "Exact decimal values are emitted as JSON strings; missing price-derived values are null.",
+        "sanity_checks": sanity,
+        "secret_scan": secret_scan,
+        "warnings": [],
+        "known_gaps": [
+            "This is a derived sample extract only; raw gzip JSONL remains the source of truth.",
+            "No upload, daemon runtime, systemd unit, dashboard, database, strategy, backtest, or trading behavior is included.",
+            "The sample proves normalization logic on one bounded raw run, not long-run schema stability.",
+        ],
+        "fake_progress_risk": "A clean normalized sample can hide raw collection gaps and endpoint schema drift; every row is therefore traceable to raw_file and raw_line_number, and reliability remains gated on later soak testing.",
+        "next_step": "Checkpoint 6 should package the raw collector for a VPS runtime, or the orchestrator can request review of this normalized sample first.",
+        "gate_status": gate_status,
+    }
+
+    # The manifest is written even when the gate fails.
+    args.manifest_path.parent.mkdir(parents=True, exist_ok=True)
+    args.manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+    # Short machine-readable summary on stdout.
+    print(
+        json.dumps(
+            {
+                "gate_status": gate_status,
+                "manifest_path": str(args.manifest_path),
+                "output_path": str(output_path),
+                "raw_rows_read": raw_rows_read,
+                "normalized_rows_written": normalized_rows_written,
+                "skipped_rows": len(row_errors),
+                "sha256": output_summary["sha256"],
+            },
+            indent=2,
+            sort_keys=True,
+        )
+    )
+    return 0 if gate_status == "PASS" else 1
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/scripts/probe_polymarket_public_sources.py b/scripts/probe_polymarket_public_sources.py
new file mode 100755
index 0000000..5fb7375
--- /dev/null
+++ b/scripts/probe_polymarket_public_sources.py
@@ -0,0 +1,1369 @@
+#!/usr/bin/env python3
+"""Bounded public-source probe for Polymarket Checkpoint 2.
+
+This is not a collector. It performs a small, finite set of public requests to
+prove which endpoints can support a future raw-first order book archive.
+"""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import datetime as dt
+import hashlib
+import json
+import os
+import socket
+import ssl
+import struct
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+
+# Endpoints of the Polymarket services referenced by this probe: three HTTPS
+# API base URLs and one secure websocket URL.
+GAMMA_BASE = "https://gamma-api.polymarket.com"
+CLOB_BASE = "https://clob.polymarket.com"
+DATA_API_BASE = "https://data-api.polymarket.com"
+MARKET_WS_URL = "wss://ws-subscriptions-clob.polymarket.com/ws/market"
+
+# Default relative paths for the probe artifacts (JSON and Markdown), the
+# checkpoint report and the checkpoint manifest.
+# NOTE(review): presumably resolved against the working directory - confirm
+# against the code that writes them.
+DEFAULT_PROBE_JSON = Path("data/probes/polymarket_public_sources_probe_v1.json")
+DEFAULT_PROBE_MD = Path("data/probes/polymarket_public_sources_probe_v1.md")
+DEFAULT_CHECKPOINT_REPORT = Path(
+    "reports/checkpoints/checkpoint_002_polymarket_public_sources.md"
+)
+DEFAULT_CHECKPOINT_MANIFEST = Path(
+    "data/manifests/checkpoint_002_polymarket_public_sources.json"
+)
+
+# Documentation pages consulted for this checkpoint. Each entry carries a short
+# display name, the page URL and the one-sentence finding taken from that page.
+OFFICIAL_SOURCES = [
+    {
+        "name": "Fetching Markets",
+        "url": "https://docs.polymarket.com/market-data/fetching-markets.md",
+        "finding": "Use Gamma events with active=true&closed=false for active market discovery; events contain markets.",
+    },
+    {
+        "name": "List markets",
+        "url": "https://docs.polymarket.com/api-reference/markets/list-markets.md",
+        "finding": "Gamma /markets supports active/closed, slug, tag_id, condition_ids, clob_token_ids, end_date, limit, offset, and sorting parameters.",
+    },
+    {
+        "name": "Public search",
+        "url": "https://docs.polymarket.com/api-reference/search/search-markets-events-and-profiles.md",
+        "finding": "Gamma /public-search supports q, events_status, limit_per_type, search_tags, recurrence, and tag filters.",
+    },
+    {
+        "name": "Get order book",
+        "url": "https://docs.polymarket.com/api-reference/market-data/get-order-book.md",
+        "finding": "CLOB GET /book takes token_id and returns an order book summary.",
+    },
+    {
+        "name": "Get order books",
+        "url": "https://docs.polymarket.com/api-reference/market-data/get-order-books-request-body.md",
+        "finding": "CLOB POST /books takes an array of token_id objects and returns multiple book summaries.",
+    },
+    {
+        "name": "Market websocket",
+        "url": "https://docs.polymarket.com/market-data/websocket/market-channel.md",
+        "finding": "Public websocket supports market subscriptions by outcome token asset IDs.",
+    },
+    {
+        "name": "Recent trades",
+        "url": "https://docs.polymarket.com/api-reference/core/get-trades-for-a-user-or-markets.md",
+        "finding": "Data API GET /trades is public and can filter by condition ID in the market query parameter.",
+    },
+    {
+        "name": "Authenticated CLOB trades",
+        "url": "https://docs.polymarket.com/api-reference/trade/get-trades.md",
+        "finding": "CLOB GET /trades exists but requires API-key authentication, so it is not used for this public-data checkpoint.",
+    },
+    {
+        "name": "Rate limits",
+        "url": "https://docs.polymarket.com/api-reference/rate-limits.md",
+        "finding": "Official rate limits are documented for Gamma, Data API, CLOB market data, and websocket-adjacent endpoints.",
+    },
+]
+
+# Rate limits per service, keyed by endpoint path ("general" is the
+# service-wide figure). Values are descriptive strings, not parsed numbers.
+# NOTE(review): presumably transcribed from the "Rate limits" documentation
+# page listed in OFFICIAL_SOURCES - re-verify before relying on them.
+DOCUMENTED_RATE_LIMITS = {
+    "gamma": {
+        "base_url": GAMMA_BASE,
+        "general": "4,000 req / 10s",
+        "/events": "500 req / 10s",
+        "/markets": "300 req / 10s",
+        "/markets + /events listing": "900 req / 10s",
+        "/public-search": "350 req / 10s",
+    },
+    "data_api": {
+        "base_url": DATA_API_BASE,
+        "general": "1,000 req / 10s",
+        "/trades": "200 req / 10s",
+    },
+    "clob": {
+        "base_url": CLOB_BASE,
+        "general": "9,000 req / 10s",
+        "/book": "1,500 req / 10s",
+        "/books": "500 req / 10s",
+        "/price": "1,500 req / 10s",
+        "/prices": "500 req / 10s",
+        "/midpoint": "1,500 req / 10s",
+        "/midpoints": "500 req / 10s",
+        "/prices-history": "1,000 req / 10s",
+    },
+}
+
+# Allow-list of response header names (lowercase) that filter_headers() keeps;
+# every header not listed here is dropped from recorded responses.
+SAFE_RESPONSE_HEADERS = {
+    "age",
+    "cache-control",
+    "cf-cache-status",
+    "cf-ray",
+    "content-encoding",
+    "content-length",
+    "content-type",
+    "date",
+    "expires",
+    "last-modified",
+    "ratelimit-limit",
+    "ratelimit-remaining",
+    "ratelimit-reset",
+    "retry-after",
+    "server",
+    "strict-transport-security",
+    "x-ratelimit-limit",
+    "x-ratelimit-remaining",
+    "x-ratelimit-reset",
+}
+
+
+def utc_now() -> dt.datetime:
+    """Return the current moment as a timezone-aware UTC datetime."""
+    return dt.datetime.now(tz=dt.UTC)
+
+
+def iso_z(value: dt.datetime | None = None) -> str:
+    """Format a datetime as ISO-8601 UTC with whole seconds and a ``Z`` suffix.
+
+    When ``value`` is omitted the current UTC time is used.
+    """
+    moment = value if value else utc_now()
+    stamp = moment.astimezone(dt.UTC).replace(microsecond=0).isoformat()
+    return stamp.replace("+00:00", "Z")
+
+
+def parse_iso(value: Any) -> dt.datetime | None:
+    """Parse an ISO-8601 string into a timezone-aware UTC datetime.
+
+    Returns ``None`` for non-strings, blank strings and unparseable text. A
+    trailing ``Z`` is accepted, and values without an offset are taken as UTC.
+    """
+    if not isinstance(value, str):
+        return None
+    candidate = value.strip()
+    if not candidate:
+        return None
+    if candidate.endswith("Z"):
+        candidate = f"{candidate[:-1]}+00:00"
+    try:
+        moment = dt.datetime.fromisoformat(candidate)
+    except ValueError:
+        return None
+    # Naive timestamps are interpreted as UTC rather than local time.
+    aware = moment if moment.tzinfo is not None else moment.replace(tzinfo=dt.UTC)
+    return aware.astimezone(dt.UTC)
+
+
+def sha256_file(path: Path) -> str:
+    """Return the hex SHA-256 digest of a file, read in 1 MiB blocks."""
+    hasher = hashlib.sha256()
+    block_size = 1024 * 1024
+    with path.open("rb") as stream:
+        while block := stream.read(block_size):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def normalize_params(params: dict[str, Any] | None) -> dict[str, Any] | None:
+    """Return a copy of ``params`` with booleans rendered as ``"true"``/``"false"``.
+
+    Booleans inside list values are converted as well; every other value is
+    passed through unchanged. ``None`` or an empty mapping yields ``None``.
+    """
+    if not params:
+        return None
+
+    def render(item: Any) -> Any:
+        # Identity checks so that integers 0/1 are not mistaken for booleans.
+        if item is True:
+            return "true"
+        if item is False:
+            return "false"
+        return item
+
+    cleaned: dict[str, Any] = {}
+    for name, raw in params.items():
+        cleaned[name] = [render(entry) for entry in raw] if isinstance(raw, list) else render(raw)
+    return cleaned
+
+
+def filter_headers(headers: Any) -> dict[str, str]:
+    """Keep only headers whose lowercased name is in ``SAFE_RESPONSE_HEADERS``.
+
+    ``headers`` may be anything ``dict()`` accepts; the original casing of the
+    kept header names is preserved.
+    """
+    return {
+        name: text
+        for name, text in dict(headers).items()
+        if name.lower() in SAFE_RESPONSE_HEADERS
+    }
+
+
+def rate_limit_headers(headers: dict[str, str]) -> dict[str, str]:
+    """Select headers that carry rate-limit information.
+
+    A header is kept when its lowercased name contains ``ratelimit`` or equals
+    ``retry-after``.
+    """
+
+    def is_rate_limit_name(name: str) -> bool:
+        lowered = name.lower()
+        return lowered == "retry-after" or "ratelimit" in lowered
+
+    return {name: text for name, text in headers.items() if is_rate_limit_name(name)}
+
+
+def decode_json_maybe(text: str) -> tuple[Any | None, str | None]:
+    """Try to parse ``text`` as JSON without raising on malformed input.
+
+    Returns ``(value, None)`` on success and ``(None, message)`` when the text
+    is not valid JSON.
+    """
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError as exc:
+        return None, str(exc)
+    return parsed, None
+
+
+def encode_url(url: str, params: dict[str, Any] | None = None) -> str:
+    """Append normalized query parameters to ``url``.
+
+    The URL is returned untouched when there are no parameters. Parameters are
+    joined with ``&`` if the URL already has a query string, else with ``?``.
+    List values are expanded into repeated keys.
+    """
+    cleaned = normalize_params(params)
+    if not cleaned:
+        return url
+    already_has_query = bool(urllib.parse.urlparse(url).query)
+    joiner = "&" if already_has_query else "?"
+    return url + joiner + urllib.parse.urlencode(cleaned, doseq=True)
+
+
+def http_json_request(
+    name: str,
+    method: str,
+    url: str,
+    *,
+    params: dict[str, Any] | None = None,
+    json_body: Any | None = None,
+    timeout_seconds: float = 15.0,
+) -> dict[str, Any]:
+    """Perform one HTTP request and return a self-describing evidence record.
+
+    This function does not raise for request failures: HTTP error statuses and
+    any other exception raised while requesting are captured in the returned
+    record.
+
+    Args:
+        name: Label stored in the record to identify this request.
+        method: HTTP method; upper-cased before use.
+        url: Endpoint URL without the query parameters given in ``params``.
+        params: Optional query parameters, normalized and appended to ``url``.
+        json_body: Optional payload, sent as compact JSON with a JSON
+            content type.
+        timeout_seconds: Timeout passed to ``urllib.request.urlopen``.
+
+    Returns:
+        A dict with timing information, the request as sent, the response
+        (status, allow-listed headers, parsed JSON or a text preview), an
+        ``ok`` flag that is true only for a 2xx status with no error, and the
+        ``error`` string if one occurred.
+    """
+    # The monotonic clock measures the duration; wall-clock stamps are for the record.
+    started_monotonic = time.monotonic()
+    started_at = iso_z()
+    full_url = encode_url(url, params)
+    headers = {
+        "Accept": "application/json",
+        "User-Agent": "orderbooks-checkpoint-2-probe/1.0",
+    }
+    data = None
+    if json_body is not None:
+        data = json.dumps(json_body, separators=(",", ":")).encode("utf-8")
+        headers["Content-Type"] = "application/json"
+    request = urllib.request.Request(
+        full_url,
+        data=data,
+        headers=headers,
+        method=method.upper(),
+    )
+
+    status_code: int | None = None
+    response_headers: dict[str, str] = {}
+    response_text = ""
+    error: str | None = None
+    try:
+        with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
+            status_code = response.status
+            response_headers = filter_headers(response.headers)
+            response_text = response.read().decode("utf-8", errors="replace")
+    except urllib.error.HTTPError as exc:
+        # Non-2xx responses still carry a status, headers and a body worth recording.
+        status_code = exc.code
+        response_headers = filter_headers(exc.headers)
+        response_text = exc.read().decode("utf-8", errors="replace")
+        error = f"HTTPError: {exc}"
+    except Exception as exc:  # noqa: BLE001 - preserve probe failure evidence
+        error = f"{type(exc).__name__}: {exc}"
+
+    duration_ms = round((time.monotonic() - started_monotonic) * 1000, 3)
+    # An empty body is not treated as a JSON error.
+    parsed_json, json_error = decode_json_maybe(response_text) if response_text else (None, None)
+
+    return {
+        "name": name,
+        "started_at_utc": started_at,
+        "ended_at_utc": iso_z(),
+        "duration_ms": duration_ms,
+        "request": {
+            "method": method.upper(),
+            "url": url,
+            "full_url": full_url,
+            "params": normalize_params(params),
+            "json_body": json_body,
+        },
+        "response": {
+            "status_code": status_code,
+            "headers": response_headers,
+            "observed_rate_limit_headers": rate_limit_headers(response_headers),
+            "json": parsed_json,
+            "json_error": json_error,
+            # A text preview (first 1000 characters) is kept only when no JSON value was parsed.
+            "text_preview": response_text[:1000] if parsed_json is None else None,
+        },
+        "ok": error is None and status_code is not None and 200 <= status_code < 300,
+        "error": error,
+    }
+
+
+def coerce_json_array(value: Any) -> list[Any]:
+    """Return ``value`` as a list, decoding it first if it is a JSON string.
+
+    Anything that is not a list, or a string that does not decode to a list,
+    yields an empty list.
+    """
+    if isinstance(value, str):
+        try:
+            value = json.loads(value)
+        except json.JSONDecodeError:
+            return []
+    return value if isinstance(value, list) else []
+
+
+def first_market(event: dict[str, Any]) -> dict[str, Any] | None:
+    """Return the first entry of ``event["markets"]`` when it is a dict, else ``None``."""
+    markets = event.get("markets")
+    if not isinstance(markets, list) or not markets:
+        return None
+    head = markets[0]
+    return head if isinstance(head, dict) else None
+
+
+def event_matches_btc_up_down(event: dict[str, Any]) -> bool:
+    """Decide whether an event looks like a Bitcoin Up/Down market.
+
+    The first market's outcomes must be exactly Up and Down (case-insensitive).
+    In addition, either the series slug starts with ``btc-up-or-down`` or the
+    event text/tags mention both Bitcoin and an up/down direction.
+    """
+    market = first_market(event) or {}
+    outcome_set = {str(item).lower() for item in coerce_json_array(market.get("outcomes"))}
+
+    searchable_fields = ("title", "slug", "ticker", "seriesSlug", "description")
+    text = " ".join(str(event.get(field, "") or "") for field in searchable_fields).lower()
+
+    tag_parts: list[str] = []
+    for tag in event.get("tags") or []:
+        if isinstance(tag, dict):
+            tag_parts.append(str(tag.get("slug", "") or tag.get("label", "") or ""))
+    tag_text = " ".join(tag_parts).lower()
+
+    series_slug = str(event.get("seriesSlug") or "").lower()
+    mentions_btc = any(word in text for word in ("bitcoin", "btc")) or "bitcoin" in tag_text
+    mentions_up_down = ("up" in text and "down" in text) or "up-or-down" in tag_text
+
+    if outcome_set != {"up", "down"}:
+        return False
+    return series_slug.startswith("btc-up-or-down") or (mentions_btc and mentions_up_down)
+
+
+def candidate_record(event: dict[str, Any], now: dt.datetime, min_lead_seconds: int) -> dict[str, Any] | None:
+    """Build a scored candidate record from an event's first market.
+
+    Returns ``None`` when the event has no usable first market, no CLOB token
+    ids or no outcomes. Otherwise returns the event, market, parsed end times,
+    status flags, token ids, outcomes and a score; higher scores are better.
+    """
+    market = first_market(event)
+    if not market:
+        return None
+    token_ids = [str(item) for item in coerce_json_array(market.get("clobTokenIds"))]
+    outcomes = [str(item) for item in coerce_json_array(market.get("outcomes"))]
+    if not token_ids or not outcomes:
+        return None
+
+    event_end = parse_iso(event.get("endDate"))
+    market_end = parse_iso(market.get("endDate"))
+    # The market's own end time takes precedence over the event's.
+    candidate_end = market_end or event_end
+    has_future_lead = bool(candidate_end) and candidate_end >= now + dt.timedelta(seconds=min_lead_seconds)
+    accepting_orders = market.get("acceptingOrders") is True
+    active_open = all(
+        (
+            event.get("active") is True,
+            event.get("closed") is False,
+            market.get("active") is True,
+            market.get("closed") is False,
+        )
+    )
+
+    # (condition, points) pairs; the score is the sum of points whose condition holds.
+    scoring = (
+        (accepting_orders, 1000),
+        (has_future_lead, 500),
+        (active_open, 200),
+        (str(event.get("seriesSlug") or "").startswith("btc-up-or-down"), 100),
+        (len(token_ids) >= 2 and len(outcomes) >= 2, 50),
+        (not active_open, -1000),
+        (bool(candidate_end) and candidate_end < now, -250),
+    )
+    score = sum(points for applies, points in scoring if applies)
+
+    return {
+        "score": score,
+        "event": event,
+        "market": market,
+        "event_end_utc": iso_z(event_end) if event_end else None,
+        "market_end_utc": iso_z(market_end) if market_end else None,
+        "has_future_lead": has_future_lead,
+        "accepting_orders": accepting_orders,
+        "active_open": active_open,
+        "token_ids": token_ids,
+        "outcomes": outcomes,
+    }
+
+
+def select_btc_up_down_market(
+    events: list[dict[str, Any]], now: dt.datetime, min_lead_seconds: int
+) -> tuple[dict[str, Any] | None, list[dict[str, Any]]]:
+    """Pick the best BTC Up/Down candidate from a list of events.
+
+    Candidates are ranked by descending score, then by earliest end time
+    (candidates without an end time sort last).
+
+    Returns:
+        The top candidate record (or ``None`` when there is none) and token-free
+        summaries of the top 20 candidates.
+    """
+    matching_events = (
+        event for event in events if isinstance(event, dict) and event_matches_btc_up_down(event)
+    )
+    records = (candidate_record(event, now, min_lead_seconds) for event in matching_events)
+    candidates = [record for record in records if record]
+
+    def ranking(record: dict[str, Any]) -> tuple[int, float]:
+        end = parse_iso(record["market"].get("endDate")) or parse_iso(record["event"].get("endDate"))
+        return (-int(record["score"]), end.timestamp() if end else float("inf"))
+
+    ranked = sorted(candidates, key=ranking)
+    shortlist = [summarize_candidate(entry, include_tokens=False) for entry in ranked[:20]]
+    best = ranked[0] if ranked else None
+    return best, shortlist
+
+
+def summarize_candidate(candidate: dict[str, Any], include_tokens: bool = True) -> dict[str, Any]:
+    """Flatten a candidate record into a compact summary dict.
+
+    The CLOB token ids are included only when ``include_tokens`` is true.
+    """
+    event, market = candidate["event"], candidate["market"]
+    summary: dict[str, Any] = {"score": candidate["score"]}
+    # Event-level fields.
+    summary.update(
+        event_id=event.get("id"),
+        event_slug=event.get("slug"),
+        event_title=event.get("title"),
+        event_end_utc=candidate.get("event_end_utc"),
+        event_active=event.get("active"),
+        event_closed=event.get("closed"),
+        series_slug=event.get("seriesSlug"),
+    )
+    # Market-level fields.
+    summary.update(
+        market_id=market.get("id"),
+        market_slug=market.get("slug"),
+        condition_id=market.get("conditionId"),
+        market_end_utc=candidate.get("market_end_utc"),
+        market_active=market.get("active"),
+        market_closed=market.get("closed"),
+        accepting_orders=market.get("acceptingOrders"),
+        enable_order_book=market.get("enableOrderBook"),
+    )
+    summary.update(
+        outcomes=candidate.get("outcomes"),
+        has_future_lead=candidate.get("has_future_lead"),
+    )
+    if include_tokens:
+        summary["clob_token_ids"] = candidate.get("token_ids")
+    return summary
+
+
+def field_names(payload: Any) -> list[str]:
+    """Return the sorted, de-duplicated key names found in ``payload``.
+
+    For a dict these are its own keys; for a list, the keys of the dicts among
+    its first ten items. Any other payload yields an empty list.
+    """
+    if isinstance(payload, dict):
+        records = [payload]
+    elif isinstance(payload, list):
+        records = [item for item in payload[:10] if isinstance(item, dict)]
+    else:
+        records = []
+    return sorted({str(key) for record in records for key in record})
+
+
+def nested_field_names(payload: Any, key: str) -> list[str]:
+    """Return sorted key names of the dicts stored in the list under ``key``.
+
+    ``payload`` may be a dict, or a list whose first ten dict items are
+    inspected. At most ten entries are sampled from each nested list.
+    """
+    if isinstance(payload, dict):
+        holders = [payload]
+    elif isinstance(payload, list):
+        holders = [item for item in payload[:10] if isinstance(item, dict)]
+    else:
+        holders = []
+
+    sampled: list[Any] = []
+    for holder in holders:
+        nested = holder.get(key)
+        if isinstance(nested, list):
+            sampled.extend(nested[:10])
+
+    return sorted({str(field) for entry in sampled if isinstance(entry, dict) for field in entry})
+
+
+def send_ws_frame(sock: ssl.SSLSocket, opcode: int, payload: bytes) -> None:
+    """Send ``payload`` as a single final, masked websocket frame.
+
+    The frame carries the FIN bit, the given opcode, a 7-bit, 16-bit or 64-bit
+    length as required by the payload size, and a fresh random 4-byte mask.
+    """
+    masking_key = os.urandom(4)
+    size = len(payload)
+
+    frame = bytearray()
+    frame.append(0x80 | opcode)  # FIN bit plus opcode.
+    # The high bit of the length byte marks the payload as masked.
+    if size < 126:
+        frame.append(0x80 | size)
+    elif size < 65536:
+        frame.append(0x80 | 126)
+        frame += struct.pack("!H", size)
+    else:
+        frame.append(0x80 | 127)
+        frame += struct.pack("!Q", size)
+    frame += masking_key
+    frame += bytes(value ^ masking_key[position % 4] for position, value in enumerate(payload))
+    sock.sendall(frame)
+
+
+def read_exact(sock: ssl.SSLSocket, length: int) -> bytes:
+    """Read exactly ``length`` bytes from ``sock``.
+
+    Raises:
+        EOFError: If the peer closes the connection before enough bytes arrive.
+    """
+    buffer = bytearray()
+    remaining = length
+    while remaining > 0:
+        piece = sock.recv(remaining)
+        if not piece:
+            raise EOFError("websocket connection closed while reading frame")
+        buffer += piece
+        remaining = length - len(buffer)
+    return bytes(buffer)
+
+
+def read_ws_frame(sock: ssl.SSLSocket) -> tuple[int, bytes]:
+    """Read one websocket frame and return ``(opcode, payload)``.
+
+    Handles 7-bit, 16-bit and 64-bit payload lengths and unmasks the payload if
+    the mask bit is set. The FIN bit is not inspected, so fragments are
+    returned frame by frame.
+    """
+    header = read_exact(sock, 2)
+    first, second = header[0], header[1]
+    opcode = first & 0x0F
+    length = second & 0x7F
+    masked = (second & 0x80) != 0
+    # Length markers 126 and 127 mean the real length follows in 2 or 8 bytes.
+    if length == 126:
+        (length,) = struct.unpack("!H", read_exact(sock, 2))
+    elif length == 127:
+        (length,) = struct.unpack("!Q", read_exact(sock, 8))
+    mask = read_exact(sock, 4) if masked else b""
+    payload = read_exact(sock, length) if length else b""
+    if not masked:
+        return opcode, payload
+    unmasked = bytes(value ^ mask[position % 4] for position, value in enumerate(payload))
+    return opcode, unmasked
+
+
+def parse_ws_headers(raw_headers: str) -> tuple[str, dict[str, str]]:
+    """Split a raw HTTP response head into its status line and safe headers.
+
+    Lines without a colon are ignored. The parsed headers are passed through
+    ``filter_headers`` before being returned.
+    """
+    status_line, *header_lines = raw_headers.split("\r\n")
+    parsed: dict[str, str] = {}
+    for line in header_lines:
+        name, separator, text = line.partition(":")
+        if separator:
+            parsed[name.strip()] = text.strip()
+    return status_line, filter_headers(parsed)
+
+
+def classify_ws_payload(payload: Any) -> list[str]:
+    """Label each dict in a websocket payload with an event type.
+
+    A dict's own truthy ``event_type`` wins. Failing that, a dict containing
+    all book keys is labelled ``book_without_event_type`` and anything else
+    ``unknown_object``. Non-dict items are skipped.
+    """
+    book_keys = {"market", "asset_id", "bids", "asks", "timestamp"}
+
+    def label_for(entry: dict[str, Any]) -> str:
+        declared = entry.get("event_type")
+        if declared:
+            return str(declared)
+        if book_keys.issubset(entry.keys()):
+            return "book_without_event_type"
+        return "unknown_object"
+
+    entries = payload if isinstance(payload, list) else [payload]
+    return [label_for(entry) for entry in entries if isinstance(entry, dict)]
+
+
+def websocket_probe(
+    url: str,
+    token_ids: list[str],
+    *,
+    timeout_seconds: float,
+    max_messages: int,
+) -> dict[str, Any]:
+    """Open a TLS websocket, subscribe to ``token_ids`` and capture messages.
+
+    The probe performs the HTTP upgrade handshake by hand, sends one market
+    subscription and then reads frames until ``max_messages`` have been
+    recorded, the peer sends a close frame, or ``timeout_seconds`` elapse.
+    Failures are captured in the result instead of being raised.
+
+    Args:
+        url: Websocket URL; the port defaults to 443 when not given.
+        token_ids: Asset ids placed in the subscription message.
+        timeout_seconds: Timeout for the connection and handshake, and the
+            overall budget for reading messages after subscribing.
+        max_messages: Maximum number of message records to capture.
+
+    Returns:
+        A record with the request, handshake details, captured messages, the
+        sorted distinct event types seen, ``ok`` (true when at least one
+        message record was captured) and ``error``. If ``url`` has no host,
+        only ``ok`` and ``error`` are present.
+    """
+    started_monotonic = time.monotonic()
+    started_at = iso_z()
+    parsed = urllib.parse.urlparse(url)
+    host = parsed.hostname
+    if not host:
+        return {"ok": False, "error": "missing websocket host"}
+    port = parsed.port or 443
+    path = parsed.path or "/"
+    if parsed.query:
+        path = f"{path}?{parsed.query}"
+    subscription = {
+        "assets_ids": token_ids,
+        "type": "market",
+        "custom_feature_enabled": True,
+    }
+    result: dict[str, Any] = {
+        "name": "clob_market_websocket",
+        "started_at_utc": started_at,
+        "request": {
+            "url": url,
+            "subscription": subscription,
+            "max_messages": max_messages,
+            "timeout_seconds": timeout_seconds,
+        },
+        "handshake": {},
+        "messages": [],
+        "message_event_types": [],
+        "ok": False,
+        "error": None,
+    }
+    sock: ssl.SSLSocket | None = None
+    try:
+        raw_sock = socket.create_connection((host, port), timeout=timeout_seconds)
+        sock = ssl.create_default_context().wrap_socket(raw_sock, server_hostname=host)
+        sock.settimeout(timeout_seconds)
+        key = base64.b64encode(os.urandom(16)).decode("ascii")
+        request = (
+            f"GET {path} HTTP/1.1\r\n"
+            f"Host: {host}\r\n"
+            "Upgrade: websocket\r\n"
+            "Connection: Upgrade\r\n"
+            f"Sec-WebSocket-Key: {key}\r\n"
+            "Sec-WebSocket-Version: 13\r\n"
+            "User-Agent: orderbooks-checkpoint-2-probe/1.0\r\n"
+            "\r\n"
+        )
+        sock.sendall(request.encode("ascii"))
+
+        # Read the response head one byte at a time so that nothing beyond the
+        # blank line is consumed: any frame bytes the server sends right after
+        # the handshake must stay in the socket for read_ws_frame().
+        raw_headers = bytearray()
+        while not raw_headers.endswith(b"\r\n\r\n"):
+            chunk = sock.recv(1)
+            if not chunk:
+                # recv() returns b"" once the peer has closed; without this
+                # check the loop would spin forever on a truncated handshake.
+                raise EOFError("websocket connection closed during handshake")
+            raw_headers.extend(chunk)
+            if len(raw_headers) > 65536:
+                raise ValueError("websocket handshake headers exceeded 64 KiB")
+        header_text = bytes(raw_headers[:-4]).decode("iso-8859-1", errors="replace")
+        status_line, response_headers = parse_ws_headers(header_text)
+        result["handshake"] = {
+            "status_line": status_line,
+            "headers": response_headers,
+            "observed_rate_limit_headers": rate_limit_headers(response_headers),
+        }
+        if " 101 " not in status_line:
+            raise ValueError(f"websocket upgrade failed: {status_line}")
+
+        send_ws_frame(sock, 0x1, json.dumps(subscription).encode("utf-8"))
+        deadline = time.monotonic() + timeout_seconds
+        while time.monotonic() < deadline and len(result["messages"]) < max_messages:
+            remaining = max(0.1, deadline - time.monotonic())
+            sock.settimeout(remaining)
+            try:
+                opcode, payload_bytes = read_ws_frame(sock)
+            except TimeoutError:
+                # Running out of time after capturing messages is the normal
+                # end of a bounded probe, not a failure. With nothing captured
+                # the timeout is still reported as the error.
+                if result["messages"]:
+                    break
+                raise
+            if opcode == 0x8:
+                # Close frame from the peer.
+                result["messages"].append({"opcode": opcode, "close": True})
+                break
+            if opcode == 0x9:
+                # Ping: answer with a pong carrying the same payload.
+                send_ws_frame(sock, 0xA, payload_bytes)
+                continue
+            if opcode != 0x1:
+                # Non-text frames are recorded by size only.
+                result["messages"].append(
+                    {"opcode": opcode, "payload_length_bytes": len(payload_bytes)}
+                )
+                continue
+            text = payload_bytes.decode("utf-8", errors="replace")
+            parsed_payload, json_error = decode_json_maybe(text)
+            event_types = classify_ws_payload(parsed_payload)
+            result["message_event_types"].extend(event_types)
+            result["messages"].append(
+                {
+                    "opcode": opcode,
+                    "payload_length_bytes": len(payload_bytes),
+                    "json": parsed_payload,
+                    "json_error": json_error,
+                    "event_types": event_types,
+                    "text_preview": None if parsed_payload is not None else text[:1000],
+                }
+            )
+        result["ok"] = bool(result["messages"])
+    except Exception as exc:  # noqa: BLE001 - preserve probe failure evidence
+        result["error"] = f"{type(exc).__name__}: {exc}"
+    finally:
+        if sock is not None:
+            # Best-effort shutdown: errors while closing must not mask the
+            # result gathered so far.
+            try:
+                send_ws_frame(sock, 0x8, b"")
+            except Exception:
+                pass
+            try:
+                sock.close()
+            except Exception:
+                pass
+        result["ended_at_utc"] = iso_z()
+        result["duration_ms"] = round((time.monotonic() - started_monotonic) * 1000, 3)
+        result["message_event_types"] = sorted(set(result["message_event_types"]))
+    return result
+
+
+def request_json_payload(record: dict[str, Any]) -> Any:
+    """Return the parsed JSON body stored in a request record, or ``None``."""
+    response = record.get("response")
+    if not response:
+        return None
+    return response.get("json")
+
+
+def top_level_field_summary(requests: dict[str, dict[str, Any]], websocket: dict[str, Any]) -> dict[str, Any]:
+    """Summarize which field names were observed per request and on the websocket.
+
+    Each HTTP request gets its top-level field names and the field names of its
+    ``bids``/``asks`` levels. Websocket messages are merged into one entry that
+    also includes the keys of ``price_changes`` items and the observed event types.
+    """
+
+    def level_fields(payload: Any) -> set[str]:
+        return set(nested_field_names(payload, "bids")) | set(nested_field_names(payload, "asks"))
+
+    summary: dict[str, Any] = {}
+    for name, record in requests.items():
+        payload = request_json_payload(record)
+        summary[name] = {
+            "top_level_fields": field_names(payload),
+            "bid_ask_level_fields": sorted(level_fields(payload)),
+        }
+
+    ws_top: set[str] = set()
+    ws_nested: set[str] = set()
+    for message in websocket.get("messages", []):
+        payload = message.get("json")
+        ws_top |= set(field_names(payload))
+        ws_nested |= level_fields(payload)
+        # price_changes is only inspected on dict payloads, first ten entries.
+        changes = payload.get("price_changes") if isinstance(payload, dict) else None
+        if isinstance(changes, list):
+            for change in changes[:10]:
+                if isinstance(change, dict):
+                    ws_nested.update(str(key) for key in change)
+
+    summary["clob_market_websocket"] = {
+        "top_level_fields": sorted(ws_top),
+        "nested_level_or_change_fields": sorted(ws_nested),
+        "event_types_observed": websocket.get("message_event_types", []),
+    }
+    return summary
+
+
+def build_endpoint_findings(
+    selected: dict[str, Any],
+    requests: dict[str, dict[str, Any]],
+    websocket: dict[str, Any],
+) -> dict[str, Any]:
+    """Assemble the per-topic endpoint findings recorded in the probe artifact.
+
+    Args:
+        selected: Chosen market candidate. Reads ``market``, ``outcomes`` and
+            ``token_ids``; ``token_ids`` must be non-empty.
+        requests: HTTP request records keyed by probe name. Reads
+            ``gamma_events_bitcoin_tag``, ``clob_get_book``, ``clob_post_books``
+            and ``data_api_recent_trades``; every record also contributes its
+            ``observed_rate_limit_headers``.
+        websocket: Websocket probe record (``ok``, ``messages``,
+            ``message_event_types`` and ``handshake`` are read when present).
+
+    Returns:
+        A dict keyed by topic: discovery, filtering, token resolution, single
+        and batch order books, websocket, trades, rate limits and timestamps.
+
+    Raises:
+        KeyError: If a required record or key is missing.
+        IndexError: If ``token_ids`` is empty.
+    """
+    market = selected["market"]
+    outcomes = selected["outcomes"]
+    token_ids = selected["token_ids"]
+    # Outcomes and token ids are paired by position; a missing token id maps to None.
+    outcome_tokens = [
+        {"outcome": outcome, "token_id": token_ids[index] if index < len(token_ids) else None}
+        for index, outcome in enumerate(outcomes)
+    ]
+    book_payload = request_json_payload(requests["clob_get_book"])
+    books_payload = request_json_payload(requests["clob_post_books"])
+    trades_payload = request_json_payload(requests["data_api_recent_trades"])
+
+    return {
+        "active_market_discovery": {
+            "endpoint": f"{GAMMA_BASE}/events",
+            "method": "GET",
+            "params_used": requests["gamma_events_bitcoin_tag"]["request"]["params"],
+            "answer": "Use Gamma /events with active=true&closed=false and pagination; events include their markets. /markets can fetch individual market records by slug, condition_ids, or clob_token_ids.",
+        },
+        "btc_up_down_filtering": {
+            "answer": "Filter Gamma events/markets by Bitcoin tag evidence, seriesSlug beginning btc-up-or-down, text containing BTC/Bitcoin plus Up/Down, and market outcomes exactly ['Up', 'Down'].",
+            "source_fields": [
+                "event.seriesSlug",
+                "event.tags[].slug or label",
+                "event.title",
+                "event.slug",
+                "market.outcomes",
+                "market.clobTokenIds",
+            ],
+        },
+        "condition_and_token_resolution": {
+            "condition_id_field": "market.conditionId",
+            "outcomes_field": "market.outcomes",
+            "token_ids_field": "market.clobTokenIds",
+            "mapping_rule": "Parse outcomes and clobTokenIds as arrays and map by index.",
+            "selected_condition_id": market.get("conditionId"),
+            "selected_outcome_tokens": outcome_tokens,
+        },
+        "single_order_book": {
+            "endpoint": f"{CLOB_BASE}/book",
+            "method": "GET",
+            "params": {"token_id": token_ids[0]},
+            "status_code": requests["clob_get_book"]["response"]["status_code"],
+            "field_count": len(field_names(book_payload)),
+            "bid_levels": len(book_payload.get("bids", [])) if isinstance(book_payload, dict) else None,
+            "ask_levels": len(book_payload.get("asks", [])) if isinstance(book_payload, dict) else None,
+        },
+        "batch_order_books": {
+            "endpoint": f"{CLOB_BASE}/books",
+            "method": "POST",
+            "json_body_shape": [{"token_id": "<token_id>"}],
+            "status_code": requests["clob_post_books"]["response"]["status_code"],
+            "book_count": len(books_payload) if isinstance(books_payload, list) else None,
+        },
+        "market_websocket": {
+            "endpoint": MARKET_WS_URL,
+            "subscription_shape": {
+                "assets_ids": ["<token_id_1>", "<token_id_2>"],
+                "type": "market",
+                "custom_feature_enabled": True,
+            },
+            "probe_ok": websocket.get("ok"),
+            "message_count": len(websocket.get("messages", [])),
+            "event_types_observed": websocket.get("message_event_types", []),
+            "note": "Initial observed snapshot may arrive as a JSON array of book objects without event_type, followed by event-typed updates.",
+        },
+        "trades": {
+            "public_recent_trades_endpoint": f"{DATA_API_BASE}/trades",
+            "public_recent_trades_method": "GET",
+            # Report the parameters recorded on the request itself (the trade
+            # limit is configurable by the caller) rather than restating fixed
+            # values, mirroring how the discovery entry reports its params.
+            "params": requests["data_api_recent_trades"]["request"]["params"],
+            "public_recent_trades_status_code": requests["data_api_recent_trades"]["response"]["status_code"],
+            "public_recent_trade_count": len(trades_payload) if isinstance(trades_payload, list) else None,
+            "websocket_trade_event": "market websocket documents and can emit last_trade_price for subscribed assets",
+            "excluded_endpoint": "CLOB GET /trades requires readonly or level 2 API key authentication and was not used.",
+        },
+        "rate_limits": {
+            "documented": DOCUMENTED_RATE_LIMITS,
+            # One entry per HTTP request plus one for the websocket handshake.
+            "observed_headers": {
+                name: record["response"].get("observed_rate_limit_headers", {})
+                for name, record in requests.items()
+            }
+            | {
+                "clob_market_websocket": websocket.get("handshake", {}).get(
+                    "observed_rate_limit_headers", {}
+                )
+            },
+            "observed_note": "The bounded probe did not intentionally approach limits; absence of rate-limit headers is not a limit test.",
+        },
+        "timestamps": {
+            "gamma_metadata": [
+                "startDate",
+                "creationDate",
+                "endDate",
+                "createdAt",
+                "updatedAt",
+                "acceptingOrdersTimestamp",
+                "eventStartTime",
+            ],
+            "clob_book": "timestamp string in order-book payload; observed as Unix epoch milliseconds.",
+            "market_websocket": "timestamp string in websocket book/price/trade updates; observed as Unix epoch milliseconds.",
+            "data_api_trades": "timestamp integer in recent trade payload; observed as Unix epoch seconds.",
+            "probe_metadata": ["started_at_utc", "ended_at_utc", "duration_ms"],
+        },
+    }
+
+
+def markdown_table_row(values: list[Any]) -> str:
+    """Render ``values`` as one pipe-delimited Markdown table row.
+
+    Each value is stringified. Newlines become spaces so a cell cannot break
+    the row, and literal ``|`` characters are backslash-escaped so a cell
+    cannot be split into extra columns.
+    """
+    cells = [str(value).replace("\n", " ").replace("|", "\\|") for value in values]
+    return "| " + " | ".join(cells) + " |"
+
+
+def write_probe_markdown(probe: dict[str, Any], path: Path) -> None:
+    """Render the probe result as a Markdown report and write it to ``path``.
+
+    Args:
+        probe: Probe artifact. Reads ``selected_market``, ``gate``,
+            ``endpoint_findings``, ``validation_summary``, ``artifact_status``,
+            ``requests``, ``websocket_probe``, ``field_summary``,
+            ``fake_progress_risk`` and ``next_step``.
+        path: Destination file; missing parent directories are created.
+
+    Raises:
+        KeyError: If a required key is absent from ``probe``.
+    """
+    selected = probe["selected_market"]
+    gate = probe["gate"]
+    endpoint_findings = probe["endpoint_findings"]
+    validation = probe["validation_summary"]
+
+    # Derive the "observed" half of the rate-limit answer from the captured
+    # headers so the report cannot claim none were seen when some were.
+    observed_headers = endpoint_findings["rate_limits"]["observed_headers"]
+    sources_with_headers = sorted(
+        name for name, headers in observed_headers.items() if headers
+    )
+    if sources_with_headers:
+        observed_note = (
+            "this bounded probe observed rate-limit related headers from: "
+            + ", ".join(sources_with_headers)
+            + "."
+        )
+    else:
+        observed_note = "this bounded probe observed no Retry-After or rate-limit headers."
+
+    lines = [
+        "# Polymarket Public Sources Probe v1",
+        "",
+        f"Artifact status: `{probe['artifact_status']}`",
+        "",
+        "## Gate",
+        "",
+        f"Status: `{gate['status']}`",
+        "",
+        gate["reason"],
+        "",
+        "## Scope",
+        "",
+        "Bounded public endpoint probe only. No collector, no trading, no private endpoints, no secrets.",
+        "",
+        "## Selected Market",
+        "",
+        markdown_table_row(["Field", "Value"]),
+        markdown_table_row(["---", "---"]),
+    ]
+    plain_fields = (
+        "event_slug",
+        "event_title",
+        "series_slug",
+        "market_id",
+        "condition_id",
+        "market_end_utc",
+        "accepting_orders",
+    )
+    for key in plain_fields:
+        lines.append(markdown_table_row([key, selected.get(key)]))
+    # List-valued fields are JSON-encoded so they fit in a single cell.
+    for key in ("outcomes", "clob_token_ids"):
+        lines.append(markdown_table_row([key, json.dumps(selected.get(key))]))
+
+    answers = [
+        (
+            "How are active markets discovered?",
+            endpoint_findings["active_market_discovery"]["answer"],
+        ),
+        (
+            "How can BTC up/down markets be filtered?",
+            endpoint_findings["btc_up_down_filtering"]["answer"],
+        ),
+        (
+            "How are conditionId and token IDs resolved?",
+            endpoint_findings["condition_and_token_resolution"]["mapping_rule"],
+        ),
+        (
+            "How is the current order book fetched?",
+            f"GET {CLOB_BASE}/book?token_id=<token_id>",
+        ),
+        (
+            "Is there a batch order-book endpoint?",
+            f"Yes: POST {CLOB_BASE}/books with an array of token_id objects.",
+        ),
+        (
+            "Is there a market websocket?",
+            f"Yes: {MARKET_WS_URL}; bounded probe ok={endpoint_findings['market_websocket']['probe_ok']}.",
+        ),
+        (
+            "Is there a trade websocket or recent trades endpoint?",
+            f"Market websocket can emit last_trade_price; public recent trades are at GET {DATA_API_BASE}/trades?market=<conditionId>.",
+        ),
+        (
+            "What rate limits are documented or observed?",
+            "Official docs list Gamma, Data API, and CLOB limits; " + observed_note,
+        ),
+        (
+            "What fields are returned?",
+            "See field summary in the JSON artifact; key fields include conditionId, outcomes, clobTokenIds, bids, asks, timestamp, hash, price, size.",
+        ),
+        (
+            "What timestamps exist?",
+            "Gamma ISO date fields, CLOB/websocket epoch-millisecond strings, Data API trade epoch seconds, and probe request timestamps.",
+        ),
+    ]
+    lines.extend(
+        [
+            "",
+            "## Questions Answered",
+            "",
+            markdown_table_row(["Question", "Answer"]),
+            markdown_table_row(["---", "---"]),
+        ]
+    )
+    lines.extend(markdown_table_row([question, answer]) for question, answer in answers)
+    lines.extend(
+        [
+            "",
+            "## Endpoint Evidence",
+            "",
+            markdown_table_row(["Name", "Method", "URL", "Status", "Duration ms"]),
+            markdown_table_row(["---", "---", "---", "---", "---"]),
+        ]
+    )
+    for name, record in probe["requests"].items():
+        request = record["request"]
+        response = record["response"]
+        lines.append(
+            markdown_table_row(
+                [
+                    name,
+                    request["method"],
+                    request["full_url"],
+                    response["status_code"],
+                    record["duration_ms"],
+                ]
+            )
+        )
+    ws = probe["websocket_probe"]
+    lines.extend(
+        [
+            markdown_table_row(
+                [
+                    "clob_market_websocket",
+                    "WSS",
+                    MARKET_WS_URL,
+                    ws.get("handshake", {}).get("status_line"),
+                    ws.get("duration_ms"),
+                ]
+            ),
+            "",
+            "## Field Summary",
+            "",
+            "The full raw JSON payloads and websocket messages are preserved in the JSON probe artifact.",
+            "",
+            "```json",
+            json.dumps(probe["field_summary"], indent=2, sort_keys=True),
+            "```",
+            "",
+            "## Rate Limits",
+            "",
+            "Documented limits from official docs:",
+            "",
+            "```json",
+            json.dumps(DOCUMENTED_RATE_LIMITS, indent=2, sort_keys=True),
+            "```",
+            "",
+            "Observed rate-limit headers in this bounded run:",
+            "",
+            "```json",
+            json.dumps(observed_headers, indent=2, sort_keys=True),
+            "```",
+            "",
+            "## Validation Evidence",
+            "",
+            markdown_table_row(["Check", "Result"]),
+            markdown_table_row(["---", "---"]),
+        ]
+    )
+    checks = (
+        "market_metadata_fetched",
+        "single_order_book_fetched",
+        "batch_order_books_fetched",
+        "recent_trades_checked",
+        "websocket_checked",
+    )
+    lines.extend(markdown_table_row([check, validation[check]]) for check in checks)
+    lines.extend(["", "## Official Sources", ""])
+    for source in OFFICIAL_SOURCES:
+        lines.append(f"- [{source['name']}]({source['url']}): {source['finding']}")
+    lines.extend(
+        [
+            "",
+            "## Strongest Fake-Progress Risk",
+            "",
+            probe["fake_progress_risk"],
+            "",
+            "## Next Smallest Step",
+            "",
+            probe["next_step"],
+            "",
+        ]
+    )
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text("\n".join(lines), encoding="utf-8")
+
+
+def write_checkpoint_report(probe: dict[str, Any], path: Path) -> None:
+    """Write the Checkpoint 2 Markdown report for ``probe`` to ``path``.
+
+    The file table lists the probe script and the default artifact locations,
+    each with a fixed ``valid`` status. Parent directories of ``path`` are
+    created when missing.
+    """
+    gate = probe["gate"]
+    files = [
+        "scripts/probe_polymarket_public_sources.py",
+        str(DEFAULT_PROBE_JSON),
+        str(DEFAULT_PROBE_MD),
+        str(DEFAULT_CHECKPOINT_MANIFEST),
+        str(DEFAULT_CHECKPOINT_REPORT),
+    ]
+    kinds = [
+        "bounded probe script",
+        "raw probe evidence",
+        "probe report",
+        "checkpoint manifest",
+        "checkpoint report",
+    ]
+    kind_by_path = dict(zip(files, kinds))
+
+    heading = [
+        "# Checkpoint 2: Polymarket Public Data Source Probe",
+        "",
+        f"Gate: {gate['status']}",
+        "",
+        f"Started at UTC: {probe['started_at_utc']}",
+        f"Ended at UTC: {probe['ended_at_utc']}",
+        "",
+        "## Scope",
+        "",
+        "Built a bounded public Polymarket source probe. Explicitly excluded collector implementation, polling, dashboards, databases, trading, order placement, wallet logic, private endpoints, and secrets.",
+        "",
+        "## Files Created Or Changed",
+        "",
+        markdown_table_row(["Path", "Kind", "Status"]),
+        markdown_table_row(["---", "---", "---"]),
+    ]
+    file_rows = [
+        markdown_table_row([file_path, kind_by_path[file_path], "valid"])
+        for file_path in files
+    ]
+    remainder = [
+        "",
+        "## Validation",
+        "",
+        "Commands run by the builder:",
+        "",
+        "```sh",
+        probe["command"],
+        "```",
+        "",
+        f"Result: {gate['status']} - {gate['reason']}",
+        "",
+        "Evidence summary:",
+        "",
+        markdown_table_row(["Evidence", "Result"]),
+        markdown_table_row(["---", "---"]),
+        markdown_table_row(["selected_condition_id", probe["selected_market"].get("condition_id")]),
+        markdown_table_row(["selected_tokens", json.dumps(probe["selected_market"].get("clob_token_ids"))]),
+        markdown_table_row(["GET /book status", probe["requests"]["clob_get_book"]["response"]["status_code"]]),
+        markdown_table_row(["POST /books status", probe["requests"]["clob_post_books"]["response"]["status_code"]]),
+        markdown_table_row(["GET /trades status", probe["requests"]["data_api_recent_trades"]["response"]["status_code"]]),
+        markdown_table_row(["websocket ok", probe["websocket_probe"].get("ok")]),
+        "",
+        "## Endpoint Findings",
+        "",
+        "- Active discovery: Gamma `GET /events?active=true&closed=false`, with pagination. Events include market records.",
+        "- BTC up/down filtering: Bitcoin tag plus `seriesSlug` beginning `btc-up-or-down`, text containing BTC/Bitcoin and Up/Down, and outcomes `Up`/`Down`.",
+        "- Token resolution: parse market `outcomes` and `clobTokenIds`, then map by index; condition ID is `conditionId`.",
+        "- Single book: CLOB `GET /book?token_id=<token_id>`.",
+        "- Batch books: CLOB `POST /books` with `[{'token_id': '<token_id>'}, ...]`.",
+        "- Market websocket: public `wss://ws-subscriptions-clob.polymarket.com/ws/market` subscription by `assets_ids`.",
+        "- Recent trades: public Data API `GET /trades?market=<conditionId>`; authenticated CLOB `GET /trades` was excluded.",
+        "",
+        "## Strongest Fake-Progress Risk",
+        "",
+        probe["fake_progress_risk"],
+        "",
+        "## Next Smallest Step",
+        "",
+        probe["next_step"],
+        "",
+    ]
+    document = "\n".join(heading + file_rows + remainder)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(document, encoding="utf-8")
+
+
+def write_checkpoint_manifest(
+    probe: dict[str, Any],
+    path: Path,
+    artifacts: list[dict[str, Any]],
+) -> None:
+    """Serialise the Checkpoint 2 manifest for ``probe`` to ``path`` as JSON.
+
+    The manifest records the gate status, the probe time window, the supplied
+    ``artifacts`` list, the validation command and summary, and fixed
+    decisions/assumptions. Output is indented, key-sorted and newline-terminated;
+    parent directories of ``path`` are created when missing.
+    """
+    gate = probe["gate"]
+    status = gate["status"]
+    started = probe["started_at_utc"]
+    ended = probe["ended_at_utc"]
+
+    if status == "PASS":
+        command_result = "exit_code_0"
+    else:
+        command_result = "completed"
+    command_entry = {
+        "command": probe["command"],
+        "result": command_result,
+        "summary": gate["reason"],
+    }
+    validation = {
+        "commands": [command_entry],
+        "summary": probe["validation_summary"],
+    }
+    decisions = [
+        {
+            "decision": "Use Gamma events plus market records for discovery instead of adding a generic discovery framework.",
+            "reason": "Checkpoint 2 only needs source identification; Checkpoint 3 can turn this into a small BTC discovery script.",
+        },
+        {
+            "decision": "Use public CLOB market-data endpoints and public Data API trades; exclude authenticated CLOB trade endpoints.",
+            "reason": "Project rules require public data only and no secrets.",
+        },
+    ]
+    assumptions = [
+        "Gamma market outcomes and clobTokenIds arrays align by index.",
+        "CLOB/websocket order-book timestamps observed as epoch milliseconds should be preserved raw until later normalization confirms semantics.",
+        "Data API public trade timestamps observed as epoch seconds should be preserved raw until later normalization confirms semantics.",
+    ]
+
+    manifest: dict[str, Any] = {}
+    manifest["schema_name"] = "checkpoint_manifest"
+    manifest["schema_version"] = 1
+    manifest["checkpoint_id"] = 2
+    manifest["checkpoint_name"] = "Polymarket Public Data Source Probe"
+    manifest["status"] = status
+    manifest["started_at_utc"] = started
+    manifest["ended_at_utc"] = ended
+    manifest["scope"] = "Bounded public endpoint probe only; no collector, trading, wallet, private endpoint, database, dashboard, or generic multi-market implementation."
+    manifest["artifacts"] = artifacts
+    manifest["validation"] = validation
+    manifest["decisions"] = decisions
+    manifest["assumptions"] = assumptions
+    manifest["fake_progress_risk"] = probe["fake_progress_risk"]
+    manifest["next_step"] = probe["next_step"]
+
+    encoded = json.dumps(manifest, indent=2, sort_keys=True) + "\n"
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(encoded, encoding="utf-8")
+
+
+def build_probe(args: argparse.Namespace) -> dict[str, Any]:
+    """Run the bounded public-endpoint probe and return the probe artifact.
+
+    Steps, in order: two Gamma discovery requests, selection of one BTC
+    up/down market from their combined events, a Gamma market lookup by slug,
+    a single CLOB order book, a batch CLOB order-book request, a Data API
+    recent-trades request, and (unless ``args.skip_websocket`` is set) a
+    websocket probe. The gate is PASS only when both the market metadata and
+    the single order book were fetched; the batch, trades and websocket checks
+    are recorded but do not affect the gate.
+
+    Args:
+        args: Parsed CLI options. Reads ``events_limit``, ``search_limit``,
+            ``trades_limit``, ``http_timeout``, ``websocket_timeout``,
+            ``websocket_messages``, ``min_future_lead_seconds`` and
+            ``skip_websocket``.
+
+    Returns:
+        The probe dict, including ``requests``, ``websocket_probe``,
+        ``validation_summary``, ``gate``, ``field_summary`` and
+        ``endpoint_findings``.
+
+    Raises:
+        RuntimeError: If no market candidate is selected.
+    """
+    started_at = iso_z()
+    now = utc_now()
+    # NOTE(review): arguments are joined without shell quoting, so the recorded
+    # command may not be copy-pasteable when an argument contains whitespace.
+    command = " ".join([Path(sys.argv[0]).as_posix(), *sys.argv[1:]]) or str(sys.argv[0])
+
+    # Insertion order of this dict is the row order of the Markdown evidence table.
+    requests: dict[str, dict[str, Any]] = {}
+    requests["gamma_events_bitcoin_tag"] = http_json_request(
+        "gamma_events_bitcoin_tag",
+        "GET",
+        f"{GAMMA_BASE}/events",
+        params={
+            # presumably 235 is Gamma's Bitcoin tag id (per the request name) - TODO confirm
+            "tag_id": 235,
+            "related_tags": True,
+            "active": True,
+            "closed": False,
+            "limit": args.events_limit,
+            "order": "endDate",
+            "ascending": True,
+        },
+        timeout_seconds=args.http_timeout,
+    )
+    requests["gamma_public_search_btc_up_down"] = http_json_request(
+        "gamma_public_search_btc_up_down",
+        "GET",
+        f"{GAMMA_BASE}/public-search",
+        params={
+            "q": "bitcoin up or down",
+            "events_status": "active",
+            "limit_per_type": args.search_limit,
+            "keep_closed_markets": 0,
+            "search_tags": True,
+        },
+        timeout_seconds=args.http_timeout,
+    )
+
+    # Pool events from both discovery responses; a payload of an unexpected
+    # shape simply contributes nothing.
+    event_sources: list[dict[str, Any]] = []
+    events_payload = request_json_payload(requests["gamma_events_bitcoin_tag"])
+    if isinstance(events_payload, list):
+        event_sources.extend(events_payload)
+    search_payload = request_json_payload(requests["gamma_public_search_btc_up_down"])
+    if isinstance(search_payload, dict) and isinstance(search_payload.get("events"), list):
+        event_sources.extend(search_payload["events"])
+
+    selected, candidates = select_btc_up_down_market(
+        event_sources, now, args.min_future_lead_seconds
+    )
+    if selected is None:
+        raise RuntimeError("No BTC up/down market candidate with CLOB token IDs found")
+
+    selected_summary = summarize_candidate(selected)
+    # Prefer the market's own slug and fall back to the event slug.
+    market_slug = selected["market"].get("slug") or selected["event"].get("slug")
+    requests["gamma_market_by_slug"] = http_json_request(
+        "gamma_market_by_slug",
+        "GET",
+        f"{GAMMA_BASE}/markets",
+        params={"slug": market_slug},
+        timeout_seconds=args.http_timeout,
+    )
+
+    token_ids = selected["token_ids"]
+    condition_id = selected["market"].get("conditionId")
+    # Single-book probe uses only the first token id.
+    requests["clob_get_book"] = http_json_request(
+        "clob_get_book",
+        "GET",
+        f"{CLOB_BASE}/book",
+        params={"token_id": token_ids[0]},
+        timeout_seconds=args.http_timeout,
+    )
+    # Batch probe requests at most the first two token ids.
+    requests["clob_post_books"] = http_json_request(
+        "clob_post_books",
+        "POST",
+        f"{CLOB_BASE}/books",
+        json_body=[{"token_id": token_id} for token_id in token_ids[:2]],
+        timeout_seconds=args.http_timeout,
+    )
+    requests["data_api_recent_trades"] = http_json_request(
+        "data_api_recent_trades",
+        "GET",
+        f"{DATA_API_BASE}/trades",
+        params={"market": condition_id, "limit": args.trades_limit, "offset": 0},
+        timeout_seconds=args.http_timeout,
+    )
+
+    if args.skip_websocket:
+        # Placeholder record with the keys the report writers read, marked as
+        # not ok so "websocket_checked" ends up False.
+        ws_probe = {
+            "name": "clob_market_websocket",
+            "started_at_utc": iso_z(),
+            "ended_at_utc": iso_z(),
+            "duration_ms": 0,
+            "request": {
+                "url": MARKET_WS_URL,
+                "subscription": {
+                    "assets_ids": token_ids[:2],
+                    "type": "market",
+                    "custom_feature_enabled": True,
+                },
+                "max_messages": args.websocket_messages,
+                "timeout_seconds": args.websocket_timeout,
+            },
+            "handshake": {},
+            "messages": [],
+            "message_event_types": [],
+            "ok": False,
+            "error": "Skipped by --skip-websocket",
+        }
+    else:
+        ws_probe = websocket_probe(
+            MARKET_WS_URL,
+            token_ids[:2],
+            timeout_seconds=args.websocket_timeout,
+            max_messages=args.websocket_messages,
+        )
+
+    market_payload = request_json_payload(requests["gamma_market_by_slug"])
+    book_payload = request_json_payload(requests["clob_get_book"])
+    books_payload = request_json_payload(requests["clob_post_books"])
+    trades_payload = request_json_payload(requests["data_api_recent_trades"])
+    # Each check needs the request's "ok" flag plus a payload of the expected shape.
+    validation_summary = {
+        "market_metadata_fetched": bool(
+            requests["gamma_market_by_slug"]["ok"]
+            and isinstance(market_payload, list)
+            and len(market_payload) >= 1
+        ),
+        "single_order_book_fetched": bool(
+            requests["clob_get_book"]["ok"]
+            and isinstance(book_payload, dict)
+            and book_payload.get("asset_id")
+            and isinstance(book_payload.get("bids"), list)
+            and isinstance(book_payload.get("asks"), list)
+        ),
+        "batch_order_books_fetched": bool(
+            requests["clob_post_books"]["ok"]
+            and isinstance(books_payload, list)
+            and len(books_payload) >= 1
+        ),
+        # An empty trade list still counts as checked.
+        "recent_trades_checked": bool(
+            requests["data_api_recent_trades"]["ok"] and isinstance(trades_payload, list)
+        ),
+        "websocket_checked": bool(ws_probe.get("ok")),
+    }
+    # Only market metadata and the single order book gate the checkpoint.
+    pass_condition_met = (
+        validation_summary["market_metadata_fetched"]
+        and validation_summary["single_order_book_fetched"]
+    )
+    gate_status = "PASS" if pass_condition_met else "FAIL"
+    reason = (
+        "Fetched at least one active market metadata record and one current CLOB order book."
+        if pass_condition_met
+        else "Did not fetch both required active market metadata and current order book evidence."
+    )
+
+    probe: dict[str, Any] = {
+        "schema_name": "polymarket_public_sources_probe",
+        "schema_version": 1,
+        "artifact_status": "valid" if pass_condition_met else "partial",
+        "checkpoint_id": 2,
+        "checkpoint_name": "Polymarket Public Data Source Probe",
+        "started_at_utc": started_at,
+        "ended_at_utc": iso_z(),
+        "command": command,
+        "scope": "Bounded public endpoint probe only; no collector implementation.",
+        "official_sources": OFFICIAL_SOURCES,
+        "selected_market": selected_summary,
+        "candidate_markets_considered": candidates,
+        "requests": requests,
+        "websocket_probe": ws_probe,
+        "validation_summary": validation_summary,
+        "gate": {"status": gate_status, "reason": reason},
+        "fake_progress_risk": "Mistaking one successful short probe for a reliable collector. This checkpoint only proves endpoint availability and payload shape at probe time.",
+        "next_step": "Checkpoint 3: build a small BTC market discovery script that reliably outputs current active BTC up/down markets with condition IDs and both outcome token IDs.",
+    }
+    # Derived sections are attached after the base dict is assembled.
+    probe["field_summary"] = top_level_field_summary(requests, ws_probe)
+    probe["endpoint_findings"] = build_endpoint_findings(selected, requests, ws_probe)
+    return probe
+
+
+def write_outputs(args: argparse.Namespace, probe: dict[str, Any]) -> None:
+    """Persist the probe as JSON, Markdown, checkpoint report and checkpoint manifest.
+
+    The manifest is written last so it can list the other artifacts together
+    with their on-disk status and digest. Its own entry is recorded as
+    ``valid`` without a digest because it is described before being written.
+    """
+    json_target = args.output_json
+    json_target.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(probe, indent=2, sort_keys=True) + "\n"
+    json_target.write_text(serialized, encoding="utf-8")
+    write_probe_markdown(probe, args.output_markdown)
+    write_checkpoint_report(probe, args.checkpoint_report)
+
+    def describe(artifact_path: str, kind: str) -> dict[str, Any]:
+        # "valid" needs an existing, non-empty path; the digest additionally
+        # needs that path to be a regular file.
+        location = Path(artifact_path)
+        present = location.exists()
+        non_empty = present and location.stat().st_size > 0
+        digest = sha256_file(location) if present and location.is_file() else None
+        return {
+            "path": artifact_path,
+            "kind": kind,
+            "status": "valid" if non_empty else "missing",
+            "sha256": digest,
+        }
+
+    tracked = [
+        ("scripts/probe_polymarket_public_sources.py", "bounded_probe_script"),
+        (args.output_json.as_posix(), "raw_probe_evidence"),
+        (args.output_markdown.as_posix(), "probe_report"),
+        (args.checkpoint_report.as_posix(), "checkpoint_report"),
+    ]
+    artifacts = [describe(artifact_path, kind) for artifact_path, kind in tracked]
+    manifest_entry = {
+        "path": args.checkpoint_manifest.as_posix(),
+        "kind": "checkpoint_manifest",
+        "status": "valid",
+    }
+    artifacts.append(manifest_entry)
+    write_checkpoint_manifest(probe, args.checkpoint_manifest, artifacts)
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the probe's command-line options.
+
+    Options cover the four output locations, request limits, HTTP and websocket
+    timeouts, the websocket message cap, the minimum market lead time, and a
+    flag to skip the websocket probe.
+    """
+    parser = argparse.ArgumentParser(
+        description="Probe public Polymarket data sources for Checkpoint 2."
+    )
+
+    path_options = (
+        ("--output-json", DEFAULT_PROBE_JSON),
+        ("--output-markdown", DEFAULT_PROBE_MD),
+        ("--checkpoint-report", DEFAULT_CHECKPOINT_REPORT),
+        ("--checkpoint-manifest", DEFAULT_CHECKPOINT_MANIFEST),
+    )
+    for flag, default_path in path_options:
+        parser.add_argument(flag, type=Path, default=default_path)
+
+    numeric_options = (
+        ("--events-limit", int, 100),
+        ("--search-limit", int, 20),
+        ("--trades-limit", int, 10),
+        ("--http-timeout", float, 15.0),
+        ("--websocket-timeout", float, 8.0),
+        ("--websocket-messages", int, 3),
+        ("--min-future-lead-seconds", int, 60),
+    )
+    for flag, caster, default_value in numeric_options:
+        parser.add_argument(flag, type=caster, default=default_value)
+
+    parser.add_argument("--skip-websocket", action="store_true")
+    return parser.parse_args()
+
+
+def main() -> int:
+    """Run the probe, write every artifact and print a JSON run summary.
+
+    Returns 0 when the gate status is ``PASS`` and 1 otherwise.
+    """
+    args = parse_args()
+    probe = build_probe(args)
+    write_outputs(args, probe)
+
+    gate_status = probe["gate"]["status"]
+    run_summary = {
+        "status": gate_status,
+        "output_json": args.output_json.as_posix(),
+        "output_markdown": args.output_markdown.as_posix(),
+        "checkpoint_report": args.checkpoint_report.as_posix(),
+        "checkpoint_manifest": args.checkpoint_manifest.as_posix(),
+        "selected_market": probe["selected_market"],
+        "validation_summary": probe["validation_summary"],
+    }
+    print(json.dumps(run_summary, indent=2, sort_keys=True))
+
+    if gate_status == "PASS":
+        return 0
+    return 1
+
+
+# Script entry point: the process exit status is whatever main() returns.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/run_polymarket_24h_soak.sh b/scripts/run_polymarket_24h_soak.sh
new file mode 100755
index 0000000..db1da7a
--- /dev/null
+++ b/scripts/run_polymarket_24h_soak.sh
@@ -0,0 +1,362 @@
+#!/usr/bin/env bash
+set -uo pipefail  # no -e: step exit codes are captured explicitly further down
+# ORDERBOOKS_* environment variables override the defaults in this section.
+APP_DIR="${ORDERBOOKS_APP_DIR:-$(pwd)}"
+PYTHON_BIN="${ORDERBOOKS_PYTHON:-python3}"
+RCLONE_BIN="${ORDERBOOKS_RCLONE_BIN:-/usr/bin/rclone}"
+RCLONE_DEST_BASE="${ORDERBOOKS_RCLONE_DEST:-gdrive:orderbooks/polymarket/soak-test}"
+
+SOAK_DATE="${ORDERBOOKS_SOAK_DATE:-$(date -u +%F)}"
+SOAK_ID="${ORDERBOOKS_SOAK_ID:-soak_test_${SOAK_DATE}}"
+SOAK_SECONDS="${ORDERBOOKS_SOAK_SECONDS:-86400}"  # total soak duration; the default is 24 hours
+CYCLE_SECONDS="${ORDERBOOKS_SOAK_CYCLE_SECONDS:-300}"  # collector --duration-seconds per cycle
+INTERVAL_SECONDS="${ORDERBOOKS_SOAK_INTERVAL_SECONDS:-30}"  # collector --interval-seconds
+MARKET_LIMIT="${ORDERBOOKS_SOAK_MARKET_LIMIT:-2}"
+MARKET_END_SAFETY_SECONDS="${ORDERBOOKS_SOAK_MARKET_END_SAFETY_SECONDS:-420}"
+REQUEST_TIMEOUT_SECONDS="${ORDERBOOKS_SOAK_REQUEST_TIMEOUT_SECONDS:-15}"
+MAX_RETRIES="${ORDERBOOKS_SOAK_MAX_RETRIES:-2}"
+BACKOFF_SECONDS="${ORDERBOOKS_SOAK_BACKOFF_SECONDS:-2}"
+DISCOVERY_LIMIT="${ORDERBOOKS_SOAK_DISCOVERY_LIMIT:-100}"
+DISCOVERY_MAX_PAGES="${ORDERBOOKS_SOAK_DISCOVERY_MAX_PAGES:-3}"
+DISCOVERY_TIMEOUT="${ORDERBOOKS_SOAK_DISCOVERY_TIMEOUT:-15}"
+
+LOCAL_ROOT="${ORDERBOOKS_SOAK_LOCAL_ROOT:-data/soak_test/${SOAK_DATE}}"
+MANIFEST_ROOT="${ORDERBOOKS_SOAK_MANIFEST_ROOT:-data/manifests/${SOAK_ID}}"
+START_MANIFEST="${ORDERBOOKS_SOAK_START_MANIFEST:-data/manifests/${SOAK_ID}_start.json}"
+FINAL_MANIFEST="${ORDERBOOKS_SOAK_FINAL_MANIFEST:-data/manifests/${SOAK_ID}_final.json}"
+# Paths derived from the roots above.
+DISCOVERY_DIR="${LOCAL_ROOT}/discovery"
+LIVE_DIR="${LOCAL_ROOT}/live_sample"
+LOG_DIR="${LOCAL_ROOT}/logs"
+PID_FILE="${LOCAL_ROOT}/soak.pid"
+CYCLES_JSONL="${MANIFEST_ROOT}/cycles.jsonl"
+LOG_FILE="${LOG_DIR}/soak.log"
+REMOTE_DEST="${RCLONE_DEST_BASE%/}/${SOAK_DATE}"  # drop one trailing slash from the base, then append the date
+# Mutable controller state shared by the signal handler, the exit trap and the main loop.
+STOP_REQUESTED=0  # set to 1 by handle_signal
+STOP_SIGNAL=""
+CURRENT_CHILD_PID=""  # PID of the background step currently being waited on, if any
+CURRENT_PHASE="initializing"
+CURRENT_CYCLE_ID=""
+START_WRITTEN=0  # set to 1 once write_start_manifest has run
+FINAL_WRITTEN=0  # set to 1 once write_final_manifest has run
+
+cd "${APP_DIR}" || exit 2  # the relative data/, config/ and scripts/ paths resolve against APP_DIR
+mkdir -p "${DISCOVERY_DIR}" "${LIVE_DIR}" "${LOG_DIR}" "${MANIFEST_ROOT}" "$(dirname "${START_MANIFEST}")" "$(dirname "${FINAL_MANIFEST}")"
+
+STARTED_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+START_EPOCH="$(date -u +%s)"
+END_EPOCH="$((START_EPOCH + SOAK_SECONDS))"  # deadline for the main loop; formatted below via date -d, else python3
+EXPECTED_COMPLETION_AT="$(date -u -d "@${END_EPOCH}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || python3 - <<PY
+import datetime as dt
+print(dt.datetime.fromtimestamp(${END_EPOCH}, dt.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z"))
+PY
+)"
+
+safe_log() {  # append a UTC-stamped line to LOG_FILE only; errors are suppressed (used by handle_signal)
+  printf '%s %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" >> "${LOG_FILE}" 2>/dev/null || true
+}
+
+log() {  # print a UTC-stamped line to stdout and append it to LOG_FILE
+  printf '%s %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" | tee -a "${LOG_FILE}"
+}
+
+handle_signal() {  # trap handler: $1 is the signal name; records the stop request and forwards it to the running step
+  local signal_name="$1"
+  STOP_REQUESTED=1  # the main loop and run_logged poll this flag
+  STOP_SIGNAL="${signal_name}"
+  safe_log "SIGNAL received=${signal_name} phase=${CURRENT_PHASE} cycle_id=${CURRENT_CYCLE_ID:-none}"
+  if [[ -n "${CURRENT_CHILD_PID}" ]] && kill -0 "${CURRENT_CHILD_PID}" 2>/dev/null; then  # forward only while a child is alive
+    case "${signal_name}" in
+      SIGINT) kill -INT "${CURRENT_CHILD_PID}" 2>/dev/null || true ;;  # NOTE(review): bash starts `&` jobs with SIGINT ignored when job control is off; confirm the child reacts to INT
+      SIGTERM) kill -TERM "${CURRENT_CHILD_PID}" 2>/dev/null || true ;;
+      SIGHUP) kill -HUP "${CURRENT_CHILD_PID}" 2>/dev/null || true ;;
+    esac
+  fi
+}
+
+write_start_manifest() {  # write the STARTED marker JSON to START_MANIFEST via a temp file, then set START_WRITTEN=1
+  local tmp_path="${START_MANIFEST}.tmp"
+  python3 - "$tmp_path" "$START_MANIFEST" <<PY
+import json
+import os
+import pathlib
+# The heredoc is unquoted, so the shell substitutes its variables into this source before Python runs.
+tmp_path = pathlib.Path(os.sys.argv[1])  # os.sys is the sys module as imported by os
+final_path = pathlib.Path(os.sys.argv[2])
+manifest = {
+    "schema_name": "soak_test_start_manifest",
+    "schema_version": 1,
+    "checkpoint_id": 8,
+    "checkpoint_name": "24h Soak Test Plan",
+    "status": "STARTED",
+    "started_at_utc": "${STARTED_AT}",
+    "expected_completion_at_utc": "${EXPECTED_COMPLETION_AT}",
+    "soak_seconds": int("${SOAK_SECONDS}"),
+    "cycle_seconds": int("${CYCLE_SECONDS}"),
+    "pid": int("$$"),
+    "pid_file": "${PID_FILE}",
+    "log_file": "${LOG_FILE}",
+    "local_root": "${LOCAL_ROOT}",
+    "manifest_root": "${MANIFEST_ROOT}",
+    "remote_dest": "${REMOTE_DEST}",
+    "raw_output_dir": "${LIVE_DIR}",
+    "discovery_dir": "${DISCOVERY_DIR}",
+    "cycles_jsonl": "${CYCLES_JSONL}",
+    "gate_status": "IN_PROGRESS",
+    "production_ready": False,
+    "notes": [
+        "This is a real 24h soak start marker, not a completion report.",
+        "Checkpoint 8 cannot pass until 24 real hours elapse and final metrics are validated.",
+    ],
+}
+tmp_path.parent.mkdir(parents=True, exist_ok=True)
+tmp_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+os.replace(tmp_path, final_path)  # rename the finished temp file over the final path
+PY
+  START_WRITTEN=1  # set regardless of the Python exit status; the caller checks the file with test -s
+}
+
+write_cycle_record() {  # append one pre-serialized JSON record ($1) as a line of CYCLES_JSONL
+  local record="$1"
+  printf '%s\n' "${record}" >> "${CYCLES_JSONL}"
+}
+
+write_final_manifest() {  # args: final status, gate status, exit reason; writes FINAL_MANIFEST and sets FINAL_WRITTEN=1
+  local final_status="$1"
+  local gate_status="$2"
+  local exit_reason="$3"
+  local ended_at
+  local tmp_path
+  ended_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+  tmp_path="${FINAL_MANIFEST}.tmp"
+  python3 - "$tmp_path" "$FINAL_MANIFEST" <<PY
+import json
+import os
+import pathlib
+# The heredoc is unquoted, so the shell substitutes its variables into this source before Python runs.
+tmp_path = pathlib.Path(os.sys.argv[1])  # os.sys is the sys module as imported by os
+final_path = pathlib.Path(os.sys.argv[2])
+cycles_path = pathlib.Path("${CYCLES_JSONL}")
+cycles = []
+if cycles_path.exists():  # embed every per-cycle record written so far; blank lines are skipped
+    cycles = [json.loads(line) for line in cycles_path.read_text(encoding="utf-8").splitlines() if line.strip()]
+manifest = {
+    "schema_name": "soak_test_final_manifest",
+    "schema_version": 1,
+    "checkpoint_id": 8,
+    "checkpoint_name": "24h Soak Test Plan",
+    "status": "${final_status}",
+    "gate_status": "${gate_status}",
+    "exit_reason": "${exit_reason}",
+    "started_at_utc": "${STARTED_AT}",
+    "ended_at_utc": "${ended_at}",
+    "expected_completion_at_utc": "${EXPECTED_COMPLETION_AT}",
+    "soak_seconds": int("${SOAK_SECONDS}"),
+    "cycle_seconds": int("${CYCLE_SECONDS}"),
+    "cycles": cycles,
+    "cycle_count": len(cycles),
+    "ok_cycle_count": sum(1 for cycle in cycles if cycle.get("status") == "OK"),
+    "error_cycle_count": sum(1 for cycle in cycles if cycle.get("status") == "ERROR"),
+    "interrupted_cycle_count": sum(1 for cycle in cycles if cycle.get("status") == "INTERRUPTED"),
+    "pid": int("$$"),
+    "pid_file": "${PID_FILE}",
+    "log_file": "${LOG_FILE}",
+    "local_root": "${LOCAL_ROOT}",
+    "manifest_root": "${MANIFEST_ROOT}",
+    "remote_dest": "${REMOTE_DEST}",
+    "stop_requested": bool(int("${STOP_REQUESTED}")),
+    "stop_signal": "${STOP_SIGNAL}",
+    "current_phase_at_exit": "${CURRENT_PHASE}",
+    "current_cycle_id_at_exit": "${CURRENT_CYCLE_ID}",
+    "production_ready": False,
+    "notes": [
+        "This marker is written by the soak controller on completion, interruption, or error.",
+        "Checkpoint 8 cannot be PASS until 24 real hours elapse and final metrics are validated.",
+    ],
+}
+tmp_path.parent.mkdir(parents=True, exist_ok=True)
+tmp_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+os.replace(tmp_path, final_path)  # rename the finished temp file over the final path
+PY
+  FINAL_WRITTEN=1  # set regardless of the Python exit status
+}
+
+cleanup_on_exit() {  # EXIT trap: ensure a final manifest exists, remove our PID file, keep the original exit code
+  local rc=$?  # must be the first statement so it captures the status the script is exiting with
+  if [[ "${START_WRITTEN}" -eq 1 && "${FINAL_WRITTEN}" -eq 0 ]]; then  # started but never finalized
+    if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
+      write_final_manifest "INTERRUPTED" "INTERRUPTED" "${STOP_SIGNAL:-signal}"
+    elif [[ "${rc}" -ne 0 ]]; then
+      write_final_manifest "ERROR" "ERROR" "exit_code_${rc}"
+    else
+      write_final_manifest "ERROR" "ERROR" "exited_without_final_marker"
+    fi
+  fi
+  if [[ -f "${PID_FILE}" ]] && [[ "$(cat "${PID_FILE}" 2>/dev/null)" == "$$" ]]; then  # only delete a PID file that names this process
+    rm -f "${PID_FILE}"
+  fi
+  exit "${rc}"
+}
+
+run_logged() {  # run "$@" in the background with stdout/stderr appended to LOG_FILE; return its exit status
+  "$@" >> "${LOG_FILE}" 2>&1 &
+  CURRENT_CHILD_PID="$!"  # published so handle_signal can forward signals to the step
+  wait "${CURRENT_CHILD_PID}"
+  local rc=$?
+  if [[ "${STOP_REQUESTED}" -eq 1 ]] && kill -0 "${CURRENT_CHILD_PID}" 2>/dev/null; then  # wait returned on a trapped signal while the child is still alive
+    wait "${CURRENT_CHILD_PID}"
+    rc=$?
+  fi
+  CURRENT_CHILD_PID=""
+  return "${rc}"
+}
+
+trap 'handle_signal SIGINT' INT  # each handler passes the signal name that ends up in the manifests
+trap 'handle_signal SIGTERM' TERM
+trap 'handle_signal SIGHUP' HUP
+trap cleanup_on_exit EXIT
+# Publish our PID, then write the start marker and abort if it is missing or empty.
+echo "$$" > "${PID_FILE}"
+write_start_manifest
+test -s "${START_MANIFEST}" || exit 3
+
+log "START soak_id=${SOAK_ID} pid=$$ expected_completion=${EXPECTED_COMPLETION_AT}"
+
+cycle_index=0
+error_seen=0  # becomes 1 as soon as any cycle ends with status ERROR
+while true; do  # one discovery -> collector -> upload cycle per iteration, until the deadline or a stop request
+  now_epoch="$(date -u +%s)"
+  remaining="$((END_EPOCH - now_epoch))"
+  if [[ "${remaining}" -le 0 ]]; then
+    break
+  fi
+  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
+    break
+  fi
+  if [[ "${remaining}" -lt 30 ]]; then  # do not start a cycle with less than 30 seconds left
+    log "SKIP final tiny remaining window seconds=${remaining}"
+    break
+  fi
+
+  cycle_index="$((cycle_index + 1))"
+  cycle_id="$(date -u +%Y%m%dT%H%M%SZ)"  # UTC start time doubles as the cycle identifier in file names
+  CURRENT_CYCLE_ID="${cycle_id}"
+  run_seconds="${CYCLE_SECONDS}"
+  if [[ "${remaining}" -lt "${run_seconds}" ]]; then  # clamp the last cycle to the time that is left
+    run_seconds="${remaining}"
+  fi
+
+  discovery_json="${DISCOVERY_DIR}/polymarket_btc_markets_${cycle_id}.json"
+  discovery_manifest="${DISCOVERY_DIR}/polymarket_btc_markets_manifest_${cycle_id}.json"
+  discovery_markdown="${DISCOVERY_DIR}/polymarket_btc_markets_${cycle_id}.md"
+  collector_manifest="${MANIFEST_ROOT}/collector_${cycle_id}.json"
+  upload_manifest="${MANIFEST_ROOT}/upload_${cycle_id}.json"
+  cycle_started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+  log "CYCLE ${cycle_index} start id=${cycle_id} run_seconds=${run_seconds}"
+  # Phase 1: market discovery.
+  discovery_exit=0
+  CURRENT_PHASE="discovery"
+  run_logged "${PYTHON_BIN}" scripts/discover_polymarket_btc_markets.py \
+    --output-json "${discovery_json}" \
+    --manifest "${discovery_manifest}" \
+    --markdown "${discovery_markdown}" \
+    --limit "${DISCOVERY_LIMIT}" \
+    --max-pages "${DISCOVERY_MAX_PAGES}" \
+    --timeout "${DISCOVERY_TIMEOUT}" || discovery_exit=$?
+  # Phase 2: collector. Exit code 98 marks a step skipped by a stop request, 99 one skipped because the previous step failed.
+  collector_exit=0
+  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
+    collector_exit=98
+  elif [[ "${discovery_exit}" -eq 0 ]]; then
+    CURRENT_PHASE="collector"
+    run_logged "${PYTHON_BIN}" scripts/collect_polymarket_orderbooks.py \
+      --config config/polymarket_collector.vps.example.yaml \
+      --discovery-path "${discovery_json}" \
+      --output-dir "${LIVE_DIR}" \
+      --manifest-path "${collector_manifest}" \
+      --market-limit "${MARKET_LIMIT}" \
+      --interval-seconds "${INTERVAL_SECONDS}" \
+      --duration-seconds "${run_seconds}" \
+      --request-timeout-seconds "${REQUEST_TIMEOUT_SECONDS}" \
+      --max-retries "${MAX_RETRIES}" \
+      --backoff-seconds "${BACKOFF_SECONDS}" \
+      --market-end-safety-seconds "${MARKET_END_SAFETY_SECONDS}" || collector_exit=$?
+  else
+    collector_exit=99
+  fi
+  # Phase 3: upload, using the same 98/99 skip codes.
+  upload_exit=0
+  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
+    upload_exit=98
+  elif [[ "${collector_exit}" -eq 0 ]]; then
+    CURRENT_PHASE="upload"
+    run_logged scripts/upload_archive_rclone.sh \
+      --execute \
+      --data-dir "${LOCAL_ROOT}" \
+      --raw-dir "${LIVE_DIR}" \
+      --source-manifest-dir "${MANIFEST_ROOT}" \
+      --manifest-dir "${MANIFEST_ROOT}" \
+      --manifest-path "${upload_manifest}" \
+      --dest "${REMOTE_DEST}" \
+      --min-age-seconds 0 \
+      --rclone-bin "${RCLONE_BIN}" || upload_exit=$?
+  else
+    upload_exit=99
+  fi
+
+  cycle_ended_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then  # a stop request takes precedence over the step exit codes
+    cycle_status="INTERRUPTED"
+  elif [[ "${discovery_exit}" -eq 0 && "${collector_exit}" -eq 0 && "${upload_exit}" -eq 0 ]]; then
+    cycle_status="OK"
+  else
+    cycle_status="ERROR"
+    error_seen=1
+  fi
+  # Serialize the cycle outcome as one JSON line; the shell substitutes the values into the Python source.
+  record="$(python3 - <<PY
+import json
+print(json.dumps({
+    "cycle_index": ${cycle_index},
+    "cycle_id": "${cycle_id}",
+    "started_at_utc": "${cycle_started_at}",
+    "ended_at_utc": "${cycle_ended_at}",
+    "run_seconds": int("${run_seconds}"),
+    "discovery_manifest": "${discovery_manifest}",
+    "collector_manifest": "${collector_manifest}",
+    "upload_manifest": "${upload_manifest}",
+    "discovery_exit": int("${discovery_exit}"),
+    "collector_exit": int("${collector_exit}"),
+    "upload_exit": int("${upload_exit}"),
+    "status": "${cycle_status}",
+    "stop_signal": "${STOP_SIGNAL}",
+}, sort_keys=True))
+PY
+)"
+  write_cycle_record "${record}"
+  log "CYCLE ${cycle_index} end id=${cycle_id} status=${cycle_status} discovery_exit=${discovery_exit} collector_exit=${collector_exit} upload_exit=${upload_exit}"
+
+  CURRENT_PHASE="sleep"
+  CURRENT_CYCLE_ID=""
+  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
+    break
+  fi
+  sleep 5 &  # backgrounded and registered as the current child, so handle_signal can signal it
+  CURRENT_CHILD_PID="$!"
+  wait "${CURRENT_CHILD_PID}" || true
+  CURRENT_CHILD_PID=""
+done
+
+CURRENT_PHASE="finalizing"
+CURRENT_CYCLE_ID=""
+if [[ "${STOP_REQUESTED}" -eq 1 ]]; then  # precedence: interruption, then any cycle error, then normal completion
+  write_final_manifest "INTERRUPTED" "INTERRUPTED" "${STOP_SIGNAL:-signal}"
+elif [[ "${error_seen}" -eq 1 ]]; then
+  write_final_manifest "ERROR" "ERROR" "cycle_error"
+else
+  write_final_manifest "COMPLETED_NEEDS_REVIEW" "NEEDS_REVIEW" "elapsed"  # completion is never reported as PASS here
+fi
+# The script's exit status is that of this last command, not the soak outcome; the outcome is in FINAL_MANIFEST.
+log "END soak_id=${SOAK_ID} final_manifest=${FINAL_MANIFEST} status_written=1"
diff --git a/scripts/run_polymarket_collector_cycle.sh b/scripts/run_polymarket_collector_cycle.sh
new file mode 100755
index 0000000..33045e2
--- /dev/null
+++ b/scripts/run_polymarket_collector_cycle.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+set -euo pipefail  # any failing command aborts the cycle with a non-zero exit
+# Runs one discovery pass followed by one collector run; ORDERBOOKS_* variables override the defaults.
+APP_DIR="${ORDERBOOKS_APP_DIR:-/opt/orderbooks}"
+PYTHON_BIN="${ORDERBOOKS_PYTHON:-${APP_DIR}/.venv/bin/python}"
+DATA_DIR="${ORDERBOOKS_DATA_DIR:-/var/lib/orderbooks}"
+COLLECTOR_CONFIG="${ORDERBOOKS_COLLECTOR_CONFIG:-/etc/orderbooks/polymarket_collector.vps.yaml}"
+
+DISCOVERY_DIR="${ORDERBOOKS_DISCOVERY_DIR:-${DATA_DIR}/discovery}"
+OUTPUT_DIR="${ORDERBOOKS_OUTPUT_DIR:-${DATA_DIR}/raw_orderbooks}"
+MANIFEST_DIR="${ORDERBOOKS_MANIFEST_DIR:-${DATA_DIR}/manifests}"
+
+DISCOVERY_JSON="${ORDERBOOKS_DISCOVERY_JSON:-${DISCOVERY_DIR}/polymarket_btc_markets_latest.json}"
+DISCOVERY_MANIFEST="${ORDERBOOKS_DISCOVERY_MANIFEST:-${DISCOVERY_DIR}/polymarket_btc_markets_manifest.json}"
+DISCOVERY_MARKDOWN="${ORDERBOOKS_DISCOVERY_MARKDOWN:-${DISCOVERY_DIR}/polymarket_btc_markets.md}"
+
+DISCOVERY_LIMIT="${ORDERBOOKS_DISCOVERY_LIMIT:-100}"
+DISCOVERY_MAX_PAGES="${ORDERBOOKS_DISCOVERY_MAX_PAGES:-3}"
+DISCOVERY_TIMEOUT="${ORDERBOOKS_DISCOVERY_TIMEOUT:-15}"
+
+cycle_id="$(date -u +%Y%m%dT%H%M%SZ)"  # UTC timestamp used in the default collector manifest name
+COLLECTOR_MANIFEST="${ORDERBOOKS_COLLECTOR_MANIFEST:-${MANIFEST_DIR}/polymarket_orderbook_collector_${cycle_id}.json}"
+
+mkdir -p "${DISCOVERY_DIR}" "${OUTPUT_DIR}" "${MANIFEST_DIR}"
+cd "${APP_DIR}"  # the scripts/ paths below are relative to the checkout
+# Step 1: market discovery writes DISCOVERY_JSON, which the collector reads below.
+"${PYTHON_BIN}" scripts/discover_polymarket_btc_markets.py \
+  --output-json "${DISCOVERY_JSON}" \
+  --manifest "${DISCOVERY_MANIFEST}" \
+  --markdown "${DISCOVERY_MARKDOWN}" \
+  --limit "${DISCOVERY_LIMIT}" \
+  --max-pages "${DISCOVERY_MAX_PAGES}" \
+  --timeout "${DISCOVERY_TIMEOUT}"
+# Step 2: exec replaces this shell with the collector process.
+exec "${PYTHON_BIN}" scripts/collect_polymarket_orderbooks.py \
+  --config "${COLLECTOR_CONFIG}" \
+  --discovery-path "${DISCOVERY_JSON}" \
+  --output-dir "${OUTPUT_DIR}" \
+  --manifest-path "${COLLECTOR_MANIFEST}"
diff --git a/scripts/run_polymarket_collector_loop.sh b/scripts/run_polymarket_collector_loop.sh
new file mode 100755
index 0000000..671fe4c
--- /dev/null
+++ b/scripts/run_polymarket_collector_loop.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+set -uo pipefail  # no -e: a failed cycle must not terminate the loop
+# Supervisor that reruns the collector cycle script until it receives INT or TERM.
+APP_DIR="${ORDERBOOKS_APP_DIR:-/app}"
+MANIFEST_DIR="${ORDERBOOKS_MANIFEST_DIR:-${ORDERBOOKS_DATA_DIR:-/var/lib/orderbooks}/manifests}"
+LOOP_SLEEP_SECONDS="${ORDERBOOKS_LOOP_SLEEP_SECONDS:-15}"  # pause between cycles
+STOP_REQUESTED=0  # set to 1 by request_stop
+CHILD_PID=""  # PID of the running cycle script, if any
+
+utc_compact() {  # UTC timestamp for file names, formatted like 20260418T092328Z
+  date -u +%Y%m%dT%H%M%SZ
+}
+
+utc_iso() {  # UTC timestamp for log lines, formatted like 2026-04-18T09:23:28Z
+  date -u +%Y-%m-%dT%H:%M:%SZ
+}
+
+write_loop_event() {  # args: status, exit code, message; writes a collector_loop_<timestamp>.json event into MANIFEST_DIR
+  local status="$1"
+  local exit_code="$2"
+  local message="$3"
+  local path="${MANIFEST_DIR%/}/collector_loop_$(utc_compact).json"  # second resolution: events in the same second share a file name
+  mkdir -p "${MANIFEST_DIR}"
+  PYTHONDONTWRITEBYTECODE=1 python3 - "$path" "$status" "$exit_code" "$message" <<'PY_LOOP_EVENT'
+import json
+import sys
+import datetime as dt
+from pathlib import Path
+# Quoted heredoc: values arrive through argv, nothing is substituted into this source.
+path = Path(sys.argv[1])
+status = sys.argv[2]
+exit_code = int(sys.argv[3])
+message = sys.argv[4]
+now = dt.datetime.now(dt.UTC).replace(microsecond=0).isoformat().replace('+00:00', 'Z')
+path.write_text(json.dumps({
+    'schema_name': 'collector_loop_event',
+    'schema_version': 1,
+    'written_at_utc': now,
+    'status': status,
+    'exit_code': exit_code,
+    'message': message,
+}, indent=2, sort_keys=True) + '\n', encoding='utf-8')
+PY_LOOP_EVENT
+}
+
+request_stop() {  # INT/TERM handler: flag the loop to stop and send TERM to the running cycle, if any
+  STOP_REQUESTED=1
+  if [[ -n "${CHILD_PID}" ]] && kill -0 "${CHILD_PID}" >/dev/null 2>&1; then
+    kill -TERM "${CHILD_PID}" >/dev/null 2>&1 || true  # TERM is sent for both INT and TERM
+  fi
+}
+
+trap request_stop INT TERM
+
+mkdir -p "${MANIFEST_DIR}"
+cd "${APP_DIR}" || exit 1  # the cycle script is started via a path relative to APP_DIR
+
+echo "collector loop started at $(utc_iso)"
+
+while [[ "${STOP_REQUESTED}" -eq 0 ]]; do  # one collector cycle per iteration, until a stop is requested
+  cycle_started="$(utc_iso)"
+  echo "collector cycle starting at ${cycle_started}"
+
+  /bin/bash scripts/run_polymarket_collector_cycle.sh &  # background so the trap can run while the cycle is active
+  CHILD_PID="$!"
+  wait "${CHILD_PID}"
+  cycle_exit="$?"
+  # A trapped signal makes wait return early (status > 128) while the child is still shutting down; wait again for its real exit.
+  if [[ "${STOP_REQUESTED}" -ne 0 ]] && kill -0 "${CHILD_PID}" 2>/dev/null; then wait "${CHILD_PID}"; cycle_exit="$?"; fi
+  CHILD_PID=""
+  if [[ "${STOP_REQUESTED}" -ne 0 ]]; then
+    write_loop_event "INTERRUPTED" "${cycle_exit}" "collector loop received stop request during or after cycle"
+    break
+  fi
+  if [[ "${cycle_exit}" -ne 0 ]]; then  # a failed cycle is recorded, then the loop carries on
+    write_loop_event "CYCLE_FAILED" "${cycle_exit}" "collector cycle exited nonzero; loop will continue after sleep"
+    echo "collector cycle failed with exit ${cycle_exit}; continuing after ${LOOP_SLEEP_SECONDS}s" >&2
+  else
+    echo "collector cycle completed at $(utc_iso)"
+  fi
+
+  for ((i = 0; i < LOOP_SLEEP_SECONDS; i++)); do  # sleep in 1-second slices so a stop request is noticed promptly
+    if [[ "${STOP_REQUESTED}" -ne 0 ]]; then
+      break
+    fi
+    sleep 1
+  done
+done
+
+echo "collector loop stopped at $(utc_iso)"
diff --git a/scripts/upload_archive_rclone.sh b/scripts/upload_archive_rclone.sh
new file mode 100755
index 0000000..c43d9a7
--- /dev/null
+++ b/scripts/upload_archive_rclone.sh
@@ -0,0 +1,462 @@
+#!/usr/bin/env bash
+set -uo pipefail  # no -e: rclone exit codes are inspected explicitly further down
+
+SCRIPT_NAME="orderbooks_rclone_uploader"  # name and version are recorded in the upload manifest
+SCRIPT_VERSION="0.1.0"
+# Defaults; environment variables are read first, command-line options parsed below override them.
+MODE="dry-run"
+CLEANUP_AFTER_VERIFY=0
+DATA_DIR="${ORDERBOOKS_UPLOAD_DATA_DIR:-${ORDERBOOKS_DATA_DIR:-/var/lib/orderbooks}}"
+RAW_DIR="${ORDERBOOKS_UPLOAD_RAW_DIR:-}"  # empty values here are filled in from DATA_DIR after option parsing
+SOURCE_MANIFEST_DIR="${ORDERBOOKS_UPLOAD_SOURCE_MANIFEST_DIR:-}"
+MANIFEST_DIR="${ORDERBOOKS_UPLOAD_MANIFEST_DIR:-}"
+MANIFEST_PATH="${ORDERBOOKS_UPLOAD_MANIFEST_PATH:-}"
+DEST="${ORDERBOOKS_RCLONE_DEST:-}"
+RCLONE_BIN="${ORDERBOOKS_RCLONE_BIN:-rclone}"
+MIN_AGE_SECONDS="${ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS:-600}"
+RETENTION_DAYS="${ORDERBOOKS_UPLOAD_RETENTION_DAYS:-7}"
+TRANSFERS="${ORDERBOOKS_RCLONE_TRANSFERS:-4}"  # TRANSFERS and CHECKERS are environment-only; there is no matching option
+CHECKERS="${ORDERBOOKS_RCLONE_CHECKERS:-8}"
+
+usage() {  # print the help text to stdout
+  cat <<'EOF'
+Usage: scripts/upload_archive_rclone.sh [options]
+
+Uploads closed raw collector archive files and manifests with rclone.
+Default mode is dry-run. Real upload requires --execute and a destination.
+
+Options:
+  --dry-run                  Plan and run rclone copy with --dry-run (default).
+  --execute                  Run real rclone copy and rclone check.
+  --cleanup-after-verify     Delete uploaded local files older than retention only after verification.
+  --data-dir DIR             Base data directory. Default: /var/lib/orderbooks.
+  --raw-dir DIR              Raw collector output directory. Default: DATA_DIR/raw_orderbooks.
+  --source-manifest-dir DIR  Source collector manifest directory. Default: DATA_DIR/manifests.
+  --manifest-dir DIR         Upload manifest output directory. Default: DATA_DIR/manifests.
+  --manifest-path PATH       Exact upload manifest path.
+  --dest REMOTE:PATH         rclone destination. Or set ORDERBOOKS_RCLONE_DEST.
+  --min-age-seconds N        Skip files modified within N seconds. Default: 600.
+  --retention-days N         Keep at least N days locally. Default: 7.
+  --rclone-bin PATH          rclone binary path. Default: rclone.
+  --help                     Show this help.
+EOF
+}
+
+while [[ $# -gt 0 ]]; do  # option parsing: flags shift by 1, options that take a value shift by 2
+  case "$1" in
+    --dry-run)
+      MODE="dry-run"
+      shift
+      ;;
+    --execute)
+      MODE="execute"
+      shift
+      ;;
+    --cleanup-after-verify)
+      CLEANUP_AFTER_VERIFY=1
+      shift
+      ;;
+    --data-dir)
+      DATA_DIR="$2"  # with set -u, an option given without its value aborts on the unbound $2
+      shift 2
+      ;;
+    --raw-dir)
+      RAW_DIR="$2"
+      shift 2
+      ;;
+    --source-manifest-dir)
+      SOURCE_MANIFEST_DIR="$2"
+      shift 2
+      ;;
+    --manifest-dir)
+      MANIFEST_DIR="$2"
+      shift 2
+      ;;
+    --manifest-path)
+      MANIFEST_PATH="$2"
+      shift 2
+      ;;
+    --dest)
+      DEST="$2"
+      shift 2
+      ;;
+    --min-age-seconds)
+      MIN_AGE_SECONDS="$2"
+      shift 2
+      ;;
+    --retention-days)
+      RETENTION_DAYS="$2"
+      shift 2
+      ;;
+    --rclone-bin)
+      RCLONE_BIN="$2"
+      shift 2
+      ;;
+    --help)
+      usage
+      exit 0
+      ;;
+    *)  # anything unrecognized is a usage error (exit 2)
+      echo "Unknown argument: $1" >&2
+      usage >&2
+      exit 2
+      ;;
+  esac
+done
+
+if [[ -z "${RAW_DIR}" ]]; then  # directories left unset default to locations under DATA_DIR
+  RAW_DIR="${DATA_DIR%/}/raw_orderbooks"
+fi
+if [[ -z "${SOURCE_MANIFEST_DIR}" ]]; then
+  SOURCE_MANIFEST_DIR="${DATA_DIR%/}/manifests"
+fi
+if [[ -z "${MANIFEST_DIR}" ]]; then
+  MANIFEST_DIR="${DATA_DIR%/}/manifests"
+fi
+
+STARTED_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"  # used only to name the default upload manifest
+if [[ -z "${MANIFEST_PATH}" ]]; then
+  MANIFEST_PATH="${MANIFEST_DIR%/}/upload_archive_${RUN_ID}.json"
+fi
+
+# Private scratch directory, removed on exit. It is kept in WORK_DIR so an inherited, exported TMPDIR
+# is not redirected for child processes, and the script aborts if mktemp fails instead of using "/" paths.
+WORK_DIR="$(mktemp -d)" || { echo "Failed to create temporary working directory." >&2; exit 1; }
+trap 'rm -rf "${WORK_DIR}"' EXIT
+PLAN_PATH="${WORK_DIR}/plan.json"
+RCLONE_COPY_LOG="${WORK_DIR}/rclone_copy.log"
+RCLONE_CHECK_LOG="${WORK_DIR}/rclone_check.log"
+CLEANUP_PATH="${WORK_DIR}/cleanup.json"
+STAGING_DIR="${WORK_DIR}/stage"
+mkdir -p "$(dirname "${MANIFEST_PATH}")" "${STAGING_DIR}"
+
+python3 - "$DATA_DIR" "$RAW_DIR" "$SOURCE_MANIFEST_DIR" "$MANIFEST_PATH" "$MIN_AGE_SECONDS" "$STAGING_DIR" "$PLAN_PATH" <<'PY' || { echo "Upload planning step failed; aborting before any rclone call." >&2; exit 4; }
+import datetime as dt
+import hashlib
+import json
+import os
+import shutil
+import sys
+from pathlib import Path
+# Plan step: pick files old enough to upload, hash them, copy them into the staging dir, write plan.json.
+data_dir = Path(sys.argv[1])
+raw_dir = Path(sys.argv[2])
+source_manifest_dir = Path(sys.argv[3])
+manifest_path = Path(sys.argv[4]).resolve()
+min_age_seconds = int(sys.argv[5])
+staging_dir = Path(sys.argv[6])
+plan_path = Path(sys.argv[7])
+now = dt.datetime.now(dt.UTC)
+
+def iso_z_from_ts(ts: float) -> str:  # epoch seconds -> UTC ISO-8601 string with a Z suffix
+    return dt.datetime.fromtimestamp(ts, dt.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+def sha256_file(path: Path) -> str:  # hex digest, read in 1 MiB chunks
+    digest = hashlib.sha256()
+    with path.open("rb") as handle:
+        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+def rel_for(path: Path) -> str:  # path relative to data_dir, or just the file name when it lies outside data_dir
+    resolved = path.resolve()
+    try:
+        return resolved.relative_to(data_dir.resolve()).as_posix()
+    except ValueError:
+        return resolved.name
+
+def iter_files(root: Path):  # every regular file below root, in sorted order
+    if not root.exists():
+        return
+    for path in sorted(root.rglob("*")):
+        if path.is_file():
+            yield path
+
+selected = []
+skipped = []
+warnings = []
+seen = set()  # resolved paths already handled, so overlapping roots do not list a file twice
+
+for root, kind in [(raw_dir, "raw"), (source_manifest_dir, "manifest")]:
+    if not root.exists():
+        warnings.append(f"{kind} source directory does not exist: {root}")
+        continue
+    for path in iter_files(root):
+        resolved = path.resolve()
+        if resolved in seen:
+            continue
+        seen.add(resolved)
+        rel = rel_for(path)
+        stat = path.stat()
+        age_seconds = max(0, int(now.timestamp() - stat.st_mtime))
+        base = {
+            "local_path": str(path),
+            "relative_path": rel,
+            "kind": kind,
+            "bytes": stat.st_size,
+            "mtime_utc": iso_z_from_ts(stat.st_mtime),
+            "age_seconds": age_seconds,
+        }
+        if resolved == manifest_path:  # never upload the manifest this run is about to write
+            skipped.append({**base, "reason": "current_upload_manifest"})
+            continue
+        if age_seconds < min_age_seconds:  # recently modified files are skipped
+            skipped.append({**base, "reason": "modified_within_min_age_seconds"})
+            continue
+        checksum = sha256_file(path)
+        staged_path = staging_dir / rel
+        staged_path.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(path, staged_path)  # rclone uploads this staged copy, not the live file
+        selected.append({**base, "sha256": checksum, "staged_path": str(staged_path)})
+
+plan = {
+    "selected_files": selected,
+    "skipped_files": skipped,
+    "warnings": warnings,
+}
+plan_path.write_text(json.dumps(plan, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+PY
+
+RCLONE_AVAILABLE=0
+RCLONE_VERSION=""
+if command -v "${RCLONE_BIN}" >/dev/null 2>&1; then
+  RCLONE_AVAILABLE=1
+  RCLONE_VERSION="$("${RCLONE_BIN}" version 2>/dev/null | head -n 1 || true)"  # first line of the version output only
+fi
+
+DEST_CONFIGURED=0
+if [[ -n "${DEST}" ]]; then
+  DEST_CONFIGURED=1
+fi
+# Outcome variables; the exit codes stay empty unless the corresponding rclone command actually ran.
+COPY_EXIT_CODE=""
+CHECK_EXIT_CODE=""
+COPY_ATTEMPTED=0
+CHECK_ATTEMPTED=0
+OPERATION_STATUS="PLANNED"
+GATE_STATUS="BLOCKED_REAL_UPLOAD"  # changes only after a real copy has been attempted (PASS or FAIL)
+
+if [[ "${DEST_CONFIGURED}" -eq 0 ]]; then
+  OPERATION_STATUS="BLOCKED_DEST_MISSING"
+elif [[ "${RCLONE_AVAILABLE}" -eq 0 ]]; then
+  OPERATION_STATUS="BLOCKED_RCLONE_UNAVAILABLE"
+else
+  COPY_ATTEMPTED=1
+  copy_args=(copy "${STAGING_DIR}/" "${DEST%/}/" --checksum --transfers "${TRANSFERS}" --checkers "${CHECKERS}")
+  if [[ "${MODE}" == "dry-run" ]]; then
+    copy_args+=(--dry-run)
+  fi
+  "${RCLONE_BIN}" "${copy_args[@]}" >"${RCLONE_COPY_LOG}" 2>&1
+  COPY_EXIT_CODE=$?
+  if [[ "${COPY_EXIT_CODE}" -eq 0 && "${MODE}" == "dry-run" ]]; then
+    OPERATION_STATUS="DRY_RUN_PASS"
+  elif [[ "${COPY_EXIT_CODE}" -eq 0 ]]; then  # real copy succeeded: verify the staged files against the remote
+    CHECK_ATTEMPTED=1
+    "${RCLONE_BIN}" check "${STAGING_DIR}/" "${DEST%/}/" --one-way --checksum >"${RCLONE_CHECK_LOG}" 2>&1
+    CHECK_EXIT_CODE=$?
+    if [[ "${CHECK_EXIT_CODE}" -eq 0 ]]; then
+      OPERATION_STATUS="UPLOAD_VERIFIED"
+      GATE_STATUS="PASS"
+    else
+      OPERATION_STATUS="VERIFY_FAILED"
+      GATE_STATUS="FAIL"
+    fi
+  else
+    OPERATION_STATUS="COPY_FAILED"
+    GATE_STATUS="FAIL"
+  fi
+fi
+
+python3 - "$PLAN_PATH" "$CLEANUP_PATH" "$MODE" "$CLEANUP_AFTER_VERIFY" "$RETENTION_DAYS" "$OPERATION_STATUS" "$GATE_STATUS" <<'PY' || { echo "Upload cleanup step failed; no upload manifest was written." >&2; exit 5; }
+import datetime as dt
+import json
+import sys
+from pathlib import Path
+# Cleanup step: after a verified real upload, optionally delete local files older than the retention window.
+plan_path = Path(sys.argv[1])
+cleanup_path = Path(sys.argv[2])
+mode = sys.argv[3]
+cleanup_after_verify = sys.argv[4] == "1"
+retention_days = int(sys.argv[5])
+operation_status = sys.argv[6]
+gate_status = sys.argv[7]
+plan = json.loads(plan_path.read_text())
+now = dt.datetime.now(dt.UTC)
+cutoff = now - dt.timedelta(days=retention_days)  # files modified before this instant may be deleted
+retained = []
+deleted = []
+if mode == "execute" and cleanup_after_verify and operation_status == "UPLOAD_VERIFIED":
+    for item in plan["selected_files"]:
+        path = Path(item["local_path"])
+        if not path.exists():  # checked first so a vanished file is not reported as within the retention window
+            retained.append({**item, "reason": "missing_before_cleanup"})
+        elif dt.datetime.fromtimestamp(path.stat().st_mtime, dt.UTC) < cutoff:
+            path.unlink()
+            deleted.append({**item, "deleted_at_utc": now.replace(microsecond=0).isoformat().replace("+00:00", "Z")})
+        else:
+            retained.append({**item, "reason": "within_retention_window"})
+else:  # nothing is deleted; record why every selected file was kept
+    reason = "cleanup_not_requested"
+    if mode != "execute":
+        reason = "dry_run"
+    elif operation_status != "UPLOAD_VERIFIED":
+        reason = "not_verified"
+    for item in plan["selected_files"]:
+        retained.append({**item, "reason": reason})
+
+cleanup_path.write_text(
+    json.dumps({"retained_local_files": retained, "deleted_local_files": deleted}, indent=2, sort_keys=True) + "\n",
+    encoding="utf-8",
+)
+PY
+
+ENDED_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+# Scalar results reach the manifest writer through the environment; file paths are passed via argv.
+export SCRIPT_NAME SCRIPT_VERSION STARTED_AT ENDED_AT
+export MODE OPERATION_STATUS GATE_STATUS
+export RCLONE_BIN RCLONE_AVAILABLE RCLONE_VERSION DEST
+export COPY_ATTEMPTED CHECK_ATTEMPTED COPY_EXIT_CODE CHECK_EXIT_CODE
+export DATA_DIR RAW_DIR SOURCE_MANIFEST_DIR MIN_AGE_SECONDS RETENTION_DAYS CLEANUP_AFTER_VERIFY
+# A failed manifest write aborts with exit 6 so the exit code never reports success without a manifest.
+python3 - "$PLAN_PATH" "$CLEANUP_PATH" "$MANIFEST_PATH" <<'PY' || { echo "Failed to write upload manifest: ${MANIFEST_PATH}" >&2; exit 6; }
+import json
+import os
+import sys
+from pathlib import Path
+
+plan = json.loads(Path(sys.argv[1]).read_text())
+cleanup = json.loads(Path(sys.argv[2]).read_text())
+manifest_path = Path(sys.argv[3])
+
+mode = os.environ["MODE"]
+operation_status = os.environ["OPERATION_STATUS"]
+gate_status = os.environ["GATE_STATUS"]
+copy_attempted = os.environ["COPY_ATTEMPTED"] == "1"
+check_attempted = os.environ["CHECK_ATTEMPTED"] == "1"
+copy_exit_code = os.environ["COPY_EXIT_CODE"]  # empty string when the command was never run
+check_exit_code = os.environ["CHECK_EXIT_CODE"]
+dest = os.environ["DEST"]
+
+def public_item(item):  # copy of a plan entry without the temporary staging path
+    public = dict(item)
+    public.pop("staged_path", None)
+    return public
+
+selected = [public_item(item) for item in plan["selected_files"]]
+skipped = [public_item(item) for item in plan["skipped_files"]]
+retained_local = [public_item(item) for item in cleanup["retained_local_files"]]
+deleted_local = [public_item(item) for item in cleanup["deleted_local_files"]]
+attempted_files = selected if copy_attempted else []
+uploaded_files = selected if mode == "execute" and operation_status in {"UPLOAD_VERIFIED", "VERIFY_FAILED"} else []  # the copy itself succeeded in both states
+verified_files = selected if mode == "execute" and operation_status == "UPLOAD_VERIFIED" else []
+dry_run_files = selected if mode == "dry-run" and operation_status == "DRY_RUN_PASS" else []
+
+manifest = {
+    "schema_name": "upload_archive_manifest",
+    "schema_version": 1,
+    "checkpoint_id": 7,
+    "checkpoint_name": "Google Drive Offload",
+    "uploader": {
+        "name": os.environ["SCRIPT_NAME"],
+        "version": os.environ["SCRIPT_VERSION"],
+    },
+    "started_at_utc": os.environ["STARTED_AT"],
+    "ended_at_utc": os.environ["ENDED_AT"],
+    "command_mode": mode,
+    "operation_status": operation_status,
+    "gate_status": gate_status,
+    "rclone": {
+        "binary": os.environ["RCLONE_BIN"],
+        "available": os.environ["RCLONE_AVAILABLE"] == "1",
+        "version": os.environ["RCLONE_VERSION"],
+        "destination_configured": bool(dest),
+        "destination": dest if dest else None,
+        "copy_attempted": copy_attempted,
+        "copy_exit_code": int(copy_exit_code) if copy_exit_code else None,
+        "check_attempted": check_attempted,
+        "check_exit_code": int(check_exit_code) if check_exit_code else None,
+    },
+    "config": {
+        "data_dir": os.environ["DATA_DIR"],
+        "raw_dir": os.environ["RAW_DIR"],
+        "source_manifest_dir": os.environ["SOURCE_MANIFEST_DIR"],
+        "manifest_path": str(manifest_path),
+        "min_age_seconds": int(os.environ["MIN_AGE_SECONDS"]),
+        "retention_days": int(os.environ["RETENTION_DAYS"]),
+        "cleanup_after_verify": os.environ["CLEANUP_AFTER_VERIFY"] == "1",
+    },
+    "planned_files": selected,
+    "attempted_files": attempted_files,
+    "dry_run_files": dry_run_files,
+    "uploaded_files": uploaded_files,
+    "verified_files": verified_files,
+    "skipped_open_or_recent_files": [
+        item for item in skipped if item.get("reason") == "modified_within_min_age_seconds"
+    ],
+    "skipped_files": skipped,
+    "retained_local_files": retained_local,
+    "deleted_local_files": deleted_local,
+    "counts": {
+        "planned": len(selected),
+        "attempted": len(attempted_files),
+        "dry_run": len(dry_run_files),
+        "uploaded": len(uploaded_files),
+        "verified": len(verified_files),
+        "skipped": len(skipped),
+        "retained_local": len(retained_local),
+        "deleted_local": len(deleted_local),
+    },
+    "warnings": plan["warnings"],
+    "known_gaps": [
+        "A dry-run does not prove remote write access.",
+        "Real upload requires a configured rclone remote outside the repository.",
+        "Local files are retained unless --cleanup-after-verify is used after successful verification.",
+    ],
+}
+
+if operation_status == "BLOCKED_RCLONE_UNAVAILABLE":
+    manifest["warnings"].append("rclone binary was not available; copy and verification were not attempted.")
+if operation_status == "BLOCKED_DEST_MISSING":
+    manifest["warnings"].append("No rclone destination was configured; set --dest or ORDERBOOKS_RCLONE_DEST.")
+if mode == "dry-run":
+    manifest["warnings"].append("Dry-run mode does not perform a real upload; checkpoint real-upload gate remains blocked.")
+
+manifest_path.parent.mkdir(parents=True, exist_ok=True)
+manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+# Short machine-readable summary on stdout for the caller's log.
+print(
+    json.dumps(
+        {
+            "gate_status": gate_status,
+            "operation_status": operation_status,
+            "manifest_path": str(manifest_path),
+            "planned_files": len(selected),
+            "attempted_files": len(attempted_files),
+            "uploaded_files": len(uploaded_files),
+            "verified_files": len(verified_files),
+            "skipped_files": len(plan["skipped_files"]),
+        },
+        indent=2,
+        sort_keys=True,
+    )
+)
+PY
+
+case "${OPERATION_STATUS}" in  # exit code: 0 success, 2 destination missing, 3 rclone missing, 1 any other status
+  UPLOAD_VERIFIED|DRY_RUN_PASS)
+    exit 0
+    ;;
+  BLOCKED_DEST_MISSING)
+    echo "No rclone destination configured. Set --dest or ORDERBOOKS_RCLONE_DEST." >&2
+    exit 2
+    ;;
+  BLOCKED_RCLONE_UNAVAILABLE)
+    echo "rclone is not available. Install rclone before running dry-run or execute mode." >&2
+    exit 3
+    ;;
+  *)  # COPY_FAILED, VERIFY_FAILED, or a status that was never advanced past PLANNED
+    echo "Upload operation failed with status: ${OPERATION_STATUS}" >&2
+    exit 1
+    ;;
+esac
diff --git a/scripts/vps_preflight_check.sh b/scripts/vps_preflight_check.sh
new file mode 100755
index 0000000..741b102
--- /dev/null
+++ b/scripts/vps_preflight_check.sh
@@ -0,0 +1,285 @@
+#!/usr/bin/env bash
+set -uo pipefail  # nounset + pipefail only; errexit is not enabled, so a failed check does not stop later checks.
+
+APP_DIR="$(pwd)"  # Replaced by --app-dir when given.
+PYTHON_BIN="${ORDERBOOKS_PYTHON:-python3}"
+RCLONE_BIN="${ORDERBOOKS_RCLONE_BIN:-rclone}"
+RCLONE_REMOTE="${ORDERBOOKS_RCLONE_DEST:-}"  # Empty means the remote access check is skipped.
+DATA_DIR=""  # The three directory settings stay empty unless passed as options; empty skips that create/write check.
+MANIFEST_DIR=""
+LOG_DIR=""
+MIN_FREE_GIB="${ORDERBOOKS_PREFLIGHT_MIN_FREE_GIB:-5}"
+REMOTE_TIMEOUT_SECONDS="${ORDERBOOKS_PREFLIGHT_REMOTE_TIMEOUT_SECONDS:-30}"
+
+FAILURES=0  # Incremented by log_fail; any non-zero value makes the script exit 1.
+WARNINGS=0  # Incremented by log_warn; reported in the SUMMARY line only.
+
+usage() {  # Print the help text; environment-variable defaults are listed next to every option that has one.
+  cat <<'EOF'
+Usage: scripts/vps_preflight_check.sh [options]
+
+Read-only VPS cutover preflight for the Polymarket order-book collector.
+
+Default behavior checks the repository, local tooling, unit syntax, disk space,
+and rclone availability. It does not print rclone config and does not require
+secrets.
+
+Options:
+  --app-dir DIR              Repository checkout path. Default: current directory.
+  --python-bin PATH          Python interpreter. Default: ORDERBOOKS_PYTHON or python3.
+  --rclone-bin PATH          rclone binary. Default: ORDERBOOKS_RCLONE_BIN or rclone.
+  --rclone-remote REMOTE     Optional remote/path to check read-only, e.g. gdrive:orderbooks/polymarket. Default: ORDERBOOKS_RCLONE_DEST.
+  --data-dir DIR             Optional target data directory to create/check writable.
+  --manifest-dir DIR         Optional target manifest directory to create/check writable.
+  --log-dir DIR              Optional target log directory to create/check writable.
+  --min-free-gib N           Minimum free GiB for checked filesystems. Default: ORDERBOOKS_PREFLIGHT_MIN_FREE_GIB or 5.
+  --remote-timeout-seconds N Timeout for rclone remote read check. Default: ORDERBOOKS_PREFLIGHT_REMOTE_TIMEOUT_SECONDS or 30.
+  --help                     Show this help.
+
+Directory options intentionally create missing directories before checking
+writability. Omit them for a repo-only read-only check.
+EOF
+}
+
+log_pass() { printf 'PASS %s\n' "${*}"; }  # One PASS line; counters untouched.
+log_info() { printf 'INFO %s\n' "${*}"; }  # One INFO line; counters untouched.
+log_warn() { ((WARNINGS += 1)); printf 'WARN %s\n' "${*}"; }  # Count the warning, then print it.
+log_fail() { ((FAILURES += 1)); printf 'FAIL %s\n' "${*}"; }  # Count the failure, then print it.
+run_quiet() { "$@" &>/dev/null; }  # Run the given command with stdout and stderr discarded; its status is returned.
+
+while [[ $# -gt 0 ]]; do  # ${2?...} stops with a readable message (instead of "unbound variable") when a value is missing.
+  case "$1" in
+    --app-dir) APP_DIR="${2?--app-dir requires a value}"; shift 2 ;;
+    --python-bin) PYTHON_BIN="${2?--python-bin requires a value}"; shift 2 ;;
+    --rclone-bin) RCLONE_BIN="${2?--rclone-bin requires a value}"; shift 2 ;;
+    --rclone-remote) RCLONE_REMOTE="${2?--rclone-remote requires a value}"; shift 2 ;;
+    --data-dir) DATA_DIR="${2?--data-dir requires a value}"; shift 2 ;;
+    --manifest-dir) MANIFEST_DIR="${2?--manifest-dir requires a value}"; shift 2 ;;
+    --log-dir) LOG_DIR="${2?--log-dir requires a value}"; shift 2 ;;
+    --min-free-gib) MIN_FREE_GIB="${2?--min-free-gib requires a value}"; shift 2 ;;
+    --remote-timeout-seconds) REMOTE_TIMEOUT_SECONDS="${2?--remote-timeout-seconds requires a value}"; shift 2 ;;
+    --help) usage; exit 0 ;;
+    *) log_fail "unknown argument: $1"; usage >&2; exit 2 ;;
+  esac
+done
+
+APP_DIR="${APP_DIR%/}"
+if [[ ! -d "${APP_DIR}" ]]; then
+  log_fail "app directory does not exist: ${APP_DIR}"
+  printf 'SUMMARY failures=%s warnings=%s\n' "${FAILURES}" "${WARNINGS}"
+  exit 1
+fi
+
+cd "${APP_DIR}" || {
+  log_fail "could not cd to app directory: ${APP_DIR}"
+  printf 'SUMMARY failures=%s warnings=%s\n' "${FAILURES}" "${WARNINGS}"
+  exit 1
+}
+
+check_python() {  # PASS with the interpreter's version banner, or FAIL when PYTHON_BIN cannot be resolved.
+  if command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
+    local version="$("${PYTHON_BIN}" --version 2>&1 || true)"  # local: do not leak a global named "version"
+    log_pass "python available: ${PYTHON_BIN} (${version})"
+  else
+    log_fail "python not found: ${PYTHON_BIN}"
+  fi
+}
+
+check_required_files() {  # One PASS/FAIL line per required repository file; returns 1 if any is absent, else 0.
+  local rc=0 candidate
+  local -a expected=(
+    "scripts/discover_polymarket_btc_markets.py"
+    "scripts/collect_polymarket_orderbooks.py"
+    "scripts/normalize_polymarket_orderbooks.py"
+    "scripts/run_polymarket_collector_cycle.sh"
+    "scripts/upload_archive_rclone.sh"
+    "scripts/vps_runtime_smoke_check.sh"
+    "config/polymarket_collector.vps.example.yaml"
+    "docs/VPS_DEPLOYMENT.md"
+    "docs/GOOGLE_DRIVE_OFFLOAD.md"
+    "systemd/polymarket-orderbook-collector.service"
+    "systemd/polymarket-orderbook-uploader.service"
+    "systemd/polymarket-orderbook-uploader.timer"
+  )
+  for candidate in "${expected[@]}"; do
+    if [[ ! -f "${candidate}" ]]; then
+      rc=1
+      log_fail "required file missing: ${candidate}"
+    else
+      log_pass "required file exists: ${candidate}"
+    fi
+  done
+  return "${rc}"
+}
+
+check_python_compile() {
+  if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
+    log_fail "cannot compile Python scripts because Python is missing"
+    return
+  fi
+  if run_quiet "${PYTHON_BIN}" - <<'PY'
+from pathlib import Path
+
+paths = [
+    Path("scripts/discover_polymarket_btc_markets.py"),
+    Path("scripts/collect_polymarket_orderbooks.py"),
+    Path("scripts/normalize_polymarket_orderbooks.py"),
+]
+for path in paths:
+    source = path.read_text(encoding="utf-8")
+    compile(source, str(path), "exec")
+PY
+  then
+    log_pass "collector/discovery/normalization Python scripts compile without bytecode writes"
+  else
+    log_fail "Python no-bytecode compile check failed"
+  fi
+}
+
+check_shell_syntax() {  # Parse every scripts/*.sh with `bash -n` (no execution); returns 1 if any script fails.
+  local rc=0 candidate
+  for candidate in scripts/*.sh; do
+    [[ -f "${candidate}" ]] || continue  # Also skips the unexpanded pattern when nothing matches.
+    if ! run_quiet bash -n "${candidate}"; then
+      rc=1
+      log_fail "bash syntax failed: ${candidate}"
+    else
+      log_pass "bash syntax ok: ${candidate}"
+    fi
+  done
+  return "${rc}"
+}
+
+check_systemd_units() {  # Run `systemd-analyze verify` on the three unit files; only a WARN when the tool is absent.
+  local -a unit_files=(
+    "systemd/polymarket-orderbook-collector.service"
+    "systemd/polymarket-orderbook-uploader.service"
+    "systemd/polymarket-orderbook-uploader.timer"
+  )
+  if ! command -v systemd-analyze >/dev/null 2>&1; then
+    log_warn "systemd-analyze unavailable; skipped unit parse check"
+    return
+  fi
+  if run_quiet systemd-analyze verify "${unit_files[@]}"; then
+    log_pass "systemd units parse with systemd-analyze"
+  else
+    log_fail "systemd-analyze verify failed for one or more units"
+  fi
+}
+
+remote_name_from_dest() {
+  local dest="$1"
+  case "${dest}" in
+    *:*) printf '%s:\n' "${dest%%:*}" ;;
+    *) printf '\n' ;;
+  esac
+}
+
+run_with_timeout() {  # Run "$@" under timeout(1) for REMOTE_TIMEOUT_SECONDS; run it unbounded when timeout is not installed.
+  if ! command -v timeout >/dev/null 2>&1; then
+    "$@"
+    return
+  fi
+  timeout "${REMOTE_TIMEOUT_SECONDS}" "$@"
+}
+
+check_rclone() {  # Check the rclone binary and, when RCLONE_REMOTE is set, that the remote is configured and listable.
+  if [[ -x "${RCLONE_BIN}" ]] || command -v "${RCLONE_BIN}" >/dev/null 2>&1; then
+    local version="$("${RCLONE_BIN}" version 2>/dev/null | head -n 1 || true)"
+    log_pass "rclone available: ${RCLONE_BIN} (${version})"
+  else
+    log_fail "rclone not found: ${RCLONE_BIN}"
+    return
+  fi
+
+  if [[ -z "${RCLONE_REMOTE}" ]]; then
+    log_info "no rclone remote provided; skipped remote access check"
+    return
+  fi
+
+  local remote_name remotes
+  remote_name="$(remote_name_from_dest "${RCLONE_REMOTE}")"
+  if [[ -z "${remote_name}" ]]; then
+    log_fail "rclone remote must include a remote name ending in ':': ${RCLONE_REMOTE}"
+    return
+  fi
+  # List first, then match: with pipefail, `rclone | grep -q` can fail on SIGPIPE even though the remote matched.
+  if remotes="$("${RCLONE_BIN}" listremotes 2>/dev/null)" && grep -Fxq -- "${remote_name}" <<<"${remotes}"; then
+    log_pass "rclone remote is configured: ${remote_name}"
+  else
+    log_fail "rclone remote is not configured or not visible to this user: ${remote_name}"
+    return
+  fi
+
+  if run_with_timeout "${RCLONE_BIN}" lsf --max-depth 1 "${RCLONE_REMOTE}" >/dev/null 2>&1; then
+    log_pass "rclone remote read check succeeded without printing config: ${RCLONE_REMOTE}"
+  else
+    log_fail "rclone remote read check failed or timed out: ${RCLONE_REMOTE}"
+  fi
+}
+
+check_target_dir() {  # $1 = label used in messages, $2 = directory to create if missing and test for writability.
+  local kind="$1" dir="$2"
+  if [[ -z "${dir}" ]]; then
+    log_info "no ${kind} directory provided; skipped create/write check"
+    return
+  fi
+  if ! run_quiet mkdir -p "${dir}" || [[ ! -d "${dir}" || ! -w "${dir}" ]]; then
+    log_fail "${kind} directory cannot be created or is not writable: ${dir}"
+  else
+    log_pass "${kind} directory exists and is writable: ${dir}"
+  fi
+}
+
+check_disk_free() {  # $1 = existing path on the filesystem to measure, $2 = label used in messages.
+  local target="$1" label="$2" available_kib min_kib
+  [[ -e "${target}" ]] || { log_warn "disk target does not exist, skipping ${label}: ${target}"; return; }
+  # Validate the threshold first: a bare word such as "abc" would be treated as an unset variable by the
+  # arithmetic below and, under `set -u`, end the script before the SUMMARY line is printed.
+  [[ "${MIN_FREE_GIB}" =~ ^[0-9]+$ ]] || { log_fail "minimum free GiB must be a non-negative integer: ${MIN_FREE_GIB}"; return; }
+  available_kib="$(df -Pk "${target}" | awk 'NR==2 {print $4}')"
+  min_kib=$((10#${MIN_FREE_GIB} * 1024 * 1024))  # 10# keeps a leading zero from being parsed as octal.
+  if [[ "${available_kib}" =~ ^[0-9]+$ && "${available_kib}" -ge "${min_kib}" ]]; then
+    log_pass "disk free ok for ${label}: available_kib=${available_kib} min_gib=${MIN_FREE_GIB}"
+  else
+    log_fail "disk free below threshold for ${label}: available_kib=${available_kib:-unknown} min_gib=${MIN_FREE_GIB}"
+  fi
+}
+
+check_secret_requirements() {  # Scan runtime config, units, and scripts for credential-like keywords.
+  local scan_status files=(
+    "config/polymarket_collector.vps.example.yaml" "systemd/polymarket-orderbook-collector.service"
+    "systemd/polymarket-orderbook-uploader.service" "systemd/polymarket-orderbook-uploader.timer"
+    "scripts/run_polymarket_collector_cycle.sh" "scripts/upload_archive_rclone.sh"
+  )
+  # grep exits 0 on a match, 1 when nothing matched, and 2 on errors such as an unreadable file.
+  # With -q a match yields 0 even if another file errored, so an error can no longer hide a hit or pass as clean.
+  grep -E -i -q -- '(api[_-]?key|private[_-]?key|mnemonic|wallet|password|client[_-]?secret|access[_-]?token|refresh[_-]?token)' "${files[@]}" 2>/dev/null; scan_status=$?
+  case "${scan_status}" in
+    0) log_fail "secret-like credential requirement found in runtime config, units, or scripts" ;;
+    1) log_pass "no API keys, private keys, mnemonics, wallets, or passwords are required by runtime files" ;;
+    *) log_fail "secret scan incomplete: one or more runtime files could not be read" ;;
+  esac
+  log_info "rclone credentials, if used, must remain machine-local outside the repository"
+}
+
+check_python
+check_required_files
+check_python_compile
+check_shell_syntax
+check_systemd_units
+check_rclone
+check_target_dir "data" "${DATA_DIR}"
+check_target_dir "manifest" "${MANIFEST_DIR}"
+check_target_dir "log" "${LOG_DIR}"
+check_disk_free "." "repository"
+if [[ -n "${DATA_DIR}" && -d "${DATA_DIR}" ]]; then
+  check_disk_free "${DATA_DIR}" "data directory"
+fi
+check_secret_requirements
+
+printf 'SUMMARY failures=%s warnings=%s\n' "${FAILURES}" "${WARNINGS}"
+if [[ "${FAILURES}" -eq 0 ]]; then
+  exit 0
+fi
+exit 1
diff --git a/scripts/vps_runtime_smoke_check.sh b/scripts/vps_runtime_smoke_check.sh
new file mode 100755
index 0000000..dc7982d
--- /dev/null
+++ b/scripts/vps_runtime_smoke_check.sh
@@ -0,0 +1,279 @@
+#!/usr/bin/env bash
+set -uo pipefail
+
+APP_DIR="${ORDERBOOKS_APP_DIR:-/opt/orderbooks}"
+DATA_DIR="${ORDERBOOKS_DATA_DIR:-/var/lib/orderbooks}"
+RAW_DIR="${ORDERBOOKS_OUTPUT_DIR:-}"  # Left empty here; the DATA_DIR-based default is applied after option parsing.
+MANIFEST_DIR="${ORDERBOOKS_MANIFEST_DIR:-}"  # Same: defaulted after parsing so --data-dir is honoured in any order.
+COLLECTOR_SERVICE="${ORDERBOOKS_COLLECTOR_SERVICE:-polymarket-orderbook-collector.service}"
+UPLOADER_SERVICE="${ORDERBOOKS_UPLOADER_SERVICE:-polymarket-orderbook-uploader.service}"
+WAIT_SECONDS="${ORDERBOOKS_SMOKE_WAIT_SECONDS:-900}"
+RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"
+EVIDENCE_PATH="${ORDERBOOKS_SMOKE_EVIDENCE_PATH:-}"  # Defaulted after parsing so it follows the final MANIFEST_DIR.
+PYTHON_BIN="${ORDERBOOKS_PYTHON:-python3}"
+
+usage() {
+  cat <<'EOF'
+Usage: scripts/vps_runtime_smoke_check.sh [options]
+
+Run on the VPS after installing collector/uploader systemd units. The check
+records durable JSON evidence, forces one collector service restart, verifies
+old raw gzip files still parse and keep their checksum, waits for a later valid
+collector cycle, then starts the uploader service and records upload evidence.
+
+Options:
+  --app-dir DIR             App checkout. Default: /opt/orderbooks.
+  --data-dir DIR            Data root. Default: /var/lib/orderbooks.
+  --raw-dir DIR             Raw output dir. Default: DATA_DIR/raw_orderbooks.
+  --manifest-dir DIR        Manifest dir. Default: DATA_DIR/manifests.
+  --collector-service NAME  systemd collector service name.
+  --uploader-service NAME   systemd uploader service name.
+  --wait-seconds N          Max wait for valid cycles. Default: 900.
+  --evidence-path PATH      JSON evidence output path.
+  --help                    Show this help.
+
+This script does not delete raw files or manifests. Failures are written to the
+evidence JSON and should be preserved for review.
+EOF
+}
+
+while [[ $# -gt 0 ]]; do  # ${2?...} stops with a readable message when an option is missing its value.
+  case "$1" in
+    --app-dir) APP_DIR="${2?--app-dir requires a value}"; shift 2 ;;
+    --data-dir) DATA_DIR="${2?--data-dir requires a value}"; shift 2 ;;
+    --raw-dir) RAW_DIR="${2?--raw-dir requires a value}"; shift 2 ;;
+    --manifest-dir) MANIFEST_DIR="${2?--manifest-dir requires a value}"; shift 2 ;;
+    --collector-service) COLLECTOR_SERVICE="${2?--collector-service requires a value}"; shift 2 ;;
+    --uploader-service) UPLOADER_SERVICE="${2?--uploader-service requires a value}"; shift 2 ;;
+    --wait-seconds) WAIT_SECONDS="${2?--wait-seconds requires a value}"; shift 2 ;;
+    --evidence-path) EVIDENCE_PATH="${2?--evidence-path requires a value}"; shift 2 ;;
+    --help) usage; exit 0 ;;
+    *) echo "Unknown argument: $1" >&2; usage >&2; exit 2 ;;
+  esac
+done
+: "${RAW_DIR:=${DATA_DIR}/raw_orderbooks}" "${MANIFEST_DIR:=${DATA_DIR}/manifests}"; : "${EVIDENCE_PATH:=${MANIFEST_DIR}/vps_runtime_smoke_${RUN_ID}.json}"
+mkdir -p "$(dirname "${EVIDENCE_PATH}")"
+
+PYTHONDONTWRITEBYTECODE=1 "${PYTHON_BIN}" - "$APP_DIR" "$DATA_DIR" "$RAW_DIR" "$MANIFEST_DIR" "$COLLECTOR_SERVICE" "$UPLOADER_SERVICE" "$WAIT_SECONDS" "$EVIDENCE_PATH" <<'PY_SMOKE'
+import datetime as dt
+import gzip
+import hashlib
+import json
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+app_dir = Path(sys.argv[1])
+data_dir = Path(sys.argv[2])
+raw_dir = Path(sys.argv[3])
+manifest_dir = Path(sys.argv[4])
+collector_service = sys.argv[5]
+uploader_service = sys.argv[6]
+wait_seconds = int(sys.argv[7])
+evidence_path = Path(sys.argv[8])
+started = dt.datetime.now(dt.UTC).replace(microsecond=0)
+checks = []
+failures = []
+
+
+def iso_now():
+    return dt.datetime.now(dt.UTC).replace(microsecond=0).isoformat().replace('+00:00', 'Z')
+
+
+def run(command):
+    proc = subprocess.run(command, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    item = {
+        'command': command,
+        'exit_code': proc.returncode,
+        'stdout_tail': proc.stdout[-4000:],
+        'stderr_tail': proc.stderr[-4000:],
+        'ran_at_utc': iso_now(),
+    }
+    checks.append(item)
+    return item
+
+
+def sha256(path):
+    digest = hashlib.sha256()
+    with path.open('rb') as handle:
+        for chunk in iter(lambda: handle.read(1024 * 1024), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def parse_raw(path):
+    rows = 0
+    first_keys = []
+    with gzip.open(path, 'rt', encoding='utf-8') as handle:
+        for line in handle:
+            if not line.strip():
+                continue
+            obj = json.loads(line)
+            if rows == 0:
+                first_keys = sorted(obj.keys())
+            rows += 1
+    return rows, first_keys
+
+
+def collector_manifests():
+    if not manifest_dir.exists():
+        return []
+    return sorted(manifest_dir.glob('polymarket_orderbook_collector_*.json'), key=lambda path: path.stat().st_mtime)
+
+
+def validate_collector(path):
+    manifest = json.loads(path.read_text(encoding='utf-8'))
+    output_files = []
+    for item in manifest.get('output_files', []):
+        raw_path = Path(item['path'])
+        rows, first_keys = parse_raw(raw_path)
+        actual_sha = sha256(raw_path)
+        output_files.append({
+            'path': str(raw_path),
+            'bytes': raw_path.stat().st_size,
+            'manifest_rows': item.get('rows'),
+            'rows_parsed': rows,
+            'row_count_matches_manifest': rows == item.get('rows'),
+            'manifest_sha256': item.get('sha256'),
+            'actual_sha256': actual_sha,
+            'sha256_matches_manifest': actual_sha == item.get('sha256'),
+            'first_row_keys': first_keys,
+            'under_raw_dir': raw_path.resolve().is_relative_to(raw_dir.resolve()),
+            'uses_live_sample_path': 'live_sample' in raw_path.parts,
+        })
+    valid = (
+        manifest.get('gate_status') == 'PASS'
+        and manifest.get('rows_written', 0) > 0
+        and manifest.get('failure_count') == 0
+        and not manifest.get('failures')
+        and bool(output_files)
+        and all(item['rows_parsed'] > 0 and item['row_count_matches_manifest'] and item['sha256_matches_manifest'] and item['under_raw_dir'] and not item['uses_live_sample_path'] for item in output_files)
+    )
+    return {
+        'path': str(path),
+        'manifest': manifest,
+        'output_files': output_files,
+        'valid': valid,
+    }
+
+
+def latest_valid_after(after_mtime=0):
+    deadline = time.time() + wait_seconds
+    last_error = None
+    while time.time() <= deadline:
+        for path in reversed(collector_manifests()):
+            if path.stat().st_mtime <= after_mtime:
+                continue
+            try:
+                result = validate_collector(path)
+            except Exception as exc:
+                last_error = str(exc)
+                continue
+            if result['valid']:
+                return result
+            last_error = f"latest candidate invalid: {path}"
+        time.sleep(10)
+    raise TimeoutError(last_error or f'no valid collector manifest after mtime {after_mtime}')
+
+
+def latest_upload_after(after_mtime=0):
+    candidates = sorted(manifest_dir.glob('upload_archive_*.json'), key=lambda path: path.stat().st_mtime)
+    candidates = [path for path in candidates if path.stat().st_mtime >= after_mtime]
+    if not candidates:
+        raise FileNotFoundError('no upload_archive_*.json manifest found after uploader run')
+    path = candidates[-1]
+    manifest = json.loads(path.read_text(encoding='utf-8'))
+    verified_count = manifest.get('counts', {}).get('verified', len(manifest.get('verified_files', [])))
+    return {
+        'path': str(path),
+        'manifest': manifest,
+        'verified_count': verified_count,
+        'valid': manifest.get('operation_status') == 'UPLOAD_VERIFIED' and manifest.get('gate_status') == 'PASS' and manifest.get('rclone', {}).get('copy_exit_code') == 0 and manifest.get('rclone', {}).get('check_exit_code') == 0 and verified_count > 0,
+    }
+
+summary = {  # Evidence record; serialized to evidence_path in the finally clause of the main try block.
+    'schema_name': 'vps_runtime_smoke_result',
+    'schema_version': 1,
+    'started_at_utc': started.isoformat().replace('+00:00', 'Z'),
+    'ended_at_utc': None,  # filled in just before the evidence file is written
+    'gate_status': 'ERROR',  # stays ERROR unless the run reaches the explicit PASS/FAIL decision
+    'production_ready': False,
+    'app_dir': str(app_dir),
+    'data_dir': str(data_dir),
+    'raw_dir': str(raw_dir),
+    'manifest_dir': str(manifest_dir),
+    'collector_service': collector_service,
+    'uploader_service': uploader_service,
+    'wait_seconds': wait_seconds,
+    'checks': checks,  # same list object that run() appends to
+    'failures': failures,  # same list object the main flow appends to
+}
+
+try:
+    active = run(['systemctl', 'is-active', collector_service])
+    if active['exit_code'] != 0:
+        failures.append('collector service is not active under systemd')
+        raise RuntimeError('collector service not active')
+
+    before = latest_valid_after(0)
+    before_mtime = Path(before['path']).stat().st_mtime
+    old_raw = before['output_files'][0]
+    old_raw_sha = old_raw['actual_sha256']
+    old_raw_path = Path(old_raw['path'])
+
+    restart = run(['systemctl', 'restart', collector_service])
+    if restart['exit_code'] != 0:
+        failures.append('collector service restart command failed')
+        raise RuntimeError('restart failed')
+    active_after = run(['systemctl', 'is-active', collector_service])
+    if active_after['exit_code'] != 0:
+        failures.append('collector service is not active after restart')
+        raise RuntimeError('collector inactive after restart')
+
+    after = latest_valid_after(before_mtime)
+    old_rows_after, _ = parse_raw(old_raw_path)
+    old_file_unchanged = sha256(old_raw_path) == old_raw_sha and old_rows_after == old_raw['rows_parsed']
+    if not old_file_unchanged:
+        failures.append('raw file from before restart changed or stopped parsing')
+
+    upload_start_mtime = time.time()
+    upload_run = run(['systemctl', 'start', uploader_service])
+    if upload_run['exit_code'] != 0:
+        failures.append('uploader service start failed')
+    try:
+        upload = latest_upload_after(upload_start_mtime - 2)
+        if not upload.get('valid'):
+            failures.append('uploader did not produce a verified upload manifest with at least one verified file')
+    except Exception as exc:
+        upload = {'path': None, 'valid': False, 'error': str(exc)}
+        failures.append(str(exc))
+
+    collector_logs = run(['journalctl', '-u', collector_service, '-n', '80', '--no-pager'])
+    uploader_logs = run(['journalctl', '-u', uploader_service, '-n', '80', '--no-pager'])
+
+    summary.update({
+        'before_restart_collector': before,
+        'after_restart_collector': after,
+        'old_raw_file_unchanged_after_restart': old_file_unchanged,
+        'upload_result': upload,
+        'collector_log_check_exit_code': collector_logs['exit_code'],
+        'uploader_log_check_exit_code': uploader_logs['exit_code'],
+    })
+    if after['valid'] and old_file_unchanged and upload.get('valid') and not failures:
+        summary['gate_status'] = 'PASS'
+    else:
+        summary['gate_status'] = 'FAIL'
+except Exception as exc:
+    failures.append(str(exc))
+    summary['exception'] = repr(exc)
+finally:
+    summary['ended_at_utc'] = iso_now()
+    evidence_path.parent.mkdir(parents=True, exist_ok=True)
+    evidence_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + '\n', encoding='utf-8')
+
+print(f"SMOKE_EVIDENCE={evidence_path}")
+print(f"SMOKE_GATE={summary['gate_status']}")
+if summary['gate_status'] != 'PASS':
+    sys.exit(1)
+PY_SMOKE
diff --git a/systemd/polymarket-orderbook-collector.service b/systemd/polymarket-orderbook-collector.service
new file mode 100644
index 0000000..37e5519
--- /dev/null
+++ b/systemd/polymarket-orderbook-collector.service
@@ -0,0 +1,38 @@
+[Unit]
+Description=Polymarket raw order-book collector cycle
+Documentation=file:/opt/orderbooks/docs/VPS_DEPLOYMENT.md
+After=network-online.target
+Wants=network-online.target
+StartLimitIntervalSec=10min
+StartLimitBurst=20
+# Runs the collector cycle script as user orderbooks; Restart=always relaunches it 30s after each exit.
+[Service]
+Type=simple
+User=orderbooks
+Group=orderbooks
+WorkingDirectory=/opt/orderbooks
+Environment=PYTHONUNBUFFERED=1
+Environment=ORDERBOOKS_APP_DIR=/opt/orderbooks
+Environment=ORDERBOOKS_DATA_DIR=/var/lib/orderbooks
+Environment=ORDERBOOKS_OUTPUT_DIR=/var/lib/orderbooks/raw_orderbooks
+Environment=ORDERBOOKS_PYTHON=/opt/orderbooks/.venv/bin/python
+Environment=ORDERBOOKS_COLLECTOR_CONFIG=/etc/orderbooks/polymarket_collector.vps.yaml
+EnvironmentFile=-/etc/orderbooks/polymarket-orderbook-collector.env
+ExecStart=/bin/bash /opt/orderbooks/scripts/run_polymarket_collector_cycle.sh
+Restart=always
+RestartSec=30s
+TimeoutStopSec=90s
+KillSignal=SIGTERM
+KillMode=control-group
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=polymarket-orderbook-collector
+NoNewPrivileges=true
+PrivateTmp=true
+ProtectSystem=strict
+ProtectHome=true
+ReadWritePaths=/var/lib/orderbooks
+StateDirectory=orderbooks
+# Once enabled, the unit is pulled in by multi-user.target.
+[Install]
+WantedBy=multi-user.target
diff --git a/systemd/polymarket-orderbook-uploader.service b/systemd/polymarket-orderbook-uploader.service
new file mode 100644
index 0000000..a88f910
--- /dev/null
+++ b/systemd/polymarket-orderbook-uploader.service
@@ -0,0 +1,29 @@
+[Unit]
+Description=Orderbooks archive upload via rclone
+Documentation=file:/opt/orderbooks/docs/GOOGLE_DRIVE_OFFLOAD.md
+After=network-online.target
+Wants=network-online.target
+# One-shot upload run (--execute) as user orderbooks; the leading '-' makes the EnvironmentFile optional.
+[Service]
+Type=oneshot
+User=orderbooks
+Group=orderbooks
+WorkingDirectory=/opt/orderbooks
+Environment=ORDERBOOKS_UPLOAD_DATA_DIR=/var/lib/orderbooks
+Environment=ORDERBOOKS_UPLOAD_MANIFEST_DIR=/var/lib/orderbooks/manifests
+Environment=ORDERBOOKS_UPLOAD_RAW_DIR=/var/lib/orderbooks/raw_orderbooks
+Environment=ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS=600
+Environment=ORDERBOOKS_UPLOAD_RETENTION_DAYS=7
+Environment=ORDERBOOKS_RCLONE_BIN=/usr/bin/rclone
+EnvironmentFile=-/etc/orderbooks/orderbook-uploader.env
+ExecStart=/bin/bash /opt/orderbooks/scripts/upload_archive_rclone.sh --execute
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=polymarket-orderbook-uploader
+NoNewPrivileges=true
+PrivateTmp=true
+ProtectSystem=strict
+ProtectHome=true
+ReadWritePaths=/var/lib/orderbooks
+StateDirectory=orderbooks
+# No [Install] section: the service is started by polymarket-orderbook-uploader.timer or by hand.
diff --git a/systemd/polymarket-orderbook-uploader.timer b/systemd/polymarket-orderbook-uploader.timer
new file mode 100644
index 0000000..18ef329
--- /dev/null
+++ b/systemd/polymarket-orderbook-uploader.timer
@@ -0,0 +1,12 @@
+[Unit]
+Description=Run orderbooks archive upload periodically
+Documentation=file:/opt/orderbooks/docs/GOOGLE_DRIVE_OFFLOAD.md
+# Fires hourly with up to 10 minutes of random delay; Persistent=true catches up a run missed while powered off.
+[Timer]
+OnCalendar=hourly
+RandomizedDelaySec=10min
+Persistent=true
+Unit=polymarket-orderbook-uploader.service
+# Once enabled, the timer is pulled in by timers.target.
+[Install]
+WantedBy=timers.target