Add websocket recorder canary deployment

This commit is contained in:
philipp 2026-04-19 19:17:56 +02:00
parent 9fc523c0a7
commit 0d86f56514
20 changed files with 4428 additions and 15 deletions

View file

@ -1,9 +1,9 @@
name: deploy
# Pushes are intentionally non-deploying for the websocket canary work.
# Use workflow_dispatch for the broad/full deploy path, or
# scripts/deploy/deploy_ws_canary_kaniko.sh for the canary-only path.
on:
push:
branches:
- main
workflow_dispatch:
jobs:
@ -14,7 +14,7 @@ jobs:
REGISTRY_HOST: ${{ vars.REGISTRY_HOST }}
PROJECT_NAME: ${{ vars.PROJECT_NAME || 'orderbooks' }}
PROJECT_NAMESPACE: ${{ vars.PROJECT_NAMESPACE || 'orderbooks' }}
PROJECT_DEPLOYMENTS: ${{ vars.PROJECT_DEPLOYMENTS || 'orderbooks-collector' }}
PROJECT_DEPLOYMENTS: ${{ vars.PROJECT_DEPLOYMENTS || 'orderbooks-collector,orderbooks-ws-recorder' }}
PROJECT_REGISTRY_SECRET_NAME: ${{ vars.PROJECT_REGISTRY_SECRET_NAME || 'orderbooks-registry-creds' }}
REPO_CLONE_URL: ${{ github.server_url }}/${{ github.repository }}.git
steps:

View file

@ -0,0 +1,38 @@
# Example config for the long-running Polymarket BTC websocket recorder.
# Public market data only. No API keys, private keys, wallets, or trading.
# Discovery may be refreshed by running the existing public discovery script.
# Discovery inputs: latest discovery JSON, its directory, and the refresh script.
discovery_path: /var/lib/orderbooks/discovery/polymarket_btc_markets_latest.json
discovery_dir: /var/lib/orderbooks/discovery
discovery_script_path: scripts/discover_polymarket_btc_markets.py
discovery_execute: true
# Discovery refresh cadence (seconds) and paging bounds.
discovery_refresh_interval_seconds: 600
discovery_max_pages: 3
discovery_page_limit: 100
# Runtime output. Raw websocket and REST checkpoint archives intentionally live
# under raw_orderbooks so the existing uploader can offload them.
raw_output_root: /var/lib/orderbooks/raw_orderbooks
manifest_dir: /var/lib/orderbooks/manifests
manifest_path: /var/lib/orderbooks/manifests/polymarket_ws_recorder_latest.json
# Public endpoints: the market websocket feed and the REST /books checkpoint API.
websocket_url: wss://ws-subscriptions-clob.polymarket.com/ws/market
clob_books_url: https://clob.polymarket.com/books
# 0 means all active BTC Up/Down markets. Use a positive value only for bounded
# local smoke tests or emergency load reduction.
market_limit: 0
# NOTE(review): presumably a safety margin, in seconds, before a market's end
# time - confirm the exact semantics against the recorder script.
market_end_safety_seconds: 420
# REST /books checkpoint cadence (seconds) and batch size.
# NOTE(review): rest_batch_size presumably counts token IDs per request - confirm.
rest_checkpoint_interval_seconds: 60
rest_batch_size: 50
# NOTE(review): presumably the number of book levels per side used for
# REST-vs-websocket comparison - confirm.
top_n: 10
# Feed-health, timeout, and reconnect settings; all values are in seconds.
stale_feed_threshold_seconds: 30
request_timeout_seconds: 15
websocket_timeout_seconds: 10
reconnect_backoff_seconds: 3
max_reconnect_backoff_seconds: 60
# Seconds between runtime manifest writes.
manifest_write_interval_seconds: 300
# Continuous by default. Set via CLI or env ORDERBOOKS_WS_DURATION_SECONDS for smoke tests.
duration_seconds: null

View file

@ -0,0 +1,20 @@
# Example config for a bounded Polymarket BTC websocket sample.
# Flat YAML only; no secrets are required.
# Input: discovery JSON listing the BTC markets to sample.
discovery_path: data/discovery/polymarket_btc_markets_latest.json
# Outputs: raw sample archives, the checkpoint manifest, and the checkpoint report.
output_root: data/ws_sample
manifest_path: data/manifests/checkpoint_010b_ws_raw_sample.json
report_path: reports/checkpoints/checkpoint_010b_ws_raw_sample.md
# Keep the default sample conservative; CLI can raise this to all discovered markets.
market_limit: 2
# Bounded run length and REST /books checkpoint cadence, both in seconds.
duration_seconds: 150
rest_checkpoint_interval_seconds: 30
# Timeouts (seconds) and reconnect policy for the sample run.
request_timeout_seconds: 15
websocket_timeout_seconds: 15
max_reconnects: 2
reconnect_backoff_seconds: 3
# NOTE(review): presumably a safety margin, in seconds, before a market's end
# time - confirm the exact semantics against the sample script.
market_end_safety_seconds: 420
# Public endpoints: the market websocket feed and the REST /books checkpoint API.
websocket_url: wss://ws-subscriptions-clob.polymarket.com/ws/market
clob_books_url: https://clob.polymarket.com/books

View file

@ -23,3 +23,33 @@ data:
request_timeout_seconds: 15
max_retries: 2
backoff_seconds: 2
polymarket_ws_collector.yaml: |
discovery_path: /var/lib/orderbooks/discovery/polymarket_btc_markets_latest.json
discovery_dir: /var/lib/orderbooks/discovery
discovery_script_path: scripts/discover_polymarket_btc_markets.py
discovery_execute: true
discovery_refresh_interval_seconds: 600
discovery_max_pages: 3
discovery_page_limit: 100
raw_output_root: /var/lib/orderbooks/raw_orderbooks
manifest_dir: /var/lib/orderbooks/manifests
manifest_path: /var/lib/orderbooks/manifests/polymarket_ws_recorder_latest.json
websocket_url: wss://ws-subscriptions-clob.polymarket.com/ws/market
clob_books_url: https://clob.polymarket.com/books
# Canary safety cap. The recorder script and example config default to 0/all.
# Remove this cap only after PVC sizing and upload cleanup are reviewed.
market_limit: 2
market_end_safety_seconds: 420
rest_checkpoint_interval_seconds: 60
rest_batch_size: 50
top_n: 10
stale_feed_threshold_seconds: 30
request_timeout_seconds: 15
websocket_timeout_seconds: 10
reconnect_backoff_seconds: 3
max_reconnect_backoff_seconds: 60
manifest_write_interval_seconds: 60
duration_seconds: null

View file

@ -40,6 +40,7 @@ spec:
- /bin/bash
- /app/scripts/upload_archive_rclone.sh
- --execute
- --cleanup-after-verify
env:
- name: ORDERBOOKS_DATA_DIR
value: /var/lib/orderbooks
@ -54,7 +55,7 @@ spec:
- name: ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS
value: "600"
- name: ORDERBOOKS_UPLOAD_RETENTION_DAYS
value: "7"
value: "3"
- name: ORDERBOOKS_RCLONE_BIN
value: /usr/bin/rclone
- name: ORDERBOOKS_RCLONE_DEST

View file

@ -0,0 +1,78 @@
# Websocket recorder canary Deployment. It runs as its own workload named
# orderbooks-ws-recorder and mounts the orderbooks-data PVC plus one key of the
# orderbooks-collector-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: orderbooks-ws-recorder
  namespace: orderbooks
  labels:
    app.kubernetes.io/name: orderbooks
    app.kubernetes.io/part-of: orderbooks
    app.kubernetes.io/component: ws-recorder
spec:
  # Single replica with Recreate: the old pod is terminated before the new one
  # starts, so two recorder pods never run at the same time during a rollout.
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: orderbooks
      app.kubernetes.io/component: ws-recorder
  template:
    metadata:
      labels:
        app.kubernetes.io/name: orderbooks
        app.kubernetes.io/part-of: orderbooks
        app.kubernetes.io/component: ws-recorder
    spec:
      # NOTE(review): the long grace period presumably lets the recorder close
      # its open gzip archive after SIGTERM - confirm against the loop script.
      terminationGracePeriodSeconds: 180
      imagePullSecrets:
        - name: orderbooks-registry-creds
      securityContext:
        runAsNonRoot: true
        runAsUser: 10001
        runAsGroup: 10001
        fsGroup: 10001
        fsGroupChangePolicy: OnRootMismatch
        # Use the container runtime's default seccomp filter. Together with the
        # non-root user, dropped capabilities, and disabled privilege
        # escalation this matches the "restricted" Pod Security profile.
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: ws-recorder
          # NOTE(review): "bootstrap" looks like a placeholder tag that deploy
          # tooling replaces with the built image - confirm.
          image: registry.doran.133011.xyz/orderbooks:bootstrap
          imagePullPolicy: IfNotPresent
          command:
            - /bin/bash
            - /app/scripts/run_polymarket_ws_recorder_loop.sh
          env:
            - name: ORDERBOOKS_APP_DIR
              value: /app
            - name: ORDERBOOKS_PYTHON
              value: python3
            - name: ORDERBOOKS_DATA_DIR
              value: /var/lib/orderbooks
            - name: ORDERBOOKS_WS_COLLECTOR_CONFIG
              value: /etc/orderbooks/polymarket_ws_collector.yaml
          volumeMounts:
            - name: orderbooks-data
              mountPath: /var/lib/orderbooks
            # Mount only the recorder's config key as a single read-only file.
            - name: collector-config
              mountPath: /etc/orderbooks/polymarket_ws_collector.yaml
              subPath: polymarket_ws_collector.yaml
              readOnly: true
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: "1"
              memory: 1Gi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
      volumes:
        - name: orderbooks-data
          persistentVolumeClaim:
            claimName: orderbooks-data
        - name: collector-config
          configMap:
            name: orderbooks-collector-config
            items:
              - key: polymarket_ws_collector.yaml
                path: polymarket_ws_collector.yaml

View file

@ -6,4 +6,5 @@ resources:
- configmap.yaml
- pvc.yaml
- deployment-collector.yaml
- deployment-ws-recorder.yaml
- cronjob-uploader.yaml

View file

@ -0,0 +1,30 @@
# Book Reconstruction Method
Checkpoint 10C reconstructs order-book state from raw Polymarket market websocket messages captured in Checkpoint 10B.
## Source Of Truth
Raw websocket and REST checkpoint gzip JSONL files are immutable source evidence. Reconstruction outputs are derived and reference the input file paths, line numbers, websocket message sequence spans, and REST checkpoint sequences.
## Applied Events
- `book` and `book_without_event_type` messages initialize or replace the full per-token bid/ask maps.
- `price_change` messages are applied after initialization. Observed `side=BUY` updates bids and `side=SELL` updates asks.
- Observed `size=0` is treated as level removal. Non-zero size replaces the level size at that price.
- `best_bid_ask`, `last_trade_price`, and unrelated `new_market` messages are preserved and counted but do not mutate the book map.
## Comparison
For each REST checkpoint, the reconstructor compares REST `/books` payloads with local websocket state after applying all websocket messages received at or before the REST checkpoint receive time. The comparison includes best bid, best ask, spread, bid/ask level counts, and top 10 levels by default.
## Limits
The sample is short, and network timing can produce REST-vs-websocket divergences. Divergence rows include raw websocket and REST references so follow-up can inspect whether a difference stems from timing, feed semantics, or a reconstruction defect.
## Checkpoint 10C Divergence Result
The accepted 10C sample produced 20 REST comparison rows: 8 exact top-10 matches and 12 divergent rows. In every divergent row, best bid, best ask, spread, level counts, and top-N price membership matched. The observed divergences were size-only deltas within shared top-N price levels.
Size-only divergence still matters. It can change depth, fillability assumptions, queue-size estimates, and any later answer about whether a hypothetical trade was observable and reproducible from the archived feed.
This result is useful evidence for the websocket path, but it is not production readiness. The sample is bounded, the timing relationship between REST checkpoints and websocket delivery is imperfect, and long-running reconnect, stale-feed, rotation, upload, and alert behavior still need their own checkpoint before deployment.

View file

@ -33,15 +33,25 @@ manifests: /var/lib/orderbooks/manifests
discovery: /var/lib/orderbooks/discovery
```
The collector uses one Deployment with one replica. The container runs
`/app/scripts/run_polymarket_collector_loop.sh`, which repeatedly executes the
existing bounded collector cycle and records loop failure/interruption manifests
instead of relying on Kubernetes crash loops for normal operation.
The REST snapshot collector uses one Deployment with one replica. The container
runs `/app/scripts/run_polymarket_collector_loop.sh`, which repeatedly executes
the existing bounded collector cycle and records loop failure/interruption
manifests instead of relying on Kubernetes crash loops for normal operation.
The websocket recorder canary uses a separate Deployment named
`orderbooks-ws-recorder`. It runs `/app/scripts/run_polymarket_ws_recorder_loop.sh`
and does not replace or stop `orderbooks-collector`. It writes raw websocket
archives under `/var/lib/orderbooks/raw_orderbooks/polymarket/ws_raw/`, REST
checkpoint archives under `/var/lib/orderbooks/raw_orderbooks/polymarket/rest_checkpoints/`,
and runtime manifests under `/var/lib/orderbooks/manifests/`.
The uploader uses one CronJob. It runs the existing rclone uploader in execute
mode, mounts the same PVC, mounts `orderbooks-rclone-config` read-only at
`/etc/rclone/rclone.conf`, sets `RCLONE_CONFIG` to that file, and uploads only
closed/aged files.
`/etc/rclone/rclone.conf`, sets `RCLONE_CONFIG` to that file, uploads only
closed/aged files, skips `.open`/temporary writer files, and uses
`--cleanup-after-verify`. Local cleanup is allowed only after rclone copy and
check succeed. The Kubernetes retention setting is 3 days because websocket raw
capture is materially larger than REST snapshots and the current PVC is 10Gi.
## Bootstrap This App Repo
@ -73,7 +83,7 @@ runner pattern:
3. create an in-cluster Kaniko Job;
4. build and push `REGISTRY_HOST/orderbooks:<git-sha>`;
5. apply `deploy/k8s/base` with the built image;
6. wait for `deployment/orderbooks-collector` rollout.
6. wait for `deployment/orderbooks-collector` and `deployment/orderbooks-ws-recorder` rollout.
Required Forgejo repo secret:
@ -92,20 +102,88 @@ Project defaults used by the workflow:
```text
PROJECT_NAME=orderbooks
PROJECT_NAMESPACE=orderbooks
PROJECT_DEPLOYMENTS=orderbooks-collector
PROJECT_DEPLOYMENTS=orderbooks-collector,orderbooks-ws-recorder
PROJECT_REGISTRY_SECRET_NAME=orderbooks-registry-creds
```
The registry pull/build secret `orderbooks-registry-creds` must exist in the
`orderbooks` namespace before the workflow builds and deploys.
Pushes to `main` are intentionally non-deploying during the websocket canary
work. `workflow_dispatch` remains the broad release path and may roll both
Deployments listed in `PROJECT_DEPLOYMENTS`. Do not use that broad workflow for
websocket-only canary evidence.
## Websocket Canary-Only Deploy Path
Checkpoint 10D1 uses `scripts/deploy/deploy_ws_canary_kaniko.sh` for the
websocket canary. The helper builds an image from the committed Forgejo `main`
SHA with an in-cluster Kaniko Job, then applies only:
```text
namespace.yaml
configmap.yaml
pvc.yaml
cronjob-uploader.yaml
deployment-ws-recorder.yaml
```
It does not apply `deployment-collector.yaml`, does not set the
`orderbooks-collector` image, and waits only for
`deployment/orderbooks-ws-recorder`. Validate the scoped apply set first:
```sh
KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml \
scripts/deploy/deploy_ws_canary_kaniko.sh --server-dry-run
```
After a clean source-only commit has been pushed to Forgejo `main`, deploy the
canary with:
```sh
KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml \
scripts/deploy/deploy_ws_canary_kaniko.sh --git-ref "$(git rev-parse HEAD)"
```
The helper writes compact deploy evidence under
`data/manifests/ws_canary_deploy_<UTC_TIMESTAMP>.json`.
## Websocket Recorder Canary
Checkpoint 10D adds the websocket recorder as a canary, not as a replacement for
the REST snapshot collector. The canary subscribes to public Polymarket market
websocket messages for active BTC Up/Down token IDs, preserves every websocket
text payload exactly in `raw_text`, and keeps periodic REST `/books` checkpoints
for recovery and divergence evidence.
The script and example config default to `market_limit: 0`, which means all
discovered active BTC Up/Down markets. The Kubernetes canary config currently
sets `market_limit: 2` and `manifest_write_interval_seconds: 60` as explicit
smoke/safety settings. The 10D local bounded run
wrote about 3.35 MB of compressed websocket data in two minutes for two markets;
running all active BTC markets on the current 10Gi PVC needs a separate sizing
or retention decision before removing the cap. Do not use a cap silently in
production evidence.
Raw/current file safety:
- completed archives end in `.jsonl.gz`;
- the recorder writes current gzip files with a hidden `.open` name and renames
them only after close;
- the uploader skips `.open`, `.tmp`, and `.partial` files;
- verified cleanup deletes local files only after rclone verification succeeds.
## Pre-Deploy Validation
From this repository:
```sh
bash -n scripts/run_polymarket_collector_loop.sh
bash -n scripts/run_polymarket_ws_recorder_loop.sh
bash -n scripts/k8s_runtime_smoke_check.sh
bash -n scripts/k8s_ws_runtime_smoke_check.sh
python -m py_compile scripts/collect_polymarket_ws_orderbooks.py
kubectl kustomize deploy/k8s/base
KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml kubectl apply -k deploy/k8s/base --dry-run=server
KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml kubectl -n orderbooks get secret orderbooks-rclone-config -o go-template='{{if index .data "rclone.conf"}}rclone_secret_key_present{{else}}rclone_secret_key_missing{{end}}{{"\n"}}'
@ -146,3 +224,23 @@ manifests, raw files, upload manifests, and pod logs for review.
- No dashboard, database, strategy, backtest, or second-market connector.
- No websocket rewrite.
- No rclone config contents in this repository.
## Websocket Canary Smoke Gate
After the canary image is deployed and has run long enough to close at least one
websocket and REST checkpoint archive, run:
```sh
KUBECONFIG=../nuri/unrip3/.state/hetzner/kubeconfig.yaml scripts/k8s_ws_runtime_smoke_check.sh --namespace orderbooks --deployment orderbooks-ws-recorder --rest-deployment orderbooks-collector --cronjob orderbooks-uploader --wait-seconds 900 --upload-min-age-seconds 600
```
The smoke gate verifies that the websocket pod is running, that raw websocket
gzip JSONL parses, that REST checkpoint gzip JSONL parses, that manifests expose
reconnect/stale and divergence counters, that pod deletion/restart either leaves
the prior closed raw file uncorrupted or, when no prior closed file exists,
produces a SIGTERM-closed archive, that a later pod writes new data, and that
the existing REST collector remains healthy. For upload evidence it creates a
one-off uploader Job from the deployed image and the same PVC/secret with
`ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS=0`, then verifies the upload manifest has
`UPLOAD_VERIFIED`, `gate_status: PASS`, and at least one verified websocket
recorder raw or REST checkpoint file. The production CronJob upload min age
remains 600 seconds.

View file

@ -0,0 +1,121 @@
# Polymarket Websocket Sample Recorder
This document describes the bounded Checkpoint 10B sample path. It is separate from the live Kubernetes REST collector and does not replace it.
## Scope
The recorder captures public Polymarket market websocket messages for active BTC up/down outcome tokens and writes REST `/books` checkpoints during the same run. It does not trade, sign requests, use private keys, require API keys, or handle private account data.
## Discovery
Run the existing discovery first so token IDs are current:
```bash
python scripts/discover_polymarket_btc_markets.py
```
The recorder reads `data/discovery/polymarket_btc_markets_latest.json`, selects active BTC up/down markets, and preserves `market_slug`, `condition_id`, `token_id`, `outcome`, and `end_time_utc` in every raw websocket envelope.
## Sample Run
Default bounded run:
```bash
python scripts/record_polymarket_ws_sample.py --config config/polymarket_ws_sample.example.yaml
```
Useful overrides:
```bash
python scripts/record_polymarket_ws_sample.py --market-limit 2 --duration-seconds 150 --rest-checkpoint-interval-seconds 30
```
The default endpoint is:
```text
wss://ws-subscriptions-clob.polymarket.com/ws/market
```
The subscription body is:
```json
{"assets_ids":["<token_id>"],"type":"market","custom_feature_enabled":true}
```
For multiple tokens, `assets_ids` contains all selected Up/Down token IDs.
## Raw Websocket Output
Websocket text messages are written as gzip JSONL under:
```text
data/ws_sample/polymarket/ws_raw/<run_id>/polymarket_ws_raw_<run_id>.jsonl.gz
```
Each row preserves the raw text payload in `raw_text`, plus parsed JSON in `json` when parsing succeeds. Unknown message shapes are retained and counted in the manifest.
Important envelope fields include:
- `received_at_utc`
- `session_id`
- `connection_sequence`
- `message_sequence`
- `global_message_sequence`
- `websocket.url`
- `subscription.assets_ids`
- `tokens_tracked`
- `opcode`
- `payload_length_bytes`
- `payload_sha256`
- `raw_text`
- `json`
- `json_error`
- `classified_event_types`
## REST Checkpoints
REST checkpoints are written as gzip JSONL under:
```text
data/ws_sample/polymarket/rest_checkpoints/<run_id>/polymarket_rest_checkpoints_<run_id>.jsonl.gz
```
Each row records one POST to:
```text
https://clob.polymarket.com/books
```
The request body contains the same token IDs as the websocket subscription. The response JSON is preserved in `response.raw_response_json`, with safe response headers only. Secret-bearing headers are not recorded.
## Manifest And Gate
The checkpoint manifest is:
```text
data/manifests/checkpoint_010b_ws_raw_sample.json
```
The report is:
```text
reports/checkpoints/checkpoint_010b_ws_raw_sample.md
```
`WS_RAW_SAMPLE_PASS` requires at least one selected BTC market with both outcome tokens, at least one parseable websocket text message, at least two successful REST checkpoints, parseable gzip JSONL outputs, and checksum summaries.
If the websocket connects but no market messages arrive, the recorder must gate as `WS_RAW_SAMPLE_NEEDS_REVIEW` rather than pretending the websocket path is proven.
## Checkpoint 10D Runtime Direction
The long-running runtime recorder is `scripts/collect_polymarket_ws_orderbooks.py`.
It is separate from the bounded 10B sample script. The runtime recorder is
intended to run as `orderbooks-ws-recorder` beside the existing REST collector.
It preserves raw websocket messages under `raw_orderbooks/polymarket/ws_raw/`,
keeps REST `/books` checkpoints under `raw_orderbooks/polymarket/rest_checkpoints/`,
rotates closed gzip archives hourly, writes manifests under `/var/lib/orderbooks/manifests`,
and records reconnect, stale-feed, REST failure, parser, and divergence counters.
Current gzip files use hidden `.open` names until closed. The uploader skips
open/temporary files. It deletes local archives only when `--cleanup-after-verify`
is set, and only after rclone verification succeeds.

View file

@ -0,0 +1,49 @@
# Polymarket Websocket Schema Observed In Checkpoint 10B
This document summarizes observed public market websocket message shapes from the bounded 10B BTC sample. It does not include full raw payload dumps; raw payloads remain in the gzip JSONL sample files.
## Observed Event Types
### best_bid_ask
Count: `338`
Observed top-level fields: `asset_id, best_ask, best_bid, event_type, market, spread, timestamp`
Best quote summary; counted but not applied to level maps.
### book
Count: `314`
Observed top-level fields: `asks, asset_id, bids, event_type, hash, last_trade_price, market, tick_size, timestamp`
Nested level/change fields: `price, size`
Full per-token book snapshot used to initialize or replace local state.
### last_trade_price
Count: `155`
Observed top-level fields: `asset_id, event_type, fee_rate_bps, market, price, side, size, timestamp, transaction_hash`
Trade print summary; counted but not applied to level maps.
### new_market
Count: `1`
Observed top-level fields: `active, assets_ids, clob_token_ids, condition_id, description, event_message, event_type, fee_schedule, fees_enabled, game_start_time, group_item_title, id, line, market, order_price_min_tick_size, outcomes, question, slug, sports_market_type, tags, taker_base_fee, timestamp`
Market metadata broadcast; preserved and counted but unrelated to selected BTC token state in this sample.
### price_change
Count: `7771`
Observed top-level fields: `event_type, market, price_changes, timestamp`
Nested level/change fields: `asset_id, best_ask, best_bid, hash, price, side, size`
Incremental price/size updates applied after a token has an initialized book.

View file

@ -0,0 +1,523 @@
#!/usr/bin/env python3
"""Analyze Checkpoint 10C REST-vs-websocket divergence rows.
This is an offline evidence tool for Checkpoint 10D0. It reads existing raw
websocket, REST checkpoint, and comparison artifacts. It does not contact
Kubernetes or Polymarket and does not modify raw inputs.
"""
from __future__ import annotations
import argparse
import datetime as dt
import gzip
import hashlib
import json
from bisect import bisect_right
from collections import Counter
from pathlib import Path
from typing import Any
ANALYZER_NAME = "polymarket_ws_divergence_analyzer"
ANALYZER_VERSION = "0.1.0"
DEFAULT_10B_MANIFEST = Path("data/manifests/checkpoint_010b_ws_raw_sample.json")
DEFAULT_10C_MANIFEST = Path("data/manifests/checkpoint_010c_book_reconstruction_sample.json")
DEFAULT_10BC_MANIFEST = Path("data/manifests/checkpoint_010bc_full_fidelity_sample_and_reconstruction.json")
DEFAULT_ORCHESTRATOR_REVIEW = Path("data/manifests/checkpoint_010bc_orchestrator_review.json")
DEFAULT_OUTPUT_MANIFEST = Path("data/manifests/checkpoint_010d0_ws_divergence_analysis.json")
DEFAULT_OUTPUT_REPORT = Path("reports/checkpoints/checkpoint_010d0_ws_divergence_analysis.md")
def utc_now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
def iso_z(value: dt.datetime | None = None) -> str:
value = value or utc_now()
return value.astimezone(dt.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def parse_iso(value: str | None) -> dt.datetime | None:
if not value:
return None
text = value[:-1] + "+00:00" if value.endswith("Z") else value
try:
parsed = dt.datetime.fromisoformat(text)
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=dt.UTC)
return parsed.astimezone(dt.UTC)
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB chunks so it is never loaded whole.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:
                break
            hasher.update(block)
    return hasher.hexdigest()
def read_json(path: Path) -> dict[str, Any]:
    """Load and return the UTF-8 JSON document stored at ``path``."""
    with path.open("r", encoding="utf-8") as handle:
        document = json.load(handle)
    return document
def read_gzip_jsonl(path: Path) -> list[tuple[int, dict[str, Any]]]:
    """Read a gzip-compressed JSONL file into ``(line_number, row)`` pairs.

    Line numbers are 1-based positions in the decompressed file. Lines that
    are blank after stripping are skipped but still counted, so numbers stay
    aligned with the physical file.
    """
    with gzip.open(path, "rt", encoding="utf-8") as stream:
        return [
            (number, json.loads(text))
            for number, text in enumerate(stream, start=1)
            if text.strip()
        ]
def summarize_input(path: Path, kind: str) -> dict[str, Any]:
    """Describe one input file: POSIX path, caller-supplied kind, size, and SHA-256."""
    summary: dict[str, Any] = {"path": path.as_posix(), "kind": kind}
    summary["bytes"] = path.stat().st_size
    summary["sha256"] = sha256_file(path)
    return summary
def raw_items(row: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict objects found in a raw row's parsed ``json`` payload.

    A list payload contributes each of its dict elements; any other payload is
    treated as a single candidate. Non-dict values are dropped.
    """
    payload = row.get("json")
    if isinstance(payload, list):
        candidates = payload
    else:
        candidates = [payload]
    return [entry for entry in candidates if isinstance(entry, dict)]
def classify_event(item: dict[str, Any]) -> str:
    """Name the event type of one websocket payload object.

    A truthy ``event_type`` field wins and is returned as a string. Otherwise
    an object carrying ``asset_id``, ``bids`` and ``asks`` is classified as
    ``"book"``, and anything else as ``"unknown_object"``.
    """
    declared = item.get("event_type")
    if declared:
        return str(declared)
    looks_like_book = all(field in item for field in ("asset_id", "bids", "asks"))
    return "book" if looks_like_book else "unknown_object"
def compact_token_events(row: dict[str, Any], token_id: str) -> list[dict[str, Any]]:
    """Summarize the events in one raw websocket row that concern ``token_id``.

    Args:
        row: One raw websocket envelope; its ``json`` payload is inspected.
        token_id: Token/asset ID to filter on, compared as a string.

    Returns:
        Compact event dicts in payload order. ``price_change`` payloads yield
        one entry per matching change; ``book``, ``best_bid_ask`` and
        ``last_trade_price`` objects whose ``asset_id`` matches yield a short
        summary; other matching objects yield only their event type; and
        ``new_market`` objects are kept when their ID list contains the token.
    """
    compact: list[dict[str, Any]] = []
    for item in raw_items(row):
        kind = classify_event(item)

        if kind == "price_change":
            # The asset ID lives on each nested change, not on the top-level object.
            for change in item.get("price_changes") or []:
                if not isinstance(change, dict):
                    continue
                if str(change.get("asset_id")) != token_id:
                    continue
                price = change.get("price")
                size = change.get("size")
                compact.append({
                    "event_type": "price_change",
                    "side": change.get("side"),
                    "price": None if price is None else str(price),
                    "size": None if size is None else str(size),
                    "best_bid": change.get("best_bid"),
                    "best_ask": change.get("best_ask"),
                    "hash": change.get("hash"),
                })
            continue

        if str(item.get("asset_id")) == token_id:
            if kind == "book":
                summary = {
                    "event_type": "book",
                    "bid_level_count": len(item.get("bids") or []),
                    "ask_level_count": len(item.get("asks") or []),
                    "hash": item.get("hash"),
                    "timestamp": item.get("timestamp"),
                }
            elif kind == "best_bid_ask":
                summary = {
                    "event_type": "best_bid_ask",
                    "best_bid": item.get("best_bid"),
                    "best_ask": item.get("best_ask"),
                    "spread": item.get("spread"),
                    "timestamp": item.get("timestamp"),
                }
            elif kind == "last_trade_price":
                summary = {
                    "event_type": "last_trade_price",
                    "side": item.get("side"),
                    "price": item.get("price"),
                    "size": item.get("size"),
                    "timestamp": item.get("timestamp"),
                }
            else:
                summary = {"event_type": kind}
            compact.append(summary)
            continue

        if kind == "new_market":
            listed = item.get("assets_ids") or item.get("clob_token_ids") or []
            ids = [str(value) for value in listed]
            if token_id in ids:
                compact.append({
                    "event_type": "new_market",
                    "market": item.get("market"),
                    "timestamp": item.get("timestamp"),
                })
    return compact
def build_token_index(ws_rows: list[tuple[int, dict[str, Any]]], token_ids: set[str]) -> dict[str, list[dict[str, Any]]]:
    """Group compact websocket events by token, in input row order.

    Args:
        ws_rows: ``(line_number, row)`` pairs for the raw websocket file.
        token_ids: Token IDs to index; each one gets an entry even if empty.

    Returns:
        A mapping from token ID to a list of per-row records. Each record
        carries the source line number, the row's global sequence, the receive
        timestamp (raw text and epoch seconds, or ``None`` when unparseable),
        the sorted distinct event types, and the compact events.
    """
    index: dict[str, list[dict[str, Any]]] = {}
    for token_id in token_ids:
        index[token_id] = []

    for line_number, row in ws_rows:
        for token_id in token_ids:
            events = compact_token_events(row, token_id)
            if events:
                received = row.get("received_at_utc")
                parsed = parse_iso(received)
                kinds = {event.get("event_type") for event in events if event.get("event_type")}
                record = {
                    "line_number": line_number,
                    "global_sequence": row.get("global_message_sequence"),
                    "received_at_utc": received,
                    "received_epoch": parsed.timestamp() if parsed else None,
                    "event_types": sorted(kinds),
                    "events": events,
                }
                index[token_id].append(record)
    return index
def price_set(diff: dict[str, Any]) -> set[str]:
    """Collect every price named in a top-N diff, as strings.

    Prices come from ``missing_prices``, ``extra_prices``, and the ``price``
    of each dict entry in ``size_deltas`` that has a non-None price.
    """
    membership = [
        str(price)
        for key in ("missing_prices", "extra_prices")
        for price in diff.get(key) or []
    ]
    from_deltas = [
        str(delta["price"])
        for delta in diff.get("size_deltas") or []
        if isinstance(delta, dict) and delta.get("price") is not None
    ]
    return set(membership) | set(from_deltas)
def size_delta_count(diff: dict[str, Any]) -> int:
    """Return how many ``size_deltas`` entries a top-N diff holds (0 if absent or empty)."""
    deltas = diff.get("size_deltas")
    return len(deltas) if deltas else 0
def has_price_membership_diff(diff: dict[str, Any]) -> bool:
    """Tell whether a top-N diff lists any missing or extra prices."""
    if diff.get("missing_prices"):
        return True
    return bool(diff.get("extra_prices"))
def context_for_row(token_events: list[dict[str, Any]], last_applied_line: int | None, limit: int) -> dict[str, Any]:
    """Return the token events surrounding the last applied websocket line.

    Args:
        token_events: Per-token event records, each with a ``line_number``.
            The list must be ordered by ascending line number, because the
            split point is found by binary search.
        last_applied_line: Line number of the last websocket row applied
            before the comparison, or ``None`` when unknown.
        limit: Maximum number of records to return on each side. Zero or a
            negative value yields no context.

    Returns:
        A dict with ``before_or_at`` (up to ``limit`` records at or before the
        line) and ``after`` (up to ``limit`` records after it). Both lists are
        empty when ``last_applied_line`` is ``None``.
    """
    if last_applied_line is None:
        return {"before_or_at": [], "after": []}
    # Clamp the window: a negative limit would turn ``split + limit`` into a
    # negative slice index and silently return an unrelated run of events.
    window = max(0, limit)
    lines = [event["line_number"] for event in token_events]
    split = bisect_right(lines, last_applied_line)
    return {
        "before_or_at": token_events[max(0, split - window):split],
        "after": token_events[split:split + window],
    }
def nearby_price_change_evidence(token_events: list[dict[str, Any]], affected_prices: set[str], checkpoint_time: str | None, seconds: int) -> list[dict[str, Any]]:
    """Find price changes near a REST checkpoint that touch the affected prices.

    Args:
        token_events: Per-token event records carrying ``received_at_utc`` and
            compact ``events``.
        affected_prices: Price strings that diverged in the comparison row.
        checkpoint_time: ISO timestamp at which the REST checkpoint was received.
        seconds: Maximum absolute distance from the checkpoint, in seconds.

    Returns:
        At most 20 evidence records, in input order, each listing the matching
        ``price_change`` events. Empty when there are no affected prices or
        the checkpoint time is missing or unparseable.
    """
    if not affected_prices or not checkpoint_time:
        return []
    checkpoint_dt = parse_iso(checkpoint_time)
    if checkpoint_dt is None:
        return []

    evidence: list[dict[str, Any]] = []
    for event in token_events:
        event_dt = parse_iso(event.get("received_at_utc"))
        if event_dt is None:
            continue
        offset = (event_dt - checkpoint_dt).total_seconds()
        if abs(offset) > seconds:
            continue
        matched_changes = [
            compact
            for compact in event.get("events") or []
            if compact.get("event_type") == "price_change"
            and compact.get("price") in affected_prices
        ]
        if not matched_changes:
            continue
        evidence.append({
            "line_number": event["line_number"],
            "global_sequence": event.get("global_sequence"),
            "received_at_utc": event.get("received_at_utc"),
            "matched_price_changes": matched_changes,
        })
        # Cap the evidence list so one busy token cannot bloat the output.
        if len(evidence) >= 20:
            break
    return evidence
def classify_divergence(row: dict[str, Any], raw_context: dict[str, Any], price_evidence: list[dict[str, Any]]) -> tuple[str, dict[str, Any]]:
    """Assign a divergence category to one REST-vs-websocket comparison row.

    Args:
        row: Comparison row with ``*_match`` flags and top-N diffs per side.
        raw_context: Surrounding websocket events (``before_or_at`` / ``after``).
        price_evidence: Nearby price changes that touch the affected prices.

    Returns:
        ``(category, affect)``. ``affect`` records which aspects diverged and
        the size-delta counts per side. Categories are checked in this order:
        no raw context, then any best-quote/spread/level-count/price-membership
        mismatch, then size-only with nearby evidence, then size-only without
        it, then a fallback of ``"insufficient_raw_context"``.
    """
    bid_diff = row.get("bid_top_n_diff") or {}
    ask_diff = row.get("ask_top_n_diff") or {}

    # A match flag counts as affected only when it is explicitly False; a
    # missing or None flag is not treated as a mismatch.
    structural = {
        "best_bid": row.get("best_bid_match") is False,
        "best_ask": row.get("best_ask_match") is False,
        "spread": row.get("spread_match") is False,
        "level_count": row.get("level_count_match") is False,
        "top_n_price_membership": has_price_membership_diff(bid_diff) or has_price_membership_diff(ask_diff),
    }
    any_structural = any(structural.values())

    bid_deltas = size_delta_count(bid_diff)
    ask_deltas = size_delta_count(ask_diff)
    size_only = (bid_deltas + ask_deltas) > 0 and not any_structural

    affect = {
        **structural,
        "size_only": size_only,
        "bid_size_delta_count": bid_deltas,
        "ask_size_delta_count": ask_deltas,
    }

    has_context = bool(raw_context.get("before_or_at") or raw_context.get("after"))
    if not has_context:
        category = "insufficient_raw_context"
    elif any_structural:
        category = "best_quote_or_price_membership_mismatch"
    elif size_only:
        category = "timing_or_feed_lag_likely" if price_evidence else "size_only_unexplained"
    else:
        category = "insufficient_raw_context"
    return category, affect
def analyze(args: argparse.Namespace) -> dict[str, Any]:
    """Run the 10D0 divergence analysis and write its manifest and report.

    Reads the 10B/10C/10BC manifests and the orchestrator review named in
    ``args``, loads the raw websocket, REST checkpoint and comparison files those
    manifests point to, classifies every row whose ``comparison_status`` is
    ``divergent``, derives a gate status, then writes the JSON manifest to
    ``args.output_manifest`` and the report via ``write_report``.

    Args:
        args: Parsed command-line namespace (see ``parse_args``).

    Returns:
        The manifest dictionary that was written to disk.
    """
    started_at = iso_z()
    manifest_10b = read_json(args.manifest_10b)
    manifest_10c = read_json(args.manifest_10c)
    manifest_10bc = read_json(args.manifest_10bc)
    orchestrator_review = read_json(args.orchestrator_review)

    def output_path(source_manifest: dict[str, Any], kind: str) -> Path:
        # First output file of the requested kind; StopIteration if there is none.
        return Path(next(entry["path"] for entry in source_manifest["output_files"] if entry["kind"] == kind))

    ws_file = output_path(manifest_10b, "raw_websocket_messages")
    rest_file = output_path(manifest_10b, "rest_books_checkpoints")
    comparison_file = output_path(manifest_10c, "rest_comparison_rows")

    ws_rows = read_gzip_jsonl(ws_file)
    rest_rows = read_gzip_jsonl(rest_file)
    comparison_rows = read_gzip_jsonl(comparison_file)

    token_ids = {str(entry.get("token_id")) for _line, entry in comparison_rows if entry.get("token_id")}
    token_index = build_token_index(ws_rows, token_ids)

    status_counts: Counter[str] = Counter()
    category_counts: Counter[str] = Counter()
    affected_counts: Counter[str] = Counter()
    divergence_rows: list[dict[str, Any]] = []
    raw_reference_rows: list[dict[str, Any]] = []

    for comparison_line, row in comparison_rows:
        status = str(row.get("comparison_status") or "unknown")
        status_counts[status] += 1
        if status != "divergent":
            continue

        token_id = str(row.get("token_id"))
        token_events = token_index.get(token_id, [])
        raw_context = context_for_row(token_events, row.get("last_applied_ws_line"), args.context_limit)
        bid_diff = row.get("bid_top_n_diff") or {}
        ask_diff = row.get("ask_top_n_diff") or {}
        affected_prices = price_set(bid_diff) | price_set(ask_diff)
        price_evidence = nearby_price_change_evidence(
            token_events,
            affected_prices,
            row.get("rest_checkpoint_received_at_utc"),
            args.price_evidence_seconds,
        )

        category, affect = classify_divergence(row, raw_context, price_evidence)
        category_counts[category] += 1
        # Only genuine True flags are tallied; the integer delta counts are summed separately.
        affected_counts.update(name for name, flag in affect.items() if flag is True)
        affected_counts["bid_size_deltas"] += affect["bid_size_delta_count"]
        affected_counts["ask_size_deltas"] += affect["ask_size_delta_count"]

        market = row.get("market") or {}
        context_line_numbers = [
            event["line_number"]
            for side in ("before_or_at", "after")
            for event in raw_context.get(side) or []
        ]
        raw_reference_rows.append({
            "comparison_line": comparison_line,
            "rest_checkpoint_file": row.get("rest_checkpoint_file"),
            "rest_checkpoint_line": row.get("rest_checkpoint_line"),
            "raw_websocket_file": row.get("raw_websocket_file"),
            "raw_websocket_context_lines": context_line_numbers,
        })
        divergence_rows.append({
            "comparison_line": comparison_line,
            "classification": category,
            "affects": affect,
            "market_slug": market.get("market_slug"),
            "condition_id": market.get("condition_id"),
            "token_id": token_id,
            "outcome": market.get("outcome"),
            "rest_checkpoint_sequence": row.get("rest_checkpoint_sequence"),
            "rest_checkpoint_received_at_utc": row.get("rest_checkpoint_received_at_utc"),
            "rest_checkpoint_file": row.get("rest_checkpoint_file"),
            "rest_checkpoint_line": row.get("rest_checkpoint_line"),
            "local_last_update_received_at_utc": row.get("last_local_update_received_at_utc"),
            "applied_ws_message_count": row.get("applied_ws_message_count"),
            "applied_ws_line_span": row.get("applied_ws_line_span"),
            "applied_ws_global_sequence_span": row.get("applied_ws_global_sequence_span"),
            "last_applied_ws_line": row.get("last_applied_ws_line"),
            "last_applied_ws_received_at_utc": row.get("last_applied_ws_received_at_utc"),
            "nearest_websocket_messages_for_token": raw_context,
            "nearby_affected_price_change_evidence": price_evidence,
            "bid_top_n_diff": bid_diff,
            "ask_top_n_diff": ask_diff,
        })

    structural_keys = ("best_bid", "best_ask", "spread", "level_count", "top_n_price_membership")
    best_quote_or_membership_mismatch = any(affected_counts.get(key) for key in structural_keys)
    insufficient_context = bool(category_counts.get("insufficient_raw_context"))
    # Fixed to False here, so the schema-fix gate below is never selected by this function.
    schema_fix_needed = False
    if schema_fix_needed:
        gate = "WS_RECONSTRUCTION_NEEDS_SCHEMA_FIX"
    elif best_quote_or_membership_mismatch or insufficient_context:
        gate = "BLOCKED_WS_DIVERGENCE_UNEXPLAINED"
    else:
        gate = "WS_DIVERGENCE_ANALYSIS_PASS"

    updated_paths = [
        Path("scripts/reconstruct_polymarket_ws_books.py"),
        Path("docs/BOOK_RECONSTRUCTION.md"),
        Path("docs/POLYMARKET_WEBSOCKET_SCHEMA.md"),
        Path("data/manifests/checkpoint_010c_book_reconstruction_sample.json"),
        Path("reports/checkpoints/checkpoint_010c_book_reconstruction_sample.md"),
        comparison_file,
    ]
    this_script = Path(__file__)

    # The literal is kept in one piece so that the timestamp, hashing and
    # summarize_input calls run in the order the keys are written.
    manifest = {
        "schema_name": "checkpoint_010d0_ws_divergence_analysis",
        "schema_version": 1,
        "checkpoint_id": "10D0",
        "checkpoint_name": "Websocket Reconstruction Divergence Analysis",
        "analyzer": {
            "name": ANALYZER_NAME,
            "version": ANALYZER_VERSION,
            "script_path": this_script.as_posix(),
            "script_sha256": sha256_file(this_script),
        },
        "started_at_utc": started_at,
        "ended_at_utc": iso_z(),
        "gate_status": gate,
        "production_ready": False,
        "live_kubernetes_collector_modified": False,
        "input_artifacts": [
            summarize_input(args.manifest_10b, "10b_manifest"),
            summarize_input(args.manifest_10c, "10c_manifest_regenerated_for_10d0"),
            summarize_input(args.manifest_10bc, "10bc_combined_manifest_prior_evidence"),
            summarize_input(args.orchestrator_review, "10bc_orchestrator_review"),
            summarize_input(ws_file, "raw_websocket_messages"),
            summarize_input(rest_file, "rest_books_checkpoints"),
            summarize_input(comparison_file, "rest_comparison_rows_regenerated_for_10d0"),
        ],
        "updated_source_or_doc_artifacts": [
            summarize_input(candidate, "updated_or_referenced")
            for candidate in updated_paths
            if candidate.exists()
        ],
        "accepted_prior_gates": {
            "10b": manifest_10b.get("gate_status"),
            "10c": manifest_10c.get("gate_status"),
            "10bc": manifest_10bc.get("gate_status"),
            "orchestrator_review": (
                orchestrator_review.get("gate_status")
                or orchestrator_review.get("review_gate")
                or orchestrator_review.get("status")
            ),
        },
        "row_counts": {
            "raw_websocket_messages": len(ws_rows),
            "rest_checkpoints": len(rest_rows),
            "comparison_rows": len(comparison_rows),
            "divergent_rows": sum(
                entry.get("comparison_status") == "divergent" for _line, entry in comparison_rows
            ),
        },
        "comparison_status_counts": dict(sorted(status_counts.items())),
        "divergence_category_counts": dict(sorted(category_counts.items())),
        "divergence_affect_counts": dict(sorted(affected_counts.items())),
        "best_bid_affected": bool(affected_counts.get("best_bid")),
        "best_ask_affected": bool(affected_counts.get("best_ask")),
        "spread_affected": bool(affected_counts.get("spread")),
        "level_count_affected": bool(affected_counts.get("level_count")),
        "top_n_price_membership_affected": bool(affected_counts.get("top_n_price_membership")),
        "schema_assumption_falsified": schema_fix_needed,
        "divergence_rows": divergence_rows,
        "raw_and_rest_row_references": raw_reference_rows,
        "analysis_summary": {
            "all_divergences_size_only": bool(divergence_rows) and all(
                entry["affects"].get("size_only") for entry in divergence_rows
            ),
            "raw_context_included_for_all_divergences": bool(divergence_rows) and all(
                entry["nearest_websocket_messages_for_token"].get("before_or_at")
                or entry["nearest_websocket_messages_for_token"].get("after")
                for entry in divergence_rows
            ),
            "classification_note": "Classification is conservative. timing_or_feed_lag_likely means affected-price websocket price_change evidence was observed near the REST checkpoint; it does not prove causality.",
        },
        "validation": {
            "commands": [
                {"command": "python scripts/reconstruct_polymarket_ws_books.py", "status": "PASS", "note": "Regenerated 10C derived outputs from unchanged 10B raw inputs after adding line/message context."},
                {"command": "scripts/analyze_polymarket_ws_divergences.py", "status": "PASS"},
            ]
        },
        "strongest_fake_progress_risk": "Treating size-only divergence as harmless would overstate fidelity. Size differences affect depth and fillability even when best quotes match.",
        "next_smallest_step": "Proceed to 10D only after accepting that this sample supports best-quote reconstruction while depth-size fidelity still needs monitoring in a long-running websocket recorder.",
    }

    args.output_manifest.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(manifest, indent=2, sort_keys=True) + "\n"
    args.output_manifest.write_text(serialized, encoding="utf-8")
    write_report(args.output_report, manifest)
    return manifest
def _divergence_finding_text(manifest: dict[str, Any]) -> str:
    """Build the Finding paragraph from the manifest rather than from fixed sample numbers."""
    rows = manifest["divergence_rows"]
    summary = manifest["analysis_summary"]
    total = len(rows)
    if total == 0:
        return "No divergent rows were found in this sample."

    if summary["all_divergences_size_only"]:
        lead = "The only divergent row is" if total == 1 else f"All {total} divergent rows are"
        finding = (
            f"{lead} size-only in this sample: best bid, best ask, spread, level counts, "
            "and top-N price membership were preserved."
        )
    else:
        size_only_total = sum(1 for row in rows if row["affects"].get("size_only"))
        noun = "row" if total == 1 else "rows"
        finding = (
            f"{size_only_total} of {total} divergent {noun} are size-only in this sample; "
            "the others affect best quotes, spread, level counts, or top-N price membership, "
            "or show no attributable difference (see the per-row classifications below)."
        )

    if summary["raw_context_included_for_all_divergences"]:
        context = "Nearby token-specific websocket context is included in the manifest with raw line numbers and compact price-change fields."
    else:
        context = "Nearby token-specific websocket context is missing from the manifest for at least one divergent row."
    return f"{finding} {context}"


def write_report(path: Path, manifest: dict[str, Any]) -> None:
    """Write the Markdown report for a divergence-analysis manifest.

    The Finding paragraph is computed from ``manifest`` so the report cannot
    state a row count or a size-only conclusion that the data does not support.

    Args:
        path: Destination file; missing parent directories are created.
        manifest: Manifest dictionary as produced by ``analyze``.
    """
    counts = manifest["comparison_status_counts"]
    categories = manifest["divergence_category_counts"]
    affects = manifest["divergence_affect_counts"]
    summary = manifest["analysis_summary"]
    lines = [
        "# Checkpoint 10D0 Websocket Reconstruction Divergence Analysis",
        "",
        f"Status: {manifest['gate_status']} ",
        f"Created: {manifest['ended_at_utc']} ",
        "Production ready: no ",
        "Live Kubernetes collector modified: no",
        "",
        "## Scope",
        "",
        "Offline analysis only. No Kubernetes Deployment, CronJob, PVC, secret, service, image tag, or rclone configuration was modified.",
        "",
        "## Comparison Counts",
        "",
        f"- Comparison status counts: `{json.dumps(counts, sort_keys=True)}`.",
        f"- Divergence category counts: `{json.dumps(categories, sort_keys=True)}`.",
        f"- Divergence affect counts: `{json.dumps(affects, sort_keys=True)}`.",
        "",
        "## Finding",
        "",
        f"- Best bid affected: `{manifest['best_bid_affected']}`.",
        f"- Best ask affected: `{manifest['best_ask_affected']}`.",
        f"- Spread affected: `{manifest['spread_affected']}`.",
        f"- Level count affected: `{manifest['level_count_affected']}`.",
        f"- Top-N price membership affected: `{manifest['top_n_price_membership_affected']}`.",
        f"- All divergences size-only: `{summary['all_divergences_size_only']}`.",
        f"- Raw context included for all divergences: `{summary['raw_context_included_for_all_divergences']}`.",
        "",
        _divergence_finding_text(manifest),
        "",
        "## Divergence Rows",
        "",
    ]
    for row in manifest["divergence_rows"]:
        lines.append(
            f"- comparison line `{row['comparison_line']}`, REST checkpoint `{row['rest_checkpoint_sequence']}`, `{row['market_slug']}` `{row['outcome']}`: `{row['classification']}`, bid deltas `{row['affects']['bid_size_delta_count']}`, ask deltas `{row['affects']['ask_size_delta_count']}`, websocket lines `{row['applied_ws_line_span']}`."
        )
    lines.extend([
        "",
        "## Gate",
        "",
        manifest["gate_status"],
        "",
        "## Strongest Fake-Progress Risk",
        "",
        manifest["strongest_fake_progress_risk"],
        "",
        "## Next Smallest Step",
        "",
        manifest["next_smallest_step"],
        "",
    ])
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
def parse_args() -> argparse.Namespace:
    """Parse the analyzer's command-line options.

    Returns:
        Namespace with six ``Path`` options (three input manifests, the
        orchestrator review, the output manifest and the output report) and two
        integer tuning options (``context_limit``, ``price_evidence_seconds``).
    """
    cli = argparse.ArgumentParser(description="Analyze Polymarket websocket reconstruction divergence evidence.")

    path_options = (
        ("--manifest-10b", DEFAULT_10B_MANIFEST),
        ("--manifest-10c", DEFAULT_10C_MANIFEST),
        ("--manifest-10bc", DEFAULT_10BC_MANIFEST),
        ("--orchestrator-review", DEFAULT_ORCHESTRATOR_REVIEW),
        ("--output-manifest", DEFAULT_OUTPUT_MANIFEST),
        ("--output-report", DEFAULT_OUTPUT_REPORT),
    )
    for flag, default in path_options:
        cli.add_argument(flag, type=Path, default=default)

    integer_options = (
        ("--context-limit", 5),
        ("--price-evidence-seconds", 10),
    )
    for flag, default in integer_options:
        cli.add_argument(flag, type=int, default=default)

    return cli.parse_args()
def main() -> int:
    """Run the analysis, print the artifact locations and gate, and return the exit code.

    Returns:
        0 when the gate is ``WS_DIVERGENCE_ANALYSIS_PASS``, otherwise 1.
    """
    cli_args = parse_args()
    gate_status = analyze(cli_args)["gate_status"]
    print(f"DIVERGENCE_ANALYSIS_MANIFEST={cli_args.output_manifest}")
    print(f"DIVERGENCE_ANALYSIS_REPORT={cli_args.output_report}")
    print(f"DIVERGENCE_ANALYSIS_GATE={gate_status}")
    if gate_status == "WS_DIVERGENCE_ANALYSIS_PASS":
        return 0
    return 1


if __name__ == "__main__":
    raise SystemExit(main())

File diff suppressed because it is too large Load diff

View file

@ -10,7 +10,7 @@ CI_KUBECONFIG_PATH="${CI_KUBECONFIG_PATH:-$PLATFORM_REPO_DIR/.state/hetzner/kube
PROJECT_NAME="${PROJECT_NAME:-orderbooks}"
PROJECT_NAMESPACE="${PROJECT_NAMESPACE:-orderbooks}"
PROJECT_DEPLOYMENTS="${PROJECT_DEPLOYMENTS:-orderbooks-collector}"
PROJECT_DEPLOYMENTS="${PROJECT_DEPLOYMENTS:-orderbooks-collector,orderbooks-ws-recorder}"
PROJECT_REGISTRY_SECRET_NAME="${PROJECT_REGISTRY_SECRET_NAME:-orderbooks-registry-creds}"
RCLONE_SECRET_NAME="${RCLONE_SECRET_NAME:-orderbooks-rclone-config}"
RCLONE_SECRET_KEY="${RCLONE_SECRET_KEY:-rclone.conf}"
@ -65,7 +65,7 @@ load_env_defaults "$PLATFORM_RESOLVED_ENV_FILE"
# env file may describe the platform repo itself, not this app repo.
PROJECT_NAME="${ORDERBOOKS_PROJECT_NAME:-orderbooks}"
PROJECT_NAMESPACE="${ORDERBOOKS_PROJECT_NAMESPACE:-orderbooks}"
PROJECT_DEPLOYMENTS="${ORDERBOOKS_PROJECT_DEPLOYMENTS:-orderbooks-collector}"
PROJECT_DEPLOYMENTS="${ORDERBOOKS_PROJECT_DEPLOYMENTS:-orderbooks-collector,orderbooks-ws-recorder}"
PROJECT_REGISTRY_SECRET_NAME="${ORDERBOOKS_PROJECT_REGISTRY_SECRET_NAME:-orderbooks-registry-creds}"
RCLONE_SECRET_NAME="${ORDERBOOKS_RCLONE_SECRET_NAME:-orderbooks-rclone-config}"
RCLONE_SECRET_KEY="${ORDERBOOKS_RCLONE_SECRET_KEY:-rclone.conf}"

View file

@ -0,0 +1,218 @@
#!/usr/bin/env bash
# Canary-only build/deploy helper for the orderbooks-ws-recorder Deployment.
# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail
# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
# Settings below take their value from the environment when set, else the default shown.
# NOTE(review): the kubeconfig default is an absolute path under one user's home
# directory — confirm that is intended for anyone else running this script.
KUBECONFIG_PATH="${KUBECONFIG_PATH:-/home/philipp/dev/ae/nuri/unrip3/.state/hetzner/kubeconfig.yaml}"
NAMESPACE="${PROJECT_NAMESPACE:-orderbooks}"
REGISTRY_HOST="${REGISTRY_HOST:-registry.doran.133011.xyz}"
PROJECT_NAME="${PROJECT_NAME:-orderbooks}"
REGISTRY_SECRET_NAME="${PROJECT_REGISTRY_SECRET_NAME:-orderbooks-registry-creds}"
REPO_CLONE_URL="${REPO_CLONE_URL:-https://git.doran.133011.xyz/philipp/orderbooks.git}"
# Commit to build: HEAD of the local checkout unless --git-ref overrides it.
GIT_REF="$(git -C "$ROOT_DIR" rev-parse HEAD)"
# Option state; the argument parser fills these in.
IMAGE_TAG=""
OUTPUT_PATH=""
SERVER_DRY_RUN=0
SKIP_BUILD=0
# Print the command-line help text to stdout. The heredoc delimiter is quoted,
# so the text is emitted literally without variable expansion.
usage() {
  cat <<'EOF'
Usage: scripts/deploy/deploy_ws_canary_kaniko.sh [options]
Canary-only build/deploy path for orderbooks-ws-recorder. It does not apply
or roll deployment-collector.yaml and does not set the orderbooks-collector
image.
Options:
--git-ref SHA Committed Git SHA to build. Default: local HEAD.
--image-tag TAG Image tag. Default: ws-canary-<sha12>-<UTC>.
--output PATH Local deploy evidence JSON path.
--server-dry-run Do not build. Server-dry-run only the canary apply set.
--skip-build Skip Kaniko build and use REGISTRY_HOST/PROJECT_NAME:TAG.
--help Show help.
EOF
}
# Parse command-line options into the globals declared at the top of the script.
# Options that take a value are checked for that value first: without the check,
# `--output` given as the last argument aborts with an "unbound variable" error
# under `set -u` instead of the usage message and exit code 2 used for bad input.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --git-ref|--image-tag|--output)
      if [[ $# -lt 2 ]]; then
        echo "missing value for $1" >&2
        usage >&2
        exit 2
      fi
      case "$1" in
        --git-ref) GIT_REF="$2" ;;
        --image-tag) IMAGE_TAG="$2" ;;
        --output) OUTPUT_PATH="$2" ;;
      esac
      shift 2
      ;;
    --server-dry-run) SERVER_DRY_RUN=1; shift ;;
    --skip-build) SKIP_BUILD=1; shift ;;
    --help) usage; exit 0 ;;
    *) echo "unknown argument: $1" >&2; usage >&2; exit 2 ;;
  esac
done
# Exit with status 2 when the named command is not available on PATH.
require() { command -v "$1" >/dev/null 2>&1 || { echo "missing required command: $1" >&2; exit 2; }; }
require kubectl
require python3
require git
# An already-exported KUBECONFIG wins over the script's own KUBECONFIG_PATH.
export KUBECONFIG="${KUBECONFIG:-$KUBECONFIG_PATH}"
[[ -f "$KUBECONFIG" ]] || { echo "missing kubeconfig" >&2; exit 2; }
# First 12 characters of the ref, lower-cased, for use in the image tag and Job name.
short_sha="$(printf '%s' "$GIT_REF" | cut -c1-12 | tr '[:upper:]' '[:lower:]')"
if [[ -z "$IMAGE_TAG" ]]; then
  # Default tag: ws-canary-<sha12>-<UTC timestamp>, lower-cased.
  IMAGE_TAG="ws-canary-${short_sha}-$(date -u +%Y%m%dT%H%M%SZ | tr '[:upper:]' '[:lower:]')"
fi
IMAGE="${REGISTRY_HOST}/${PROJECT_NAME}:${IMAGE_TAG}"
# UTC timestamp identifying this run; also used in the default evidence file name.
RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"
if [[ -z "$OUTPUT_PATH" ]]; then
  OUTPUT_PATH="${ROOT_DIR}/data/manifests/ws_canary_deploy_${RUN_ID}.json"
fi
mkdir -p "$(dirname "$OUTPUT_PATH")"
# Scratch directory for generated files; removed when the script exits.
TMPDIR="$(mktemp -d)"
trap 'rm -rf "$TMPDIR"' EXIT
# Snapshot of the REST collector Deployment before anything is applied.
# Lookup failures are tolerated (`|| true`) and leave the value empty.
REST_IMAGE_BEFORE="$(kubectl -n "$NAMESPACE" get deployment orderbooks-collector -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || true)"
REST_READY_BEFORE="$(kubectl -n "$NAMESPACE" get deployment orderbooks-collector -o jsonpath='{.status.readyReplicas}/{.spec.replicas}' 2>/dev/null || true)"
# Print the canary manifest set as one multi-document YAML stream on stdout,
# with the bootstrap image reference replaced by "$IMAGE".
#
# Every file is read and rendered before anything is printed. Printing file by
# file would leave an incomplete stream already flowing into `kubectl apply`
# when a later manifest is missing or unreadable.
render_canary() {
  python3 - "$ROOT_DIR" "$IMAGE" <<'PY_RENDER'
import sys
from pathlib import Path

root = Path(sys.argv[1])
image = sys.argv[2]
files = [
    'deploy/k8s/base/namespace.yaml',
    'deploy/k8s/base/configmap.yaml',
    'deploy/k8s/base/pvc.yaml',
    'deploy/k8s/base/cronjob-uploader.yaml',
    'deploy/k8s/base/deployment-ws-recorder.yaml',
]
# Render everything first; a read error raises here, before any output exists.
documents = [
    (root / rel).read_text().replace('registry.doran.133011.xyz/orderbooks:bootstrap', image).rstrip()
    for rel in files
]
print('\n---\n'.join(documents))
PY_RENDER
}
# Server-side dry run: validate the rendered canary manifests against the API
# server, write a small evidence file, and stop before any build or live apply.
if [[ "$SERVER_DRY_RUN" -eq 1 ]]; then
  render_canary | kubectl apply --dry-run=server -f -
  # The JSON is produced by shell interpolation; values are inserted verbatim
  # without JSON escaping.
  cat >"$OUTPUT_PATH" <<EOF_JSON
{
  "schema_name": "ws_canary_deploy_evidence",
  "schema_version": 1,
  "run_id": "${RUN_ID}",
  "mode": "server_dry_run",
  "status": "PASS",
  "image": "${IMAGE}",
  "git_ref": "${GIT_REF}",
  "resources_applied": ["namespace.yaml", "configmap.yaml", "pvc.yaml", "cronjob-uploader.yaml", "deployment-ws-recorder.yaml"],
  "deployment_collector_applied": false,
  "rest_image_before": "${REST_IMAGE_BEFORE}",
  "rest_ready_before": "${REST_READY_BEFORE}"
}
EOF_JSON
  echo "WS_CANARY_DEPLOY_EVIDENCE=$OUTPUT_PATH"
  echo "WS_CANARY_DRY_RUN=PASS"
  exit 0
fi
# Confirm the requested ref exists on the remote before starting a build.
# `git ls-remote` exits 0 whenever it reaches the remote, even when nothing
# matches, so the decision is based on its output rather than its exit status.
remote_ref_match="$(GIT_TERMINAL_PROMPT=0 git ls-remote "$REPO_CLONE_URL" "$GIT_REF" 2>/dev/null || true)"
if [[ -z "$remote_ref_match" ]]; then
  # ls-remote patterns match ref names, not object ids, so a raw SHA produces
  # no match above. Accept it only when it is the current tip of remote main.
  remote_main="$(GIT_TERMINAL_PROMPT=0 git ls-remote "$REPO_CLONE_URL" refs/heads/main | awk '{print $1}' || true)"
  if [[ -z "$remote_main" || "$remote_main" != "$GIT_REF" ]]; then
    echo "git ref is not confirmed on Forgejo main; push the source commit first" >&2
    exit 3
  fi
fi
# Name of the in-cluster build Job, derived from the short SHA. The second
# assignment replaces runs of characters outside [a-z0-9-] with "-", strips a
# leading/trailing "-", and truncates to 63 characters.
BUILD_JOB="orderbooks-ws-build-${short_sha}"
BUILD_JOB="$(printf '%s' "$BUILD_JOB" | tr -cs 'a-z0-9-' '-' | sed 's/^-//;s/-$//' | cut -c1-63)"
if [[ "$SKIP_BUILD" -eq 0 ]]; then
  # Remove any earlier Job with the same name so the apply below creates a fresh one.
  kubectl -n "$NAMESPACE" delete job "$BUILD_JOB" --ignore-not-found=true >/dev/null
  # Job layout: an init container clones main (depth 1) and checks out GIT_REF
  # into a shared emptyDir; the Kaniko container then builds /workspace/Dockerfile
  # and pushes to ${IMAGE} using the registry secret mounted at /kaniko/.docker.
  # The heredoc delimiter is unquoted, so ${...} is expanded by this shell.
  cat >"$TMPDIR/build-job.yaml" <<EOF_JOB
apiVersion: batch/v1
kind: Job
metadata:
  name: ${BUILD_JOB}
  namespace: ${NAMESPACE}
  labels:
    app.kubernetes.io/name: orderbooks
    app.kubernetes.io/component: ws-canary-build
spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      restartPolicy: Never
      volumes:
        - name: workspace
          emptyDir: {}
        - name: registry-creds
          secret:
            secretName: ${REGISTRY_SECRET_NAME}
            items:
              - key: .dockerconfigjson
                path: config.json
      initContainers:
        - name: checkout
          image: alpine/git:2.47.2
          command: ["/bin/sh", "-lc"]
          args:
            - >-
              git clone --depth=1 --branch main "${REPO_CLONE_URL}" /workspace &&
              cd /workspace &&
              git checkout --detach "${GIT_REF}"
          volumeMounts:
            - name: workspace
              mountPath: /workspace
      containers:
        - name: kaniko
          image: gcr.io/kaniko-project/executor:v1.23.2-debug
          args:
            - --context=/workspace
            - --dockerfile=/workspace/Dockerfile
            - --destination=${IMAGE}
            - --cache=false
          volumeMounts:
            - name: workspace
              mountPath: /workspace
            - name: registry-creds
              mountPath: /kaniko/.docker
EOF_JOB
  kubectl apply -f "$TMPDIR/build-job.yaml" >/dev/null
  # Waits for the Complete condition only, for up to 20 minutes.
  kubectl -n "$NAMESPACE" wait --for=condition=Complete --timeout=20m "job/${BUILD_JOB}" >/dev/null
fi
# Last lines of the build Job log; empty when the log cannot be read (for
# example when no Job with that name exists).
BUILD_LOG_TAIL="$(kubectl -n "$NAMESPACE" logs "job/${BUILD_JOB}" --tail=120 2>/dev/null || true)"
# Live apply of the canary manifest set, then wait for the recorder rollout.
render_canary | kubectl apply -f - >/dev/null
kubectl -n "$NAMESPACE" rollout status deployment/orderbooks-ws-recorder --timeout=300s >/dev/null
# Post-deploy state of both Deployments, recorded as evidence.
WS_IMAGE_AFTER="$(kubectl -n "$NAMESPACE" get deployment orderbooks-ws-recorder -o jsonpath='{.spec.template.spec.containers[0].image}')"
WS_READY_AFTER="$(kubectl -n "$NAMESPACE" get deployment orderbooks-ws-recorder -o jsonpath='{.status.readyReplicas}/{.spec.replicas}')"
REST_IMAGE_AFTER="$(kubectl -n "$NAMESPACE" get deployment orderbooks-collector -o jsonpath='{.spec.template.spec.containers[0].image}')"
REST_READY_AFTER="$(kubectl -n "$NAMESPACE" get deployment orderbooks-collector -o jsonpath='{.status.readyReplicas}/{.spec.replicas}')"
# Evidence writer: takes eleven positional arguments plus the build log on
# stdin (only its last 6000 characters are kept) and writes the JSON manifest.
# "unchanged" is true when the REST collector image and ready ratio are the
# same before and after.
WRITE_EVIDENCE_PY="$TMPDIR/write-evidence.py"
cat >"$WRITE_EVIDENCE_PY" <<'PY_WRITE'
import datetime as dt, json, sys
from pathlib import Path
(path, run_id, git_ref, image, build_job, ws_image_after, ws_ready_after, rest_image_before, rest_ready_before, rest_image_after, rest_ready_after)=sys.argv[1:12]
manifest={
    'schema_name':'ws_canary_deploy_evidence',
    'schema_version':1,
    'run_id':run_id,
    'written_at_utc':dt.datetime.now(dt.UTC).replace(microsecond=0).isoformat().replace('+00:00','Z'),
    'mode':'live_canary_deploy',
    'status':'PASS',
    'git_ref':git_ref,
    'image':image,
    'build_job':build_job,
    'build_log_tail':sys.stdin.read()[-6000:],
    'resources_applied':['namespace.yaml','configmap.yaml','pvc.yaml','cronjob-uploader.yaml','deployment-ws-recorder.yaml'],
    'deployment_collector_applied':False,
    'ws_recorder':{'image_after':ws_image_after,'ready_after':ws_ready_after},
    'rest_collector':{'image_before':rest_image_before,'ready_before':rest_ready_before,'image_after':rest_image_after,'ready_after':rest_ready_after,'unchanged':rest_image_before==rest_image_after and rest_ready_before==rest_ready_after},
}
Path(path).write_text(json.dumps(manifest, indent=2, sort_keys=True)+'\n')
PY_WRITE
printf '%s' "$BUILD_LOG_TAIL" | python3 "$WRITE_EVIDENCE_PY" "$OUTPUT_PATH" "$RUN_ID" "$GIT_REF" "$IMAGE" "$BUILD_JOB" "$WS_IMAGE_AFTER" "$WS_READY_AFTER" "$REST_IMAGE_BEFORE" "$REST_READY_BEFORE" "$REST_IMAGE_AFTER" "$REST_READY_AFTER"
# Machine-readable result lines for whoever invoked the script.
echo "WS_CANARY_DEPLOY_EVIDENCE=$OUTPUT_PATH"
echo "WS_CANARY_IMAGE=$IMAGE"
echo "WS_CANARY_DEPLOY=PASS"

View file

@ -0,0 +1,421 @@
#!/usr/bin/env bash
# Runtime smoke check for the Kubernetes websocket recorder canary.
# Abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail
# Defaults below come from ORDERBOOKS_* environment variables when set and can
# be overridden again by command-line options.
NAMESPACE="${ORDERBOOKS_K8S_NAMESPACE:-orderbooks}"
WS_DEPLOYMENT="${ORDERBOOKS_WS_DEPLOYMENT:-orderbooks-ws-recorder}"
REST_DEPLOYMENT="${ORDERBOOKS_REST_DEPLOYMENT:-orderbooks-collector}"
UPLOADER_CRONJOB="${ORDERBOOKS_UPLOADER_CRONJOB:-orderbooks-uploader}"
# Upper bound, in seconds, for each polling loop that waits for runtime evidence.
WAIT_SECONDS="${ORDERBOOKS_K8S_WS_SMOKE_WAIT_SECONDS:-900}"
OUTPUT_PATH=""
# Paths inside the recorder pod, not on the machine running this script.
RAW_DIR="/var/lib/orderbooks/raw_orderbooks"
MANIFEST_DIR="/var/lib/orderbooks/manifests"
UPLOAD_MIN_AGE_SECONDS="600"
SMOKE_UPLOAD_RETENTION_DAYS="3"
# Print the command-line help text to stdout. The heredoc delimiter is quoted,
# so the text is emitted literally without variable expansion.
usage() {
  cat <<'EOF'
Usage: scripts/k8s_ws_runtime_smoke_check.sh [options]
Verifies the Kubernetes websocket recorder canary and writes compact local JSON
evidence. The script does not print secret contents.
Options:
--namespace NAME Namespace. Default: orderbooks.
--deployment NAME Websocket recorder Deployment. Default: orderbooks-ws-recorder.
--rest-deployment NAME Existing REST Deployment. Default: orderbooks-collector.
--cronjob NAME Production uploader CronJob. Default: orderbooks-uploader.
--wait-seconds N Max wait for runtime evidence. Default: 900.
--output PATH Local smoke evidence path.
--raw-dir PATH In-pod raw root. Default: /var/lib/orderbooks/raw_orderbooks.
--manifest-dir PATH In-pod manifest dir. Default: /var/lib/orderbooks/manifests.
--upload-min-age-seconds N Production upload min age to record. Default: 600.
--smoke-upload-retention-days N Retention used by one-off smoke upload Job. Default: 3.
--help Show help.
EOF
}
# Parse command-line options into the globals declared at the top of the script.
# Every option except --help takes a value. The value is checked up front so a
# trailing option without one yields the usage message and exit code 2 rather
# than an "unbound variable" abort under `set -u`.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --namespace|--deployment|--rest-deployment|--cronjob|--wait-seconds|--output|--raw-dir|--manifest-dir|--upload-min-age-seconds|--smoke-upload-retention-days)
      if [[ $# -lt 2 ]]; then
        echo "missing value for $1" >&2
        usage >&2
        exit 2
      fi
      case "$1" in
        --namespace) NAMESPACE="$2" ;;
        --deployment) WS_DEPLOYMENT="$2" ;;
        --rest-deployment) REST_DEPLOYMENT="$2" ;;
        --cronjob) UPLOADER_CRONJOB="$2" ;;
        --wait-seconds) WAIT_SECONDS="$2" ;;
        --output) OUTPUT_PATH="$2" ;;
        --raw-dir) RAW_DIR="$2" ;;
        --manifest-dir) MANIFEST_DIR="$2" ;;
        --upload-min-age-seconds) UPLOAD_MIN_AGE_SECONDS="$2" ;;
        --smoke-upload-retention-days) SMOKE_UPLOAD_RETENTION_DAYS="$2" ;;
      esac
      shift 2
      ;;
    --help) usage; exit 0 ;;
    *) echo "unknown argument: $1" >&2; usage >&2; exit 2 ;;
  esac
done
# Both tools are needed on the machine running this script: kubectl for every
# cluster call, and python3 for the wait conditions, the evidence writer and
# write_blocked itself. Checking python3 here avoids a late failure in the one
# code path meant to record why the run was blocked.
for required_tool in kubectl python3; do
  command -v "$required_tool" >/dev/null 2>&1 || { echo "$required_tool is required" >&2; exit 2; }
done
# UTC timestamp identifying this run; used in the default evidence file name.
RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"
if [[ -z "$OUTPUT_PATH" ]]; then
  OUTPUT_PATH="data/manifests/k8s_ws_runtime_smoke_${RUN_ID}.json"
fi
mkdir -p "$(dirname "$OUTPUT_PATH")"
# Scratch directory for generated helper scripts; removed when the script exits.
TMPDIR="$(mktemp -d)"
trap 'rm -rf "$TMPDIR"' EXIT
# Write a minimal "blocked" evidence file to $OUTPUT_PATH.
#   $1  gate status to record (for example BLOCKED_WS_RECORDER_RUNTIME)
#   $2  human-readable reason
# The file always records production_ready as false. Callers decide the exit code.
write_blocked() {
  local gate="$1"
  local reason="$2"
  python3 - "$OUTPUT_PATH" "$gate" "$reason" <<'PY_BLOCKED'
import datetime as dt
import json
import sys
from pathlib import Path
path=Path(sys.argv[1])
manifest={
    "schema_name":"k8s_ws_runtime_smoke",
    "schema_version":1,
    "written_at_utc":dt.datetime.now(dt.UTC).replace(microsecond=0).isoformat().replace('+00:00','Z'),
    "gate_status":sys.argv[2],
    "reason":sys.argv[3],
    "production_ready":False,
}
path.write_text(json.dumps(manifest,indent=2,sort_keys=True)+'\n')
PY_BLOCKED
}
# Print the name of one Running pod selected by a Deployment's matchLabels,
# or nothing when no such pod exists.
#   $1  Deployment name in $NAMESPACE
#
# The label selector is built with a Go template: kubectl's JSONPath dialect
# can iterate lists with range/end but has no key/value variable assignment,
# so a map cannot be rendered as "key=value" pairs with it.
pod_for_deployment() {
  local deployment="$1"
  local selector
  selector="$(kubectl -n "$NAMESPACE" get deployment "$deployment" -o go-template='{{range $k, $v := .spec.selector.matchLabels}}{{$k}}={{$v}},{{end}}' | sed 's/,$//')"
  # An empty selector would match every pod in the namespace; report "no pod" instead.
  if [[ -z "$selector" ]]; then
    return 0
  fi
  kubectl -n "$NAMESPACE" get pod -l "$selector" -o jsonpath='{.items[?(@.status.phase=="Running")].metadata.name}' | awk '{print $1}'
}
# Baseline of the REST collector Deployment, compared against the end-of-run state later.
REST_IMAGE_BEFORE="$(kubectl -n "$NAMESPACE" get deployment "$REST_DEPLOYMENT" -o jsonpath='{.spec.template.spec.containers[0].image}')"
REST_READY_BEFORE="$(kubectl -n "$NAMESPACE" get deployment "$REST_DEPLOYMENT" -o jsonpath='{.status.readyReplicas}/{.spec.replicas}')"
# Both Deployments must finish rolling out before pods are inspected.
kubectl -n "$NAMESPACE" rollout status "deployment/${REST_DEPLOYMENT}" --timeout=120s >/dev/null
kubectl -n "$NAMESPACE" rollout status "deployment/${WS_DEPLOYMENT}" --timeout=300s >/dev/null
REST_POD="$(pod_for_deployment "$REST_DEPLOYMENT")"
WS_POD="$(pod_for_deployment "$WS_DEPLOYMENT")"
# Without a running pod for each Deployment the check cannot proceed.
if [[ -z "$REST_POD" || -z "$WS_POD" ]]; then
  write_blocked "BLOCKED_K8S_RUNTIME_FAILURE" "missing running REST or websocket pod"
  exit 1
fi
# In-pod summary script, executed inside a pod through summarize_pod.
# It reads RAW_DIR, MANIFEST_DIR and CHECK_PATH from the environment and prints
# a single JSON object to stdout with:
#   - counts of websocket and REST checkpoint *.jsonl.gz files under the raw root,
#   - dot-files and *.open/*.tmp/*.partial files (first 20 paths plus a count),
#   - the most recently modified recorder and upload manifests, loaded as JSON,
#   - row count, byte size, sha256 and first-row schema_name for the last
#     websocket and REST file in sorted path order,
#   - the same summary for CHECK_PATH when it is set and the file exists.
# The heredoc delimiter is quoted, so nothing is expanded by this shell.
SUMMARY_PY="${TMPDIR}/summary.py"
cat >"${SUMMARY_PY}" <<'PY_SUMMARY'
import gzip, hashlib, json, os
from pathlib import Path
raw_root=Path(os.environ.get('RAW_DIR','/var/lib/orderbooks/raw_orderbooks'))
manifest_dir=Path(os.environ.get('MANIFEST_DIR','/var/lib/orderbooks/manifests'))
check_path=os.environ.get('CHECK_PATH') or ''
def sha(path):
    h=hashlib.sha256()
    with path.open('rb') as f:
        for chunk in iter(lambda:f.read(1024*1024), b''):
            h.update(chunk)
    return h.hexdigest()
def count_gz(path):
    rows=0
    first=None
    with gzip.open(path,'rt',encoding='utf-8') as f:
        for line in f:
            if line.strip():
                obj=json.loads(line)
                if first is None:
                    first=obj
                rows+=1
    return rows, first
def summarize_gz(path):
    rows, first = count_gz(path)
    return {'path':str(path),'rows':rows,'bytes':path.stat().st_size,'sha256':sha(path),'first_schema':first.get('schema_name') if isinstance(first,dict) else None}
def load_json(path):
    return json.loads(path.read_text())
ws_files=sorted(raw_root.glob('polymarket/ws_raw/**/*.jsonl.gz'))
rest_files=sorted(raw_root.glob('polymarket/rest_checkpoints/**/*.jsonl.gz'))
open_files=sorted([p for p in raw_root.glob('polymarket/**/*') if p.is_file() and (p.name.startswith('.') or p.name.endswith(('.open','.tmp','.partial')))])
recorder_manifests=sorted(manifest_dir.glob('polymarket_ws_recorder_*.json'), key=lambda p: p.stat().st_mtime)
upload_manifests=sorted(manifest_dir.glob('upload_archive_*.json'), key=lambda p: p.stat().st_mtime)
latest_manifest=load_json(recorder_manifests[-1]) if recorder_manifests else None
latest_upload=load_json(upload_manifests[-1]) if upload_manifests else None
result={
    'ws_file_count':len(ws_files),
    'rest_file_count':len(rest_files),
    'open_or_temp_files':[str(p) for p in open_files[:20]],
    'open_or_temp_file_count':len(open_files),
    'recorder_manifest_count':len(recorder_manifests),
    'upload_manifest_count':len(upload_manifests),
    'latest_manifest_path':str(recorder_manifests[-1]) if recorder_manifests else None,
    'latest_upload_manifest_path':str(upload_manifests[-1]) if upload_manifests else None,
    'latest_manifest':latest_manifest,
    'latest_upload_manifest':latest_upload,
}
if ws_files:
    result['latest_ws']=summarize_gz(ws_files[-1])
if rest_files:
    result['latest_rest']=summarize_gz(rest_files[-1])
if check_path:
    p=Path(check_path)
    result['specific_file']={'path':check_path,'exists':p.exists()}
    if p.exists():
        result['specific_file'].update(summarize_gz(p))
print(json.dumps(result, sort_keys=True))
PY_SUMMARY
# Run the summary script inside a pod and print its JSON output.
#   $1  pod name
#   $2  optional in-pod file path passed as CHECK_PATH (defaults to empty)
# The script text is passed to the pod's python3 through `-c`, so nothing has
# to be copied into the pod.
summarize_pod() {
  local pod="$1"
  local check_path="${2:-}"
  kubectl -n "$NAMESPACE" exec "$pod" -- env RAW_DIR="$RAW_DIR" MANIFEST_DIR="$MANIFEST_DIR" CHECK_PATH="$check_path" python3 -c "$(cat "$SUMMARY_PY")"
}
# Local wait-condition script. It reads a pod summary (JSON) on stdin and exits
# 0 when the condition for the given mode holds, 1 otherwise.
#   argv[1]  mode: "initial" or "post_restart"
#   argv[2]  run_id seen before the restart (used by post_restart only)
# initial:      latest manifest counters show websocket messages AND REST successes.
# post_restart: latest manifest has a run_id different from argv[2] AND shows
#               websocket messages.
WAIT_PY="${TMPDIR}/wait_condition.py"
cat >"${WAIT_PY}" <<'PY_WAIT'
import json, sys
mode=sys.argv[1]
old_run=sys.argv[2] if len(sys.argv) > 2 else ''
o=json.loads(sys.stdin.read())
manifest=o.get('latest_manifest') or {}
counters=manifest.get('counters') or {}
if mode == 'initial':
    if counters.get('websocket_message_count',0) > 0 and counters.get('rest_success_count',0) > 0:
        raise SystemExit(0)
elif mode == 'post_restart':
    if manifest.get('run_id') and manifest.get('run_id') != old_run and counters.get('websocket_message_count',0) > 0:
        raise SystemExit(0)
raise SystemExit(1)
PY_WAIT
# Poll the websocket pod every 15 seconds until its latest recorder manifest
# shows websocket messages and REST successes, or WAIT_SECONDS elapse.
#
# A failed summary call is treated as "not ready yet": under `set -e` an
# unguarded failure inside the command substitution would abort the script on
# the first transient `kubectl exec` error, before any blocked evidence is written.
initial_json=""
end=$((SECONDS + WAIT_SECONDS))
while [[ $SECONDS -lt $end ]]; do
  initial_json="$(summarize_pod "$WS_POD" || true)"
  if [[ -n "$initial_json" ]] && python3 "$WAIT_PY" initial "" <<<"$initial_json"; then
    break
  fi
  sleep 15
done
# Re-check after the loop: it may have ended by timeout rather than success.
if [[ -z "$initial_json" ]] || ! python3 "$WAIT_PY" initial "" <<<"$initial_json"; then
  write_blocked "BLOCKED_WS_RECORDER_RUNTIME" "websocket recorder did not expose initial websocket and REST counters before timeout"
  exit 1
fi
# Print one nested field of the initial pod summary held in $initial_json.
#   $1  top-level key   $2  key inside that object   $3  value printed when the
#   field is missing or falsy
initial_summary_field() {
  python3 -c 'import json,sys; o=json.loads(sys.stdin.read()); print((o.get(sys.argv[1]) or {}).get(sys.argv[2]) or sys.argv[3])' "$1" "$2" "$3" <<<"$initial_json"
}
# Pre-restart reference values: the recorder run id and the identity (path,
# sha256, row count) of the latest websocket archive.
old_run_id="$(initial_summary_field latest_manifest run_id "")"
old_ws_path="$(initial_summary_field latest_ws path "")"
old_ws_sha="$(initial_summary_field latest_ws sha256 "")"
old_ws_rows="$(initial_summary_field latest_ws rows 0)"
# Restart the recorder by deleting its pod, wait for the Deployment to recover,
# then poll the replacement pod until it reports a new run with websocket
# messages, or WAIT_SECONDS elapse.
kubectl -n "$NAMESPACE" delete pod "$WS_POD" --wait=true >/dev/null
kubectl -n "$NAMESPACE" rollout status "deployment/${WS_DEPLOYMENT}" --timeout=300s >/dev/null
NEW_WS_POD="$(pod_for_deployment "$WS_DEPLOYMENT")"
if [[ -z "$NEW_WS_POD" ]]; then
  write_blocked "BLOCKED_WS_RECORDER_RUNTIME" "websocket pod did not return after restart"
  exit 1
fi
# Snapshot taken right after the restart, including the pre-restart archive.
restart_json="$(summarize_pod "$NEW_WS_POD" "$old_ws_path")"
post_json=""
end=$((SECONDS + WAIT_SECONDS))
while [[ $SECONDS -lt $end ]]; do
  # A failed summary call counts as "not ready yet"; under `set -e` an
  # unguarded failure here would abort the script on a transient exec error
  # without recording blocked evidence.
  post_json="$(summarize_pod "$NEW_WS_POD" "$old_ws_path" || true)"
  if [[ -n "$post_json" ]] && python3 "$WAIT_PY" post_restart "$old_run_id" <<<"$post_json"; then
    break
  fi
  sleep 15
done
# Re-check after the loop: it may have ended by timeout rather than success.
if [[ -z "$post_json" ]] || ! python3 "$WAIT_PY" post_restart "$old_run_id" <<<"$post_json"; then
  write_blocked "BLOCKED_WS_RECORDER_RUNTIME" "new websocket pod did not write post-restart manifest evidence before timeout"
  exit 1
fi
# One-off smoke upload Job. It reuses the image of the production uploader
# CronJob and runs /app/scripts/upload_archive_rclone.sh with --execute and
# --cleanup-after-verify against the shared orderbooks-data volume, with the
# upload minimum age set to "0" and the retention taken from
# SMOKE_UPLOAD_RETENTION_DAYS.
# NOTE(review): what --execute / --cleanup-after-verify do is defined by the
# uploader script, which is not visible here — confirm before relying on it.
UPLOADER_IMAGE="$(kubectl -n "$NAMESPACE" get cronjob "$UPLOADER_CRONJOB" -o jsonpath='{.spec.jobTemplate.spec.template.spec.containers[0].image}')"
# Job name: lower-cased run id, with underscores replaced by dashes.
JOB_NAME="orderbooks-ws-smoke-upload-${RUN_ID,,}"
JOB_NAME="${JOB_NAME//_/-}"
# The heredoc delimiter is unquoted, so ${...} is expanded by this shell.
cat >"${TMPDIR}/upload-job.yaml" <<EOF_JOB
apiVersion: batch/v1
kind: Job
metadata:
  name: ${JOB_NAME}
  namespace: ${NAMESPACE}
  labels:
    app.kubernetes.io/name: orderbooks
    app.kubernetes.io/part-of: orderbooks
    app.kubernetes.io/component: ws-smoke-uploader
spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 86400
  template:
    metadata:
      labels:
        app.kubernetes.io/name: orderbooks
        app.kubernetes.io/component: ws-smoke-uploader
    spec:
      restartPolicy: Never
      imagePullSecrets:
        - name: orderbooks-registry-creds
      securityContext:
        runAsNonRoot: true
        runAsUser: 10001
        runAsGroup: 10001
        fsGroup: 10001
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: uploader
          image: ${UPLOADER_IMAGE}
          imagePullPolicy: IfNotPresent
          command:
            - /bin/bash
            - /app/scripts/upload_archive_rclone.sh
            - --execute
            - --cleanup-after-verify
          env:
            - name: ORDERBOOKS_DATA_DIR
              value: /var/lib/orderbooks
            - name: ORDERBOOKS_UPLOAD_DATA_DIR
              value: /var/lib/orderbooks
            - name: ORDERBOOKS_UPLOAD_RAW_DIR
              value: /var/lib/orderbooks/raw_orderbooks
            - name: ORDERBOOKS_UPLOAD_SOURCE_MANIFEST_DIR
              value: /var/lib/orderbooks/manifests
            - name: ORDERBOOKS_UPLOAD_MANIFEST_DIR
              value: /var/lib/orderbooks/manifests
            - name: ORDERBOOKS_UPLOAD_MIN_AGE_SECONDS
              value: "0"
            - name: ORDERBOOKS_UPLOAD_RETENTION_DAYS
              value: "${SMOKE_UPLOAD_RETENTION_DAYS}"
            - name: ORDERBOOKS_RCLONE_BIN
              value: /usr/bin/rclone
            - name: ORDERBOOKS_RCLONE_DEST
              value: gdrive:orderbooks/polymarket
            - name: RCLONE_CONFIG
              value: /etc/rclone/rclone.conf
          volumeMounts:
            - name: orderbooks-data
              mountPath: /var/lib/orderbooks
            - name: rclone-config
              mountPath: /etc/rclone/rclone.conf
              subPath: rclone.conf
              readOnly: true
          resources:
            requests:
              cpu: 50m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
      volumes:
        - name: orderbooks-data
          persistentVolumeClaim:
            claimName: orderbooks-data
        - name: rclone-config
          secret:
            secretName: orderbooks-rclone-config
            items:
              - key: rclone.conf
                path: rclone.conf
EOF_JOB
kubectl apply -f "${TMPDIR}/upload-job.yaml" >/dev/null
# Wait up to 900 seconds for the smoke upload Job to complete. A wait failure
# is tolerated (`|| true`) so the outcome is still recorded below instead of
# aborting under `set -e`.
kubectl -n "$NAMESPACE" wait --for=condition=Complete --timeout=900s "job/${JOB_NAME}" >/dev/null || true
# Type of the Job's last listed condition; empty when the lookup fails.
JOB_STATUS="$(kubectl -n "$NAMESPACE" get job "$JOB_NAME" -o jsonpath='{.status.conditions[-1:].type}' 2>/dev/null || true)"
JOB_LOG_TAIL="$(kubectl -n "$NAMESPACE" logs "job/${JOB_NAME}" --tail=80 2>/dev/null || true)"
# Pod-side summary after the upload, including the pre-restart websocket archive.
upload_json="$(summarize_pod "$NEW_WS_POD" "$old_ws_path")"
# End-of-run state of the REST collector, compared against the baseline taken at the start.
REST_IMAGE_AFTER="$(kubectl -n "$NAMESPACE" get deployment "$REST_DEPLOYMENT" -o jsonpath='{.spec.template.spec.containers[0].image}')"
REST_READY_AFTER="$(kubectl -n "$NAMESPACE" get deployment "$REST_DEPLOYMENT" -o jsonpath='{.status.readyReplicas}/{.spec.replicas}')"
WRITE_PY="${TMPDIR}/write_evidence.py"
cat >"${WRITE_PY}" <<'PY_WRITE'
# Evidence writer for the websocket recorder smoke run.
# Inputs: 19 positional arguments (unpacked below) and, on stdin, five sections
# separated by a '---PART---' line: four JSON summaries followed by the uploader
# job log tail. Output: a JSON evidence manifest written to output_path and a
# short JSON result on stdout. Exit status is 0 only for WS_RECORDER_K8S_SMOKE_PASS.
import datetime as dt, json, sys
from pathlib import Path
(output_path, namespace, ws_deployment, rest_deployment, uploader_cronjob,
rest_image_before, rest_ready_before, rest_image_after, rest_ready_after,
ws_pod_before, ws_pod_after, old_ws_path, old_ws_sha, old_ws_rows,
job_name, job_status, production_min_age, smoke_retention_days, uploader_image) = sys.argv[1:20]
text=sys.stdin.read()
parts=text.split('\n---PART---\n')
# Section order must match the caller's printf: initial, restart, post, upload, job log tail.
initial=json.loads(parts[0])
restart=json.loads(parts[1])
post=json.loads(parts[2])
upload=json.loads(parts[3])
job_log_tail=parts[4]
# Every failed check appends a human-readable reason; an empty list means PASS.
reasons=[]
old_ws_rows=int(old_ws_rows or 0)
# A non-empty old_ws_path means a closed websocket file was recorded before the restart.
old_file_preexisting=bool(old_ws_path)
if old_file_preexisting:
# The pre-existing file must still exist with the same sha256 and row count.
specific=(post.get('specific_file') or {})
if not specific.get('exists') or specific.get('sha256') != old_ws_sha or int(specific.get('rows') or 0) != old_ws_rows:
reasons.append('pre-existing closed websocket file changed or failed parse after restart')
else:
# Without a pre-existing file, require that a latest websocket file with rows is reported.
latest_after=(restart.get('latest_ws') or post.get('latest_ws') or {})
if int(latest_after.get('rows') or 0) <= 0:
reasons.append('SIGTERM did not produce a parseable closed websocket archive')
post_manifest=post.get('latest_manifest') or {}
initial_manifest=initial.get('latest_manifest') or {}
post_counters=post_manifest.get('counters') or {}
# The post-restart manifest must carry a run_id that differs from the initial one.
if not post_manifest.get('run_id') or post_manifest.get('run_id') == initial_manifest.get('run_id'):
reasons.append('post-restart recorder manifest did not come from a new run')
if int(post_counters.get('websocket_message_count') or 0) <= 0:
reasons.append('post-restart recorder did not write websocket message evidence')
if not upload.get('latest_ws') or int((upload.get('latest_ws') or {}).get('rows') or 0) <= 0:
reasons.append('websocket gzip evidence missing or empty')
if not upload.get('latest_rest') or int((upload.get('latest_rest') or {}).get('rows') or 0) <= 0:
reasons.append('REST checkpoint gzip evidence missing or empty')
# The REST collector deployment must be untouched: same image and same ready/replica string.
if rest_image_before != rest_image_after:
reasons.append('REST collector image changed')
if rest_ready_after != rest_ready_before:
reasons.append('REST collector readiness changed')
if job_status != 'Complete':
reasons.append('smoke uploader job did not complete')
# Upload manifest checks: verified uploads, skipped open/temp files, and safe deletes.
upload_manifest=upload.get('latest_upload_manifest') or {}
verified_files=upload_manifest.get('verified_files') or []
skipped_files=upload_manifest.get('skipped_files') or []
deleted_files=upload_manifest.get('deleted_local_files') or []
retained_files=upload_manifest.get('retained_local_files') or []
verified_paths={item.get('relative_path') for item in verified_files}
verified_ws_or_rest=[item for item in verified_files if 'polymarket/ws_raw/' in str(item.get('relative_path')) or 'polymarket/rest_checkpoints/' in str(item.get('relative_path'))]
open_count=max(initial.get('open_or_temp_file_count') or 0, post.get('open_or_temp_file_count') or 0, upload.get('open_or_temp_file_count') or 0)
skipped_open=[item for item in skipped_files if item.get('reason') == 'open_or_temporary_file']
# A delete is unsafe when the deleted path is not in the verified set.
unsafe_deletes=[item for item in deleted_files if item.get('relative_path') not in verified_paths]
if upload_manifest.get('gate_status') != 'PASS' or upload_manifest.get('operation_status') != 'UPLOAD_VERIFIED':
reasons.append('upload manifest did not prove verified upload')
if int((upload_manifest.get('counts') or {}).get('verified') or 0) <= 0:
reasons.append('upload manifest verified count was zero')
if not verified_ws_or_rest:
reasons.append('upload manifest did not verify websocket recorder raw/checkpoint files')
if open_count > 0 and not skipped_open:
reasons.append('open/temp files existed but were not recorded as skipped')
if unsafe_deletes:
reasons.append('cleanup deleted files not present in verified set')
# Gate selection is substring-based: any reason mentioning 'upload', 'cleanup' or
# 'open/temp' yields the upload/retention gate; any other reason yields the runtime gate.
gate='WS_RECORDER_K8S_SMOKE_PASS' if not reasons else ('BLOCKED_WS_RECORDER_UPLOAD_OR_RETENTION' if any('upload' in r or 'cleanup' in r or 'open/temp' in r for r in reasons) else 'BLOCKED_WS_RECORDER_RUNTIME')
manifest={
'schema_name':'k8s_ws_runtime_smoke',
'schema_version':1,
'written_at_utc':dt.datetime.now(dt.UTC).replace(microsecond=0).isoformat().replace('+00:00','Z'),
'gate_status':gate,
'namespace':namespace,
'deployments':{'ws':ws_deployment,'rest':rest_deployment},
'uploader_cronjob':uploader_cronjob,
'uploader_image':uploader_image,
'pods':{'ws_before':ws_pod_before,'ws_after':ws_pod_after},
'rest_collector':{'image_before':rest_image_before,'ready_before':rest_ready_before,'image_after':rest_image_after,'ready_after':rest_ready_after,'unchanged':rest_image_before == rest_image_after and rest_ready_before == rest_ready_after},
'restart_check':{'old_file_preexisting':old_file_preexisting,'old_ws_file':{'path':old_ws_path,'sha256':old_ws_sha,'rows':old_ws_rows},'restart_summary':restart.get('specific_file')},
'initial':initial,
'post_restart':post,
'upload':upload,
# Only the last 4000 characters of the job log are kept. The int() conversions
# raise ValueError if the corresponding argument is empty or non-numeric.
'uploader_job':{'name':job_name,'status':job_status,'log_tail':job_log_tail[-4000:],'production_min_age_seconds':int(production_min_age),'smoke_min_age_seconds':0,'smoke_retention_days':int(smoke_retention_days)},
'upload_manifest_summary':{'path':upload.get('latest_upload_manifest_path'),'gate_status':upload_manifest.get('gate_status'),'operation_status':upload_manifest.get('operation_status'),'counts':upload_manifest.get('counts'),'verified_ws_or_rest_count':len(verified_ws_or_rest),'skipped_open_or_temp_count':len(skipped_open),'deleted_count':len(deleted_files),'retained_count':len(retained_files),'unsafe_delete_count':len(unsafe_deletes)},
'reasons':reasons,
# A smoke pass is never reported as production readiness.
'production_ready':False,
}
path=Path(output_path)
path.write_text(json.dumps(manifest, indent=2, sort_keys=True)+'\n')
print(json.dumps({'gate_status':gate,'evidence_path':str(path),'reasons':reasons}, indent=2, sort_keys=True))
raise SystemExit(0 if gate == 'WS_RECORDER_K8S_SMOKE_PASS' else 1)
PY_WRITE
printf '%s\n---PART---\n%s\n---PART---\n%s\n---PART---\n%s\n---PART---\n%s' "$initial_json" "$restart_json" "$post_json" "$upload_json" "$JOB_LOG_TAIL" | python3 "$WRITE_PY" "$OUTPUT_PATH" "$NAMESPACE" "$WS_DEPLOYMENT" "$REST_DEPLOYMENT" "$UPLOADER_CRONJOB" "$REST_IMAGE_BEFORE" "$REST_READY_BEFORE" "$REST_IMAGE_AFTER" "$REST_READY_AFTER" "$WS_POD" "$NEW_WS_POD" "$old_ws_path" "$old_ws_sha" "$old_ws_rows" "$JOB_NAME" "$JOB_STATUS" "$UPLOAD_MIN_AGE_SECONDS" "$SMOKE_UPLOAD_RETENTION_DAYS" "$UPLOADER_IMAGE"

View file

@ -0,0 +1,685 @@
#!/usr/bin/env python3
"""Offline Polymarket websocket book reconstruction and REST comparison.
Checkpoint 10C scope: read raw 10B sample files, derive local per-token order
book state, and compare against REST /books checkpoints. Raw files remain the
source of truth and are not modified.
"""
from __future__ import annotations
import argparse
import datetime as dt
import gzip
import hashlib
import json
from copy import deepcopy
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Any
# Tool identity; both values are embedded in the output manifest's "reconstructor" entry.
RECONSTRUCTOR_NAME = "polymarket_ws_book_reconstructor"
RECONSTRUCTOR_VERSION = "0.1.1"
# Default paths used by parse_args(): the 10B input manifest first, then the
# manifest, report, and documentation files this tool writes.
DEFAULT_INPUT_MANIFEST = Path("data/manifests/checkpoint_010b_ws_raw_sample.json")
DEFAULT_MANIFEST_PATH = Path("data/manifests/checkpoint_010c_book_reconstruction_sample.json")
DEFAULT_REPORT_PATH = Path("reports/checkpoints/checkpoint_010c_book_reconstruction_sample.md")
DEFAULT_SCHEMA_DOC = Path("docs/POLYMARKET_WEBSOCKET_SCHEMA.md")
DEFAULT_RECON_DOC = Path("docs/BOOK_RECONSTRUCTION.md")
# Root directory under which reconstructed book and comparison files are written.
DEFAULT_OUTPUT_ROOT = Path("data/reconstruction_sample")
class BookState:
    """Mutable order-book state for one token, rebuilt from websocket events.

    ``bids`` and ``asks`` map a normalized price string to the size resting at
    that price. The remaining attributes are bookkeeping counters and
    per-token warnings, all of which are reported by :meth:`clone_summary`.
    """

    def __init__(self, token_meta: dict[str, Any]) -> None:
        """Start with empty books and zeroed counters for ``token_meta``."""
        self.token_meta = token_meta
        self.bids: dict[str, Decimal] = {}
        self.asks: dict[str, Decimal] = {}
        # Set once a full book snapshot has been applied.
        self.initialized = False
        self.messages_applied = 0
        self.messages_skipped = 0
        self.unknown_messages = 0
        self.last_update_received_at_utc: str | None = None
        self.book_message_count = 0
        self.price_change_count = 0
        self.best_bid_ask_count = 0
        self.last_trade_price_count = 0
        self.warnings: list[str] = []

    def clone_summary(self, top_n: int) -> dict[str, Any]:
        """Return a point-in-time summary of this book.

        Args:
            top_n: Maximum number of levels to include per side.

        Returns:
            A dict with counters, best bid/ask, spread, level counts, the top
            ``top_n`` levels per side, and a copy of the warnings recorded so
            far.
        """
        bids = sorted(self.bids.items(), key=lambda item: Decimal(item[0]), reverse=True)
        asks = sorted(self.asks.items(), key=lambda item: Decimal(item[0]))
        best_bid = bids[0][0] if bids else None
        best_ask = asks[0][0] if asks else None
        spread = dec_to_str(Decimal(best_ask) - Decimal(best_bid)) if best_bid and best_ask else None
        return {
            "token": self.token_meta,
            "initialized": self.initialized,
            "messages_applied": self.messages_applied,
            "messages_skipped": self.messages_skipped,
            "unknown_messages": self.unknown_messages,
            "last_update_received_at_utc": self.last_update_received_at_utc,
            "state_quality": self.state_quality(),
            "bid_level_count": len(self.bids),
            "ask_level_count": len(self.asks),
            "best_bid": best_bid,
            "best_ask": best_ask,
            "spread": spread,
            "top_bids": [{"price": price, "size": dec_to_str(size)} for price, size in bids[:top_n]],
            "top_asks": [{"price": price, "size": dec_to_str(size)} for price, size in asks[:top_n]],
            "event_counters": {
                "book": self.book_message_count,
                "price_change": self.price_change_count,
                "best_bid_ask": self.best_bid_ask_count,
                "last_trade_price": self.last_trade_price_count,
            },
            # Copy the list: summaries are collected while replay continues, and
            # returning the live list would let later warnings leak into
            # snapshots that were taken earlier.
            "warnings": list(self.warnings),
        }

    def state_quality(self) -> str:
        """Classify the state as uninitialized, snapshot-only, or updated."""
        if not self.initialized:
            return "insufficient_events"
        if self.price_change_count > 0:
            return "initialized_and_updated"
        return "snapshot_only"
def utc_now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
def iso_z(value: dt.datetime | None = None) -> str:
value = value or utc_now()
return value.astimezone(dt.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def parse_iso(value: str | None) -> dt.datetime | None:
if not value:
return None
text = value[:-1] + "+00:00" if value.endswith("Z") else value
try:
parsed = dt.datetime.fromisoformat(text)
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=dt.UTC)
return parsed.astimezone(dt.UTC)
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB blocks so it never has to fit in memory.
    """
    hasher = hashlib.sha256()
    block_size = 1024 * 1024
    with path.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                break
            hasher.update(block)
    return hasher.hexdigest()
def dec(value: Any) -> Decimal:
    """Convert ``value`` to ``Decimal`` via its string form.

    ``None`` and text that ``Decimal`` rejects both yield ``Decimal("0")``.
    """
    zero = Decimal("0")
    if value is None:
        return zero
    try:
        parsed = Decimal(str(value))
    except InvalidOperation:
        return zero
    return parsed
def dec_to_str(value: Decimal) -> str:
    """Render a Decimal in plain (non-exponent) notation without trailing zeros."""
    rendered = format(value, "f")
    if "." not in rendered:
        return rendered or "0"
    # Drop trailing fractional zeros, then a dangling decimal point.
    trimmed = rendered.rstrip("0").rstrip(".")
    return trimmed or "0"
def level_map(levels: Any) -> dict[str, Decimal]:
    """Build a ``{normalized price: size}`` map from a list of level dicts.

    Args:
        levels: Expected to be a list of dicts with ``price`` and ``size``
            entries. Anything that is not a list yields an empty map.

    Returns:
        A dict keyed by the normalized price string. Entries that are not
        dicts, or whose price is missing, unparseable, or non-finite, are
        dropped. A later duplicate price overwrites an earlier one.
    """
    result: dict[str, Decimal] = {}
    if not isinstance(levels, list):
        return result
    for item in levels:
        if not isinstance(item, dict):
            continue
        # Parse the price strictly: falling back to zero here would invent a
        # level at price "0" for every malformed entry.
        try:
            price = Decimal(str(item.get("price")))
        except InvalidOperation:
            continue
        if not price.is_finite():
            continue
        result[dec_to_str(price)] = dec(item.get("size"))
    return result
def classify_item(item: Any) -> str:
    """Return the event-type label used for counting and dispatch.

    Non-dict items are labelled with their Python type name. Dicts use their
    ``event_type`` when it is truthy; otherwise a dict carrying all book
    snapshot keys is labelled ``book_without_event_type`` and anything else
    ``unknown_object``.
    """
    if not isinstance(item, dict):
        return type(item).__name__
    declared = item.get("event_type")
    if declared:
        return str(declared)
    snapshot_keys = {"market", "asset_id", "bids", "asks", "timestamp"}
    looks_like_book = snapshot_keys.issubset(item.keys())
    return "book_without_event_type" if looks_like_book else "unknown_object"
def raw_event_items(row: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict event items stored under the row's ``json`` key.

    The payload may be a single object or a list of objects; entries that are
    not dicts are dropped.
    """
    payload = row.get("json")
    candidates = payload if isinstance(payload, list) else [payload]
    events: list[dict[str, Any]] = []
    for candidate in candidates:
        if isinstance(candidate, dict):
            events.append(candidate)
    return events
def apply_book_item(state: BookState, item: dict[str, Any], received_at_utc: str) -> None:
    """Replace both sides of ``state`` with the snapshot carried by ``item``.

    Marks the state initialized, bumps the applied/book counters, and records
    ``received_at_utc`` as the time of the last update.
    """
    state.bids = level_map(item.get("bids"))
    state.asks = level_map(item.get("asks"))
    state.initialized = True
    state.last_update_received_at_utc = received_at_utc
    state.book_message_count += 1
    state.messages_applied += 1
def apply_price_change(state_by_token: dict[str, BookState], item: dict[str, Any], received_at_utc: str, warnings: list[str]) -> None:
    """Apply the incremental level updates carried by a ``price_change`` event.

    Args:
        state_by_token: Tracked book states keyed by token id.
        item: The event; its ``price_changes`` entry should be a list of dicts
            with ``asset_id``, ``side``, ``price`` and ``size``.
        received_at_utc: Receive time recorded on every state that is updated.
        warnings: Run-level warning list; receives a message when the event
            has no ``price_changes`` list.

    Changes for untracked tokens are ignored. Changes for an uninitialized
    token, with an unsupported side, or with a missing/unparseable price are
    counted as skipped on that token's state with a per-token warning.
    A size of zero removes the level; any other size replaces it.
    """
    changes = item.get("price_changes")
    if not isinstance(changes, list):
        warnings.append("price_change event without price_changes list")
        return
    for change in changes:
        if not isinstance(change, dict):
            continue
        token_id = str(change.get("asset_id") or "")
        state = state_by_token.get(token_id)
        if state is None:
            continue
        if not state.initialized:
            state.messages_skipped += 1
            state.warnings.append("price_change skipped before initial book snapshot")
            continue
        side = str(change.get("side") or "").upper()
        if side == "BUY":
            book_side = state.bids
        elif side == "SELL":
            book_side = state.asks
        else:
            state.messages_skipped += 1
            state.warnings.append(f"unsupported price_change side {side!r}")
            continue
        # Parse the price strictly: coercing a bad price to zero would insert
        # a bogus level at price "0" into the reconstructed book.
        try:
            price_value = Decimal(str(change.get("price")))
        except InvalidOperation:
            price_value = None
        if price_value is None or not price_value.is_finite():
            state.messages_skipped += 1
            state.warnings.append(f"price_change skipped with unparseable price {change.get('price')!r}")
            continue
        price = dec_to_str(price_value)
        size = dec(change.get("size"))
        if size == 0:
            book_side.pop(price, None)
        else:
            book_side[price] = size
        state.messages_applied += 1
        state.price_change_count += 1
        state.last_update_received_at_utc = received_at_utc
def apply_ws_row(
row: dict[str, Any],
state_by_token: dict[str, BookState],
event_type_counts: dict[str, int],
unsupported_counts: dict[str, int],
warnings: list[str],
) -> None:
"""Dispatch every event item in one raw websocket row to its handler.

Args:
row: One raw websocket record; event items are read from its ``json``
entry and the receive time from ``received_at_utc``.
state_by_token: Tracked book states keyed by token id; mutated in place.
event_type_counts: Running count per classified event type; mutated.
unsupported_counts: Running count of events that do not mutate a book
(including book snapshots for untracked tokens); mutated.
warnings: Run-level warning list passed through to apply_price_change.
"""
# May be None when the row carries no receive time; it is passed through as-is.
received_at_utc = row.get("received_at_utc")
for item in raw_event_items(row):
event_type = classify_item(item)
event_type_counts[event_type] = event_type_counts.get(event_type, 0) + 1
# Full snapshots (with or without an explicit event_type) replace the token's book.
if event_type in {"book", "book_without_event_type"}:
token_id = str(item.get("asset_id") or "")
state = state_by_token.get(token_id)
if state is None:
unsupported_counts["book_for_untracked_token"] = unsupported_counts.get("book_for_untracked_token", 0) + 1
continue
apply_book_item(state, item, received_at_utc)
elif event_type == "price_change":
apply_price_change(state_by_token, item, received_at_utc, warnings)
# best_bid_ask and last_trade_price are counted but never change bids/asks.
elif event_type == "best_bid_ask":
token_id = str(item.get("asset_id") or "")
state = state_by_token.get(token_id)
if state:
state.best_bid_ask_count += 1
state.messages_skipped += 1
unsupported_counts[event_type] = unsupported_counts.get(event_type, 0) + 1
elif event_type == "last_trade_price":
token_id = str(item.get("asset_id") or "")
state = state_by_token.get(token_id)
if state:
state.last_trade_price_count += 1
state.messages_skipped += 1
unsupported_counts[event_type] = unsupported_counts.get(event_type, 0) + 1
# Any other event type is counted as unsupported.
else:
unsupported_counts[event_type] = unsupported_counts.get(event_type, 0) + 1
for state in state_by_token.values():
state.unknown_messages += 1
def top_levels(book: dict[str, Decimal], side: str, top_n: int) -> list[tuple[str, Decimal]]:
    """Return the best ``top_n`` levels of ``book`` as ``(price, size)`` pairs.

    Prices are ordered numerically: highest first when ``side`` is ``"bids"``,
    lowest first for any other side.
    """
    descending = side == "bids"
    ordered = sorted(book.items(), key=lambda entry: Decimal(entry[0]), reverse=descending)
    return ordered[:top_n]
def rest_book_from_item(item: dict[str, Any]) -> dict[str, Any]:
    """Convert one REST book payload into ``{"bids": map, "asks": map}``."""
    return {side: level_map(item.get(side)) for side in ("bids", "asks")}
def book_summary_from_maps(bids: dict[str, Decimal], asks: dict[str, Decimal], top_n: int) -> dict[str, Any]:
    """Summarize a bid/ask map pair.

    Returns best bid, best ask, spread, total level counts, and the top
    ``top_n`` levels per side with sizes rendered as strings.
    """
    bid_levels = top_levels(bids, "bids", top_n)
    ask_levels = top_levels(asks, "asks", top_n)

    def render(levels: list[tuple[str, Decimal]]) -> list[dict[str, str]]:
        return [{"price": price, "size": dec_to_str(size)} for price, size in levels]

    best_bid = next((price for price, _size in bid_levels), None)
    best_ask = next((price for price, _size in ask_levels), None)
    spread = None
    if best_bid and best_ask:
        spread = dec_to_str(Decimal(best_ask) - Decimal(best_bid))
    return {
        "best_bid": best_bid,
        "best_ask": best_ask,
        "spread": spread,
        "bid_level_count": len(bids),
        "ask_level_count": len(asks),
        "top_bids": render(bid_levels),
        "top_asks": render(ask_levels),
    }
def compare_side(local: dict[str, Decimal], rest: dict[str, Decimal], side: str, top_n: int) -> dict[str, Any]:
    """Diff the top ``top_n`` levels of one book side between local and REST.

    Returns prices present only in REST (``missing_prices``), prices present
    only locally (``extra_prices``), and non-zero size differences at shared
    prices (``size_deltas``), each ordered best price first for that side.
    """
    descending = side == "bids"
    local_top = dict(top_levels(local, side, top_n))
    rest_top = dict(top_levels(rest, side, top_n))

    def ordered(prices: Any) -> list[str]:
        return sorted(prices, key=Decimal, reverse=descending)

    size_deltas = []
    for price in ordered(local_top.keys() & rest_top.keys()):
        local_size, rest_size = local_top[price], rest_top[price]
        delta = local_size - rest_size
        if delta == 0:
            continue
        size_deltas.append({"price": price, "local_size": dec_to_str(local_size), "rest_size": dec_to_str(rest_size), "delta": dec_to_str(delta)})
    return {
        "missing_prices": ordered(rest_top.keys() - local_top.keys()),
        "extra_prices": ordered(local_top.keys() - rest_top.keys()),
        "size_deltas": size_deltas,
    }
def compare_books(state: BookState, rest_item: dict[str, Any], top_n: int) -> dict[str, Any]:
    """Compare the local book in ``state`` with one REST book payload.

    The status is ``match`` only when best bid and best ask agree and neither
    side's top-N diff lists a missing price, extra price, or size delta;
    otherwise it is ``divergent``. Spread and level-count agreement are
    reported but do not affect the status.
    """
    rest_maps = rest_book_from_item(rest_item)
    local_summary = book_summary_from_maps(state.bids, state.asks, top_n)
    rest_summary = book_summary_from_maps(rest_maps["bids"], rest_maps["asks"], top_n)
    bid_cmp = compare_side(state.bids, rest_maps["bids"], "bids", top_n)
    ask_cmp = compare_side(state.asks, rest_maps["asks"], "asks", top_n)

    def same(field: str) -> bool:
        return local_summary[field] == rest_summary[field]

    best_bid_same = same("best_bid")
    best_ask_same = same("best_ask")
    diff_fields = ("missing_prices", "extra_prices", "size_deltas")
    top_clean = all(not diff[field] for diff in (bid_cmp, ask_cmp) for field in diff_fields)
    status = "match" if best_bid_same and best_ask_same and top_clean else "divergent"
    return {
        "comparison_status": status,
        "best_bid_match": best_bid_same,
        "best_ask_match": best_ask_same,
        "spread_match": same("spread"),
        "level_count_match": same("bid_level_count") and same("ask_level_count"),
        "local": local_summary,
        "rest": rest_summary,
        "bid_top_n_diff": bid_cmp,
        "ask_top_n_diff": ask_cmp,
    }
def read_gzip_jsonl(path: Path) -> list[tuple[int, dict[str, Any]]]:
    """Read a gzip-compressed JSONL file.

    Returns ``(line_number, parsed_object)`` pairs with 1-based line numbers.
    Blank lines are skipped but still consume a line number.
    """
    with gzip.open(path, "rt", encoding="utf-8") as handle:
        return [
            (number, json.loads(text))
            for number, text in enumerate(handle, start=1)
            if text.strip()
        ]
def write_gzip_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as gzip-compressed JSONL.

    Parent directories are created as needed. Each row becomes one compact
    JSON line with sorted keys.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    dump_options = {"separators": (",", ":"), "sort_keys": True}
    with gzip.open(path, "wt", encoding="utf-8") as sink:
        for record in rows:
            encoded = json.dumps(record, **dump_options)
            sink.write(encoded + "\n")
def summarize_file(path: Path, rows: int, kind: str) -> dict[str, Any]:
    """Describe an output file for the manifest.

    Args:
        path: File to describe.
        rows: Number of rows the caller wrote to the file.
        kind: Logical kind label recorded in the manifest.

    Returns:
        A dict with path, kind, byte size, row count, SHA-256 digest and a
        status of ``"valid"`` or ``"missing_or_empty"``. A missing file is
        reported with ``bytes`` 0 and ``sha256`` ``None`` instead of raising.
    """
    # Check existence before stat/hash; otherwise a missing file raises and the
    # "missing_or_empty" status can never be produced for it.
    exists = path.exists()
    size = path.stat().st_size if exists else 0
    return {
        "path": path.as_posix(),
        "kind": kind,
        "bytes": size,
        "rows": rows,
        "sha256": sha256_file(path) if exists else None,
        "status": "valid" if exists and size else "missing_or_empty",
    }
def write_schema_docs(path: Path, schema_summary: dict[str, Any]) -> None:
    """Write the observed-websocket-schema markdown document to ``path``.

    ``schema_summary`` maps each event type to a dict with ``count``,
    ``fields`` and optional ``level_fields`` and ``notes``. Event types are
    emitted in sorted order; parent directories are created as needed.
    """
    document = [
        "# Polymarket Websocket Schema Observed In Checkpoint 10B",
        "",
        "This document summarizes observed public market websocket message shapes from the bounded 10B BTC sample. It does not include full raw payload dumps; raw payloads remain in the gzip JSONL sample files.",
        "",
        "## Observed Event Types",
        "",
    ]
    for event_type in sorted(schema_summary):
        info = schema_summary[event_type]
        section = [
            f"### {event_type}",
            "",
            f"Count: `{info['count']}`",
            "",
            f"Observed top-level fields: `{', '.join(info['fields'])}`",
            "",
        ]
        level_fields = info.get("level_fields")
        if level_fields:
            section += [f"Nested level/change fields: `{', '.join(level_fields)}`", ""]
        section += [info.get("notes") or "No additional notes.", ""]
        document.extend(section)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(document), encoding="utf-8")
def write_reconstruction_doc(path: Path) -> None:
    """Write the static book-reconstruction method document to ``path``.

    The content is fixed text; parent directories are created as needed.
    """
    paragraphs = (
        "# Book Reconstruction Method",
        "",
        "Checkpoint 10C reconstructs order-book state from raw Polymarket market websocket messages captured in Checkpoint 10B.",
        "",
        "## Source Of Truth",
        "",
        "Raw websocket and REST checkpoint gzip JSONL files are immutable source evidence. Reconstruction outputs are derived and reference the input file paths, line numbers, websocket message sequence spans, and REST checkpoint sequences.",
        "",
        "## Applied Events",
        "",
        "- `book` and `book_without_event_type` messages initialize or replace the full per-token bid/ask maps.",
        "- `price_change` messages are applied after initialization. Observed `side=BUY` updates bids and `side=SELL` updates asks.",
        "- Observed `size=0` is treated as level removal. Non-zero size replaces the level size at that price.",
        "- `best_bid_ask`, `last_trade_price`, and unrelated `new_market` messages are preserved and counted but do not mutate the book map.",
        "",
        "## Comparison",
        "",
        "For each REST checkpoint, the reconstructor compares REST `/books` payloads with local websocket state after applying all websocket messages received at or before the REST checkpoint receive time. The comparison includes best bid, best ask, spread, bid/ask level counts, and top 10 levels by default.",
        "",
        "## Limits",
        "",
        "The sample is short and network timing can produce REST-vs-websocket divergences. Divergence rows include raw websocket and REST references so follow-up can inspect whether differences are timing, feed semantics, or reconstruction defects.",
        "",
        "## Checkpoint 10C Divergence Result",
        "",
        "The accepted 10C sample produced 20 REST comparison rows: 8 exact top-10 matches and 12 divergent rows. In every divergent row, best bid, best ask, spread, level counts, and top-N price membership matched. The observed divergences were size-only deltas within shared top-N price levels.",
        "",
        "Size-only divergence still matters. It can change depth, fillability assumptions, queue-size estimates, and any later answer about whether a hypothetical trade was observable and reproducible from the archived feed.",
        "",
        "This result is useful evidence for the websocket path, but it is not production readiness. The sample is bounded, the timing relationship between REST checkpoints and websocket delivery is imperfect, and long-running reconnect, stale-feed, rotation, upload, and alert behavior still need their own checkpoint before deployment.",
        "",
    )
    body = "\n".join(paragraphs)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(body, encoding="utf-8")
def write_report(path: Path, manifest: dict[str, Any]) -> None:
    """Render the checkpoint 10C markdown report from ``manifest``.

    Sections: header, input, observed event counts, per-token reconstruction
    status, REST comparison totals, output files, warnings, gate, readiness
    finding, and fixed closing notes. Parent directories are created as needed.
    """
    source = manifest["input"]
    totals = manifest["comparison_summary"]

    report = [
        "# Checkpoint 10C Book Reconstruction Sample",
        "",
        f"Status: {manifest['gate_status']} ",
        f"Created: {manifest['ended_at_utc']} ",
        "Production ready: no ",
        "Live Kubernetes collector modified: no",
        "",
        "## Input",
        "",
        f"- 10B manifest: `{source['manifest_path']}`.",
        f"- 10B gate: `{source['gate_status']}`.",
        f"- Run id: `{manifest['run_id']}`.",
        "",
        "## Observed Websocket Events",
        "",
        f"Event type counts: `{json.dumps(manifest['event_type_counts'], sort_keys=True)}`.",
        f"Unsupported/non-mutating counts: `{json.dumps(manifest['unsupported_event_counts'], sort_keys=True)}`.",
        "",
        "## Reconstruction Status",
        "",
    ]

    # One bullet per tracked token.
    for token_id, status in manifest["token_reconstruction_statuses"].items():
        meta = status.get("token", {})
        report.append(
            f"- `{token_id}` ({meta.get('market_slug')} {meta.get('outcome')}): `{status['state_quality']}`, initialized `{status['initialized']}`, applied `{status['messages_applied']}`, skipped `{status['messages_skipped']}`, unknown `{status['unknown_messages']}`."
        )

    report += [
        "",
        "## REST Comparison",
        "",
        f"- Comparison rows: `{totals['comparison_count']}`.",
        f"- Matches: `{totals['match_count']}`.",
        f"- Divergences: `{totals['divergent_count']}`.",
        f"- No-state rows: `{totals['no_state_count']}`.",
        "",
        "Divergence samples are preserved in the machine-readable manifest with raw websocket and REST references.",
        "",
        "## Output Files",
        "",
    ]
    report += [
        f"- `{output['path']}`: `{output['kind']}`, rows `{output['rows']}`, bytes `{output['bytes']}`, sha256 `{output['sha256']}`"
        for output in manifest["output_files"]
    ]

    report += ["", "## Assumptions And Warnings", ""]
    recorded_warnings = manifest["warnings"]
    if recorded_warnings:
        report += [f"- {warning}" for warning in recorded_warnings]
    else:
        report.append("- None.")

    report += [
        "",
        "## Gate",
        "",
        manifest["gate_status"],
        "",
        "## Full-Fidelity Readiness Finding",
        "",
        manifest["readiness_finding"],
        "",
        "## Strongest Fake Progress Risk",
        "",
        "A reconstruction script can look correct while silently ignoring unsupported message semantics. This sample records unsupported event counts and comparison divergences with raw references so the next deployment step has audit evidence.",
        "",
        "## Next Smallest Step",
        "",
        "If combined 10B/10C passes, plan 10D: convert this proven sample path into a long-running Kubernetes websocket recorder with rotation, reconnect/stale-feed evidence, REST checkpoint recovery, upload cleanup, and migration from REST-only collection.",
        "",
    ]
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(report), encoding="utf-8")
def run_reconstruction(args: argparse.Namespace) -> dict[str, Any]:
"""Run the offline reconstruction and write all of its outputs.

Loads the 10B manifest named by ``args.input_manifest``, replays the raw
websocket rows in receive-time order, compares the local book state with
each REST checkpoint, and writes the state/comparison gzip JSONL files, the
manifest, the schema and method documents, and the markdown report.

Returns:
The manifest dict that was written to ``args.manifest_path``.

Raises:
RuntimeError: if the input manifest's gate_status is not
``WS_RAW_SAMPLE_PASS``.
"""
started_at_utc = iso_z()
input_manifest_path = args.input_manifest
input_manifest = json.loads(input_manifest_path.read_text(encoding="utf-8"))
if input_manifest.get("gate_status") != "WS_RAW_SAMPLE_PASS":
raise RuntimeError("10C requires 10B gate_status WS_RAW_SAMPLE_PASS")
run_id = input_manifest["run_id"]
# next() raises StopIteration when the manifest lists no output file of that kind.
ws_file = Path(next(item["path"] for item in input_manifest["output_files"] if item["kind"] == "raw_websocket_messages"))
rest_file = Path(next(item["path"] for item in input_manifest["output_files"] if item["kind"] == "rest_books_checkpoints"))
ws_rows = read_gzip_jsonl(ws_file)
rest_rows = read_gzip_jsonl(rest_file)
# One BookState per tracked token, keyed by token_id.
token_meta = {token["token_id"]: token for token in input_manifest["tokens_tracked"]}
state_by_token = {token_id: BookState(meta) for token_id, meta in token_meta.items()}
event_type_counts: dict[str, int] = {}
unsupported_counts: dict[str, int] = {}
# The run-level warnings start with the two modelling assumptions.
warnings: list[str] = ["Observed price_change side semantics are assumed as BUY->bids and SELL->asks.", "Observed size=0 is treated as level removal."]
schema_summary: dict[str, dict[str, Any]] = {}
def observe_schema(item: dict[str, Any], event_type: str) -> None:
# Accumulate count, top-level field names, and nested level/change field names
# per event type. Only the first 20 nested entries of each list are inspected.
info = schema_summary.setdefault(event_type, {"count": 0, "fields": set(), "level_fields": set(), "notes": ""})
info["count"] += 1
info["fields"].update(str(k) for k in item.keys())
for key in ("bids", "asks", "price_changes"):
values = item.get(key)
if isinstance(values, list):
for nested in values[:20]:
if isinstance(nested, dict):
info["level_fields"].update(str(k) for k in nested.keys())
notes = {
"book": "Full per-token book snapshot used to initialize or replace local state.",
"book_without_event_type": "Full per-token book snapshot without event_type; treated like book if observed.",
"price_change": "Incremental price/size updates applied after a token has an initialized book.",
"best_bid_ask": "Best quote summary; counted but not applied to level maps.",
"last_trade_price": "Trade print summary; counted but not applied to level maps.",
"new_market": "Market metadata broadcast; preserved and counted but unrelated to selected BTC token state in this sample.",
}
info["notes"] = notes.get(event_type, "Unsupported/unknown shape preserved and counted.")
# Schema pass: runs over every websocket row before any replay happens.
for _line, row in ws_rows:
for item in raw_event_items(row):
observe_schema(item, classify_item(item))
# Sets become sorted lists so the summary is JSON-serializable and stable.
for info in schema_summary.values():
info["fields"] = sorted(info["fields"])
info["level_fields"] = sorted(info["level_fields"])
comparisons: list[dict[str, Any]] = []
state_snapshots: list[dict[str, Any]] = []
# ws_index is the replay cursor into ws_sorted; it also equals the number of rows applied so far.
ws_index = 0
first_applied_seq: int | None = None
last_applied_seq: int | None = None
first_applied_line: int | None = None
last_applied_line: int | None = None
last_applied_received_at_utc: str | None = None
# Rows with a missing or unparseable received_at_utc sort first (datetime.min).
rest_sorted = sorted(rest_rows, key=lambda item: parse_iso(item[1].get("received_at_utc")) or dt.datetime.min.replace(tzinfo=dt.UTC))
ws_sorted = sorted(ws_rows, key=lambda item: (parse_iso(item[1].get("received_at_utc")) or dt.datetime.min.replace(tzinfo=dt.UTC), item[1].get("global_message_sequence") or 0))
for rest_line, rest_row in rest_sorted:
rest_time = parse_iso(rest_row.get("received_at_utc"))
# Replay websocket rows received at or before this REST checkpoint.
while ws_index < len(ws_sorted):
ws_line, ws_row = ws_sorted[ws_index]
ws_time = parse_iso(ws_row.get("received_at_utc"))
# Replay stops only when both timestamps parse and the row is later than the checkpoint.
if rest_time is not None and ws_time is not None and ws_time > rest_time:
break
apply_ws_row(ws_row, state_by_token, event_type_counts, unsupported_counts, warnings)
seq = ws_row.get("global_message_sequence")
if isinstance(seq, int):
first_applied_seq = seq if first_applied_seq is None else min(first_applied_seq, seq)
last_applied_seq = seq if last_applied_seq is None else max(last_applied_seq, seq)
first_applied_line = ws_line if first_applied_line is None else min(first_applied_line, ws_line)
last_applied_line = ws_line
last_applied_received_at_utc = ws_row.get("received_at_utc")
ws_index += 1
rest_payload = (rest_row.get("response") or {}).get("raw_response_json")
if not isinstance(rest_payload, list):
warnings.append(f"REST checkpoint {rest_row.get('checkpoint_sequence')} payload was not a list")
continue
for rest_item in rest_payload:
if not isinstance(rest_item, dict):
continue
token_id = str(rest_item.get("asset_id") or "")
if token_id not in state_by_token:
continue
state = state_by_token[token_id]
# Provenance fields shared by every comparison row for this token and checkpoint.
base = {
"run_id": run_id,
"token_id": token_id,
"market": state.token_meta,
"rest_checkpoint_sequence": rest_row.get("checkpoint_sequence"),
"rest_checkpoint_received_at_utc": rest_row.get("received_at_utc"),
"rest_checkpoint_file": rest_file.as_posix(),
"rest_checkpoint_line": rest_line,
"raw_websocket_file": ws_file.as_posix(),
"applied_ws_message_count": ws_index,
"applied_ws_line_span": [first_applied_line, last_applied_line],
"applied_ws_global_sequence_span": [first_applied_seq, last_applied_seq],
"last_applied_ws_line": last_applied_line,
"last_applied_ws_received_at_utc": last_applied_received_at_utc,
"last_local_update_received_at_utc": state.last_update_received_at_utc,
"state_quality": state.state_quality(),
}
if not state.initialized:
comp = {**base, "comparison_status": "no_state", "reason": "token not initialized by preceding websocket book event"}
else:
comp = {**base, **compare_books(state, rest_item, args.top_n)}
comparisons.append(comp)
# Record a state summary for every tracked token.
for token_id, state in state_by_token.items():
state_snapshots.append({
"run_id": run_id,
"snapshot_basis": "after_websocket_messages_preceding_rest_checkpoint",
"rest_checkpoint_sequence": rest_row.get("checkpoint_sequence"),
"rest_checkpoint_received_at_utc": rest_row.get("received_at_utc"),
"raw_websocket_file": ws_file.as_posix(),
"applied_ws_message_count": ws_index,
"applied_ws_line_span": [first_applied_line, last_applied_line],
"applied_ws_global_sequence_span": [first_applied_seq, last_applied_seq],
"last_applied_ws_line": last_applied_line,
"last_applied_ws_received_at_utc": last_applied_received_at_utc,
**state.clone_summary(args.top_n),
})
# Apply remaining websocket messages for final token statuses.
while ws_index < len(ws_sorted):
_ws_line, ws_row = ws_sorted[ws_index]
apply_ws_row(ws_row, state_by_token, event_type_counts, unsupported_counts, warnings)
ws_index += 1
output_dir = args.output_root / "polymarket" / "books" / run_id
comparison_dir = args.output_root / "polymarket" / "comparisons" / run_id
state_file = output_dir / f"polymarket_reconstructed_books_{run_id}.jsonl.gz"
comparison_file = comparison_dir / f"polymarket_rest_comparison_{run_id}.jsonl.gz"
write_gzip_jsonl(state_file, state_snapshots)
write_gzip_jsonl(comparison_file, comparisons)
statuses = {token_id: state.clone_summary(args.top_n) for token_id, state in state_by_token.items()}
# Group token initialization flags by condition_id; the check below looks at
# the first two tokens recorded for each condition.
market_token_init: dict[str, list[bool]] = {}
for state in state_by_token.values():
market_token_init.setdefault(str(state.token_meta.get("condition_id")), []).append(state.initialized)
any_market_both_initialized = any(len(values) >= 2 and all(values[:2]) for values in market_token_init.values())
match_count = sum(1 for row in comparisons if row.get("comparison_status") == "match")
divergent_count = sum(1 for row in comparisons if row.get("comparison_status") == "divergent")
no_state_count = sum(1 for row in comparisons if row.get("comparison_status") == "no_state")
# Gate chain: the first failing condition wins. divergent_count is not part of it.
if not any_market_both_initialized:
gate = "BLOCKED_INSUFFICIENT_WS_EVENTS"
elif not comparisons:
gate = "BLOCKED_REST_COMPARISON"
elif no_state_count == len(comparisons):
gate = "BLOCKED_INSUFFICIENT_WS_EVENTS"
else:
gate = "BOOK_RECONSTRUCTION_SAMPLE_PASS"
# Divergences are surfaced as a warning rather than as a BLOCKED gate.
if divergent_count:
warnings.append("REST comparison divergences were observed and are preserved with raw references; timing differences are possible in this short live sample.")
readiness_finding = (
"The sample proves that observed websocket `book` snapshots can initialize local state and `price_change` messages can update it offline. REST comparisons executed with raw references; divergences require review before a live websocket recorder replaces REST-only collection."
if gate == "BOOK_RECONSTRUCTION_SAMPLE_PASS"
else "The sample did not prove enough websocket reconstruction behavior for a live recorder migration."
)
manifest = {
"schema_name": "checkpoint_010c_book_reconstruction_sample",
"schema_version": 1,
"checkpoint_id": "10C",
"checkpoint_name": "Offline Book Reconstruction And REST Comparison Sample",
"gate_status": gate,
"production_ready": False,
"live_kubernetes_collector_modified": False,
"reconstructor": {"name": RECONSTRUCTOR_NAME, "version": RECONSTRUCTOR_VERSION},
"started_at_utc": started_at_utc,
"ended_at_utc": iso_z(),
"run_id": run_id,
"top_n": args.top_n,
"input": {
"manifest_path": input_manifest_path.as_posix(),
"manifest_sha256": sha256_file(input_manifest_path),
"gate_status": input_manifest.get("gate_status"),
# NOTE(review): the computed sha256 and the manifest's expected_sha256 are
# recorded side by side but are not compared anywhere in this function.
"raw_files": [
{"path": ws_file.as_posix(), "kind": "raw_websocket_messages", "sha256": sha256_file(ws_file), "expected_sha256": next(item["sha256"] for item in input_manifest["output_files"] if item["kind"] == "raw_websocket_messages")},
{"path": rest_file.as_posix(), "kind": "rest_books_checkpoints", "sha256": sha256_file(rest_file), "expected_sha256": next(item["sha256"] for item in input_manifest["output_files"] if item["kind"] == "rest_books_checkpoints")},
],
},
"output_files": [summarize_file(state_file, len(state_snapshots), "reconstructed_book_state_snapshots"), summarize_file(comparison_file, len(comparisons), "rest_comparison_rows")],
"event_type_counts": dict(sorted(event_type_counts.items())),
"observed_schema_summary": schema_summary,
"unsupported_event_counts": dict(sorted(unsupported_counts.items())),
"token_reconstruction_statuses": statuses,
# At most the first 10 divergent rows are embedded as samples.
"comparison_summary": {"comparison_count": len(comparisons), "match_count": match_count, "divergent_count": divergent_count, "no_state_count": no_state_count, "divergence_samples": [row for row in comparisons if row.get("comparison_status") == "divergent"][:10]},
"assumptions": ["BUY price_change updates bids; SELL price_change updates asks.", "size=0 removes a level; non-zero size replaces that price level.", "REST checkpoint comparison uses websocket state after messages received at or before REST checkpoint received_at_utc."],
# Run-level warnings are de-duplicated and sorted before being written.
"warnings": sorted(set(warnings)),
"readiness_finding": readiness_finding,
"strongest_fake_progress_risk": "Ignoring unsupported websocket events or REST divergences would overstate full-fidelity readiness.",
"next_step": "If combined 10B/10C passes, plan 10D long-running Kubernetes websocket recorder with rotation, reconnect/stale-feed evidence, REST checkpoint recovery, upload cleanup, and migration plan.",
}
args.manifest_path.parent.mkdir(parents=True, exist_ok=True)
args.manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
write_schema_docs(args.schema_doc_path, schema_summary)
write_reconstruction_doc(args.reconstruction_doc_path)
write_report(args.report_path, manifest)
return manifest
def parse_args() -> argparse.Namespace:
    """Declare the reconstruction CLI and parse the process arguments.

    Returns:
        Namespace carrying the input manifest path, the output/manifest/report
        paths, the two documentation paths and the integer ``top_n`` option.
    """
    cli = argparse.ArgumentParser(
        description="Reconstruct Polymarket websocket books and compare to REST checkpoints."
    )
    # All path-valued options share one shape, so they are declared as data
    # (in the same order the help output lists them).
    path_options = (
        ("--input-manifest", DEFAULT_INPUT_MANIFEST),
        ("--output-root", DEFAULT_OUTPUT_ROOT),
        ("--manifest-path", DEFAULT_MANIFEST_PATH),
        ("--report-path", DEFAULT_REPORT_PATH),
        ("--schema-doc-path", DEFAULT_SCHEMA_DOC),
        ("--reconstruction-doc-path", DEFAULT_RECON_DOC),
    )
    for flag, fallback in path_options:
        cli.add_argument(flag, type=Path, default=fallback)
    cli.add_argument("--top-n", type=int, default=10)
    return cli.parse_args()
def main() -> int:
    """Run the reconstruction and print where its artifacts were written.

    Returns:
        ``0`` when the manifest gate is ``BOOK_RECONSTRUCTION_SAMPLE_PASS`` or
        ``BOOK_RECONSTRUCTION_NEEDS_REVIEW``; ``1`` for any other gate status.
    """
    cli_args = parse_args()
    manifest = run_reconstruction(cli_args)
    print(f"RECONSTRUCTION_MANIFEST={cli_args.manifest_path}")
    print(f"RECONSTRUCTION_REPORT={cli_args.report_path}")
    gate_status = manifest["gate_status"]
    print(f"RECONSTRUCTION_GATE={gate_status}")
    # A needs-review outcome still exits successfully; only other gates fail.
    if gate_status in {"BOOK_RECONSTRUCTION_SAMPLE_PASS", "BOOK_RECONSTRUCTION_NEEDS_REVIEW"}:
        return 0
    return 1
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,912 @@
#!/usr/bin/env python3
"""Bounded raw Polymarket websocket sample recorder with REST checkpoints.
Checkpoint 10B scope: public BTC up/down market data only. This script is a
finite sample recorder. It does not trade, sign, authenticate, or modify the
live Kubernetes collector.
"""
from __future__ import annotations
import argparse
import base64
import datetime as dt
import gzip
import hashlib
import json
import os
import signal
import socket
import ssl
import struct
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any
# Identity stamped into the "collector" field of every envelope and manifest.
COLLECTOR_NAME = "polymarket_ws_sample_recorder"
COLLECTOR_VERSION = "0.1.0"
# Schema names/version recorded on websocket-message rows and REST-checkpoint rows.
WS_SCHEMA_NAME = "raw_polymarket_market_ws_message"
REST_SCHEMA_NAME = "raw_polymarket_books_checkpoint"
SCHEMA_VERSION = 1
# Fallback locations used when neither the CLI nor the config file supplies a value.
DEFAULT_CONFIG_PATH = Path("config/polymarket_ws_sample.example.yaml")
DEFAULT_DISCOVERY_PATH = Path("data/discovery/polymarket_btc_markets_latest.json")
DEFAULT_OUTPUT_ROOT = Path("data/ws_sample")
DEFAULT_MANIFEST_PATH = Path("data/manifests/checkpoint_010b_ws_raw_sample.json")
DEFAULT_REPORT_PATH = Path("reports/checkpoints/checkpoint_010b_ws_raw_sample.md")
# Default endpoints: the market websocket feed and the REST books endpoint.
MARKET_WS_URL = "wss://ws-subscriptions-clob.polymarket.com/ws/market"
CLOB_BOOKS_URL = "https://clob.polymarket.com/books"
# Lower-cased response header names that filter_headers keeps; every other
# header is dropped before being written to disk.
SAFE_RESPONSE_HEADERS = {
"cache-control",
"cf-cache-status",
"cf-ray",
"content-length",
"content-type",
"date",
"retry-after",
"server",
"x-ratelimit-limit",
"x-ratelimit-remaining",
"x-ratelimit-reset",
"ratelimit-limit",
"ratelimit-remaining",
"ratelimit-reset",
}
# Stop state: written by handle_stop, polled by the recording loops.
STOP_REQUESTED = False
STOP_SIGNAL: str | None = None
def handle_stop(signum: int, _frame: Any) -> None:
    """Signal handler that records a stop request in the module-level flags.

    Args:
        signum: Number of the delivered signal.
        _frame: Interrupted stack frame; unused.
    """
    global STOP_REQUESTED, STOP_SIGNAL
    # Raise the flag first so it is set even if the name lookup below fails.
    STOP_REQUESTED = True
    delivered = signal.Signals(signum)
    STOP_SIGNAL = delivered.name
def utc_now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
def iso_z(value: dt.datetime | None = None) -> str:
value = value or utc_now()
return value.astimezone(dt.UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def compact_timestamp(value: dt.datetime | None = None) -> str:
value = value or utc_now()
return value.astimezone(dt.UTC).strftime("%Y%m%dT%H%M%SZ")
def parse_iso(value: Any) -> dt.datetime | None:
if not isinstance(value, str) or not value.strip():
return None
text = value.strip()
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
parsed = dt.datetime.fromisoformat(text)
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=dt.UTC)
return parsed.astimezone(dt.UTC)
def sha256_bytes(data: bytes) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of ``data``."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB blocks so it is never loaded into memory whole.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while block := stream.read(1024 * 1024):
            hasher.update(block)
    return hasher.hexdigest()
def parse_scalar(value: str) -> Any:
    """Convert one raw config value into a Python scalar.

    Resolution order after trimming whitespace: empty text stays ``""``; text
    that starts with a quote and ends with the same quote loses its outer
    characters; ``true``/``false`` (any case) become booleans; ``null``/``none``
    (any case) become ``None``; then ``int`` and ``float`` conversion are tried
    in that order; anything else is returned as the trimmed string.
    """
    text = value.strip()
    if not text:
        return ""
    opener = text[0]
    if opener in ("'", '"') and text.endswith(opener):
        return text[1:-1]
    keyword = text.lower()
    if keyword == "true":
        return True
    if keyword == "false":
        return False
    if keyword in ("null", "none"):
        return None
    # Integers take priority so "42" does not become 42.0.
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text
def _strip_yaml_comment(raw_line: str) -> str:
    """Return ``raw_line`` without its trailing ``#`` comment, if it has one.

    A ``#`` starts a comment only at the beginning of the line or directly
    after whitespace, and never inside a quoted span. A quote opens a span only
    at the start of a token and only when a matching closing quote exists on
    the same line; otherwise it is treated as an ordinary character.
    """
    index = 0
    while index < len(raw_line):
        char = raw_line[index]
        at_token_start = index == 0 or raw_line[index - 1].isspace()
        if at_token_start and char == "#":
            return raw_line[:index]
        if at_token_start and char in ("'", '"'):
            closing = raw_line.find(char, index + 1)
            if closing != -1:
                # Jump past the quoted span so a "#" inside it is kept.
                index = closing + 1
                continue
        index += 1
    return raw_line


def load_flat_yaml(path: Path) -> dict[str, Any]:
    """Parse the flat YAML subset used by this repo's example configs.

    Only top-level ``key: value`` lines are supported. Blank lines and comments
    are ignored, and each value is converted with ``parse_scalar``. Comment
    detection keeps ``#`` characters that sit inside quotes or in the middle of
    a token (for example a URL fragment), instead of cutting the value there.

    Args:
        path: Location of the config file.

    Returns:
        Mapping of keys to parsed scalars; empty when ``path`` does not exist.
        A key that appears more than once keeps its last value.

    Raises:
        ValueError: If a non-blank line has no ``:`` or has an empty key.
    """
    config: dict[str, Any] = {}
    if not path.exists():
        return config
    for line_number, raw_line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
        line = _strip_yaml_comment(raw_line).strip()
        if not line:
            continue
        if ":" not in line:
            raise ValueError(f"Unsupported config line {line_number}: {raw_line}")
        # Split on the first colon only so values such as URLs stay intact.
        key, value = line.split(":", 1)
        key = key.strip()
        if not key:
            raise ValueError(f"Missing config key on line {line_number}")
        config[key] = parse_scalar(value)
    return config
def filter_headers(headers: Any) -> dict[str, str]:
    """Keep only the response headers whose lower-cased name is allow-listed.

    Args:
        headers: Any object accepted by ``dict()``.

    Returns:
        The retained headers with their original name casing and values.
    """
    return {
        name: content
        for name, content in dict(headers).items()
        if name.lower() in SAFE_RESPONSE_HEADERS
    }
def load_discovery(path: Path) -> dict[str, Any]:
    """Read the discovery JSON document stored at ``path`` (UTF-8)."""
    with path.open("r", encoding="utf-8") as stream:
        return json.load(stream)
def market_is_usable(market: dict[str, Any], now: dt.datetime, safety_seconds: int) -> tuple[bool, list[str]]:
    """Decide whether a discovered market is safe to record.

    Args:
        market: One normalized market record from the discovery document.
        now: Reference time used for the end-time check.
        safety_seconds: Minimum number of seconds that must remain before the
            market's ``end_time_utc``.

    Returns:
        ``(usable, reasons)``. ``reasons`` lists every failed check in a fixed
        order and is empty exactly when ``usable`` is true.
    """
    reasons: list[str] = []
    # Each flag must be exactly the required boolean; missing or truthy-but-
    # not-bool values are rejected.
    flag_checks = (
        ("active", True, "not_active"),
        ("closed", False, "closed"),
        ("accepting_orders", True, "not_accepting_orders"),
        ("enable_order_book", True, "order_book_not_enabled"),
    )
    for field, required, reason in flag_checks:
        if market.get(field) is not required:
            reasons.append(reason)
    end_time = parse_iso(market.get("end_time_utc"))
    if end_time is None:
        reasons.append("missing_end_time")
    elif end_time <= now + dt.timedelta(seconds=safety_seconds):
        reasons.append("too_close_to_end_or_expired")
    tokens = market.get("tokens")
    if not isinstance(tokens, list) or len(tokens) < 2:
        reasons.append("missing_two_tokens")
    else:
        # Validate the first two list entries as they are. flatten_tokens takes
        # tokens[:2] before dropping non-dict entries, so filtering first would
        # accept markets that later yield fewer than two tracked tokens.
        leading = tokens[:2]
        mapping_ok = (
            all(isinstance(token, dict) for token in leading)
            and [token.get("outcome") for token in leading] == ["Up", "Down"]
            and all(token.get("token_id") for token in leading)
        )
        if not mapping_ok:
            reasons.append("bad_up_down_token_mapping")
    return not reasons, reasons
def select_markets(
    discovery: dict[str, Any],
    *,
    market_limit: int,
    market_end_safety_seconds: int,
) -> tuple[list[dict[str, Any]], dict[str, int]]:
    """Pick the first usable markets from a discovery document.

    Args:
        discovery: Discovery document; its ``normalized_markets`` list is scanned
            in order.
        market_limit: Scanning stops once this many markets have been accepted.
        market_end_safety_seconds: Passed to ``market_is_usable`` as the minimum
            remaining lifetime.

    Returns:
        ``(selected, rejection_counts)`` where ``rejection_counts`` maps each
        rejection reason to how often it occurred among the entries scanned
        before the limit was reached, sorted by reason.
    """
    reference_time = utc_now()
    accepted: list[dict[str, Any]] = []
    tally: dict[str, int] = {}

    def bump(reason: str) -> None:
        tally[reason] = tally.get(reason, 0) + 1

    for candidate in discovery.get("normalized_markets") or []:
        if not isinstance(candidate, dict):
            bump("not_object")
            continue
        usable, reasons = market_is_usable(candidate, reference_time, market_end_safety_seconds)
        if usable:
            accepted.append(candidate)
            if len(accepted) >= market_limit:
                break
            continue
        for reason in reasons:
            bump(reason)
    return accepted, dict(sorted(tally.items()))
def flatten_tokens(markets: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Expand markets into one flat record per tracked token.

    Only the first two entries of each market's ``tokens`` list are considered,
    and entries that are not dicts are skipped. ``token_id`` is always rendered
    as a string; the remaining fields are copied from the market or the token.
    """
    return [
        {
            "market_name": market.get("market_name"),
            "market_slug": market.get("market_slug"),
            "condition_id": market.get("condition_id"),
            "token_id": str(token.get("token_id")),
            "outcome": token.get("outcome"),
            "outcome_index": token.get("outcome_index"),
            "market_end_time_utc": market.get("end_time_utc"),
        }
        for market in markets
        for token in market.get("tokens", [])[:2]
        if isinstance(token, dict)
    ]
def decode_json_maybe(text: str) -> tuple[Any | None, str | None]:
    """Try to decode ``text`` as JSON without raising.

    Returns:
        ``(value, None)`` on success, or ``(None, message)`` where ``message``
        is the decoder's error text.
    """
    try:
        decoded = json.loads(text)
    except json.JSONDecodeError as exc:
        return None, str(exc)
    return decoded, None
def classify_ws_payload(payload: Any) -> list[str]:
    """Label every item of a decoded websocket payload with an event type.

    A non-list payload is treated as a single item. Per item:
    non-dicts are labelled with their Python type name; dicts with a truthy
    ``event_type`` use that value as text; dicts carrying all of ``market``,
    ``asset_id``, ``bids``, ``asks`` and ``timestamp`` are labelled
    ``book_without_event_type``; everything else is ``unknown_object``.
    """
    book_fields = ("market", "asset_id", "bids", "asks", "timestamp")

    def label(entry: Any) -> str:
        if not isinstance(entry, dict):
            return type(entry).__name__
        declared = entry.get("event_type")
        if declared:
            return str(declared)
        if all(field in entry for field in book_fields):
            return "book_without_event_type"
        return "unknown_object"

    entries = payload if isinstance(payload, list) else [payload]
    return [label(entry) for entry in entries]
def send_ws_frame(sock: ssl.SSLSocket, opcode: int, payload: bytes) -> None:
    """Send one final, client-masked websocket frame.

    Args:
        sock: Connected socket; the whole frame goes out in one ``sendall``.
        opcode: Frame opcode, combined with the FIN bit in the first byte.
        payload: Unmasked payload; it is XOR-masked with a fresh random
            4-byte key before sending.
    """
    masking_key = os.urandom(4)
    size = len(payload)
    first_byte = 0x80 | opcode
    # The second byte carries the mask bit plus either the length itself or a
    # marker (126/127) announcing a 16-bit or 64-bit extended length.
    frame = bytearray([first_byte])
    if size < 126:
        frame.append(0x80 | size)
    elif size < 65536:
        frame.append(0x80 | 126)
        frame += struct.pack("!H", size)
    else:
        frame.append(0x80 | 127)
        frame += struct.pack("!Q", size)
    frame += masking_key
    frame += bytes(octet ^ masking_key[position % 4] for position, octet in enumerate(payload))
    sock.sendall(frame)
def read_exact(sock: ssl.SSLSocket, length: int) -> bytes:
    """Receive from ``sock`` until ``length`` bytes have been collected.

    Raises:
        EOFError: If the peer closes the connection before enough bytes arrive.
    """
    pieces: list[bytes] = []
    outstanding = length
    while outstanding > 0:
        piece = sock.recv(outstanding)
        if not piece:
            raise EOFError("websocket connection closed while reading frame")
        pieces.append(piece)
        outstanding -= len(piece)
    return b"".join(pieces)
def read_ws_frame(sock: ssl.SSLSocket) -> tuple[int, bytes]:
    """Read one websocket frame and return ``(opcode, unmasked_payload)``.

    A socket timeout is only allowed to escape while no byte of the frame has
    been consumed, so callers can treat it as "nothing arrived yet" and simply
    call again. Once a frame has started, timeouts are retried: abandoning a
    half-read frame would drop the consumed bytes and leave the next call
    parsing payload bytes as a frame header.

    The FIN bit is not inspected, so the frames of a fragmented message are
    returned one by one (continuation frames carry opcode ``0x0``).

    Raises:
        socket.timeout: If the socket times out before any frame byte arrives.
        ConnectionError: If a started frame makes no progress for more than
            ten seconds; the stream can no longer be trusted at that point.
        EOFError: If the peer closes the connection in the middle of a frame
            or before one starts.
    """
    stall_budget_seconds = 10.0
    frame_started = False

    def read_bytes(length: int) -> bytes:
        nonlocal frame_started
        buffer = bytearray()
        stalled_since: float | None = None
        while len(buffer) < length:
            try:
                chunk = sock.recv(length - len(buffer))
            except socket.timeout:
                if not frame_started:
                    # Idle connection: nothing was consumed, safe to report.
                    raise
                now = time.monotonic()
                if stalled_since is None:
                    stalled_since = now
                elif now - stalled_since > stall_budget_seconds:
                    # Deliberately not a timeout type, so callers that retry on
                    # socket.timeout do not resume on a desynchronised stream.
                    raise ConnectionError(
                        f"websocket frame stalled mid-read for more than {stall_budget_seconds} seconds"
                    ) from None
                continue
            if not chunk:
                raise EOFError("websocket connection closed while reading frame")
            frame_started = True
            stalled_since = None
            buffer.extend(chunk)
        return bytes(buffer)

    first, second = read_bytes(2)
    opcode = first & 0x0F
    length = second & 0x7F
    masked = bool(second & 0x80)
    # 126 and 127 are markers for a 16-bit or 64-bit extended length field.
    if length == 126:
        length = struct.unpack("!H", read_bytes(2))[0]
    elif length == 127:
        length = struct.unpack("!Q", read_bytes(8))[0]
    mask = read_bytes(4) if masked else b""
    payload = read_bytes(length) if length else b""
    if masked:
        payload = bytes(byte ^ mask[index % 4] for index, byte in enumerate(payload))
    return opcode, payload
def parse_ws_headers(raw_headers: str) -> tuple[str, dict[str, str]]:
    """Split a raw HTTP response head into its status line and safe headers.

    Args:
        raw_headers: Header text with CRLF line endings, without the blank
            terminator line.

    Returns:
        ``(status_line, headers)`` where ``headers`` has been passed through
        ``filter_headers``. Lines without a colon are ignored; a repeated
        header name keeps its last value.
    """
    status_line, *field_lines = raw_headers.split("\r\n")
    collected: dict[str, str] = {}
    for field_line in field_lines:
        name, separator, content = field_line.partition(":")
        if not separator:
            continue
        collected[name.strip()] = content.strip()
    return status_line, filter_headers(collected)
def open_websocket(url: str, timeout_seconds: float) -> tuple[ssl.SSLSocket, dict[str, Any]]:
    """Open a TLS connection to ``url`` and perform the websocket upgrade.

    Args:
        url: Websocket URL; the port defaults to 443 and TLS is always used.
        timeout_seconds: Timeout for connecting and for each handshake read/write.

    Returns:
        ``(sock, handshake)`` where ``handshake`` holds the response status line
        and the allow-listed response headers.

    Raises:
        ValueError: If the URL has no host, the response head exceeds 64 KiB,
            or the server does not answer with status 101.
        EOFError: If the server closes the connection before the response head
            is complete.

    The socket is closed before any handshake error is re-raised, so a failed
    attempt does not leak a connection.
    """
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname
    if not host:
        raise ValueError("missing websocket host")
    port = parsed.port or 443
    path = parsed.path or "/"
    if parsed.query:
        path = f"{path}?{parsed.query}"
    raw_sock = socket.create_connection((host, port), timeout=timeout_seconds)
    try:
        sock = ssl.create_default_context().wrap_socket(raw_sock, server_hostname=host)
    except BaseException:
        raw_sock.close()
        raise
    try:
        sock.settimeout(timeout_seconds)
        key = base64.b64encode(os.urandom(16)).decode("ascii")
        request = (
            f"GET {path} HTTP/1.1\r\n"
            f"Host: {host}\r\n"
            "Upgrade: websocket\r\n"
            "Connection: Upgrade\r\n"
            f"Sec-WebSocket-Key: {key}\r\n"
            "Sec-WebSocket-Version: 13\r\n"
            "User-Agent: orderbooks-checkpoint-10b-ws-sample/0.1.0\r\n"
            "\r\n"
        )
        sock.sendall(request.encode("ascii"))
        raw_headers = bytearray()
        # Read one byte at a time and stop exactly at the blank line, so bytes
        # that already belong to the first websocket frame are never consumed
        # and thrown away with the response head.
        while not raw_headers.endswith(b"\r\n\r\n"):
            chunk = sock.recv(1)
            if not chunk:
                # Without this check a closed connection would spin forever.
                raise EOFError("websocket connection closed during handshake")
            raw_headers.extend(chunk)
            if len(raw_headers) > 65536:
                raise ValueError("websocket handshake headers exceeded 64 KiB")
        header_text = bytes(raw_headers).split(b"\r\n\r\n", 1)[0].decode(
            "iso-8859-1", errors="replace"
        )
        status_line, response_headers = parse_ws_headers(header_text)
        if " 101 " not in status_line:
            raise ValueError(f"websocket upgrade failed: {status_line}")
    except BaseException:
        sock.close()
        raise
    return sock, {"status_line": status_line, "headers": response_headers}
def http_post_books(
    *,
    url: str,
    token_ids: list[str],
    timeout_seconds: float,
) -> dict[str, Any]:
    """POST the token list to the books endpoint and describe the outcome.

    The function never raises for request failures: HTTP error responses and
    any other exception are captured in the returned record instead.

    Args:
        url: Endpoint to POST to.
        token_ids: Token ids sent as ``[{"token_id": ...}, ...]``.
        timeout_seconds: Timeout handed to ``urlopen``.

    Returns:
        A record with request/response timestamps, duration in milliseconds,
        the request body, status code, allow-listed headers, the decoded JSON
        (or the JSON error), a SHA-256 and byte length of the response text, a
        1000-character text preview when no JSON was decoded, the error text,
        and ``ok`` (no error, 2xx status, no JSON error).
    """
    requested_at_utc = iso_z()
    clock_start = time.monotonic()
    request_body = [{"token_id": token_id} for token_id in token_ids]
    encoded_body = json.dumps(request_body, separators=(",", ":")).encode("utf-8")

    status_code: int | None = None
    headers: dict[str, str] = {}
    response_text = ""
    error: str | None = None
    try:
        outgoing = urllib.request.Request(
            url,
            data=encoded_body,
            headers={
                "Accept": "application/json",
                "Content-Type": "application/json",
                "User-Agent": "orderbooks-checkpoint-10b-ws-sample/0.1.0",
            },
            method="POST",
        )
        with urllib.request.urlopen(outgoing, timeout=timeout_seconds) as response:
            status_code = response.status
            headers = filter_headers(response.headers)
            response_text = response.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as exc:
        # Error responses still carry a status, headers and a body worth keeping.
        status_code = exc.code
        headers = filter_headers(exc.headers)
        response_text = exc.read().decode("utf-8", errors="replace")
        error = f"HTTPError: {exc}"
    except Exception as exc:  # noqa: BLE001 - preserve request failure evidence
        error = f"{type(exc).__name__}: {exc}"

    if response_text:
        parsed_json, json_error = decode_json_maybe(response_text)
    else:
        parsed_json, json_error = None, None

    received_at_utc = iso_z()
    duration_ms = round((time.monotonic() - clock_start) * 1000, 3)
    text_bytes = response_text.encode("utf-8")
    succeeded = (
        error is None
        and status_code is not None
        and 200 <= status_code < 300
        and json_error is None
    )
    return {
        "requested_at_utc": requested_at_utc,
        "received_at_utc": received_at_utc,
        "duration_ms": duration_ms,
        "request_body": request_body,
        "status_code": status_code,
        "headers": headers,
        "raw_response_json": parsed_json,
        "json_error": json_error,
        "raw_response_text_sha256": sha256_bytes(text_bytes),
        "raw_response_length_bytes": len(text_bytes),
        "raw_response_text_preview": response_text[:1000] if parsed_json is None else None,
        "error": error,
        "ok": succeeded,
    }
def write_jsonl(handle: gzip.GzipFile, row: dict[str, Any]) -> None:
    """Append ``row`` to ``handle`` as one compact, key-sorted JSON line (UTF-8)."""
    line = json.dumps(row, separators=(",", ":"), sort_keys=True)
    handle.write(f"{line}\n".encode("utf-8"))
def summarize_output_file(path: Path, rows_written: int, kind: str) -> dict[str, Any]:
    """Describe an output archive for the manifest.

    Returns:
        Path, kind, row count, size in bytes, SHA-256 (``None`` when the file
        does not exist) and a status that is ``valid`` only for an existing,
        non-empty file.
    """
    present = path.exists()
    size = path.stat().st_size if present else 0
    status = "valid" if present and size > 0 else "missing_or_empty"
    return {
        "path": path.as_posix(),
        "kind": kind,
        "status": status,
        "bytes": size,
        "rows": rows_written,
        "sha256": sha256_file(path) if present else None,
    }
def config_value(config: dict[str, Any], args: argparse.Namespace, key: str, default: Any) -> Any:
    """Resolve one setting with precedence CLI argument > config file > default.

    A CLI value counts as supplied whenever it is not ``None``, so explicit
    falsy values such as ``0`` still override the config file.
    """
    cli_value = getattr(args, key)
    return config.get(key, default) if cli_value is None else cli_value
def build_runtime_config(args: argparse.Namespace) -> dict[str, Any]:
    """Merge CLI arguments, the flat YAML config and built-in defaults.

    Every setting is resolved through ``config_value`` (CLI first, then config
    file, then default) and coerced to its final type.

    Returns:
        The effective runtime settings plus the config path, its SHA-256 (when
        the file exists) and the parsed config snapshot.

    Raises:
        ValueError: If ``market_limit`` is below 1, or ``duration_seconds`` or
            ``rest_checkpoint_interval_seconds`` is not positive.
    """
    file_config = load_flat_yaml(args.config) if args.config else {}

    def setting(key: str, default: Any) -> Any:
        return config_value(file_config, args, key, default)

    config_present = bool(args.config) and args.config.exists()
    runtime = {
        "config_path": args.config,
        "config_sha256": sha256_file(args.config) if config_present else None,
        "config_snapshot": file_config,
        "discovery_path": Path(setting("discovery_path", DEFAULT_DISCOVERY_PATH)),
        "output_root": Path(setting("output_root", DEFAULT_OUTPUT_ROOT)),
        "manifest_path": Path(setting("manifest_path", DEFAULT_MANIFEST_PATH)),
        "report_path": Path(setting("report_path", DEFAULT_REPORT_PATH)),
        "market_limit": int(setting("market_limit", 2)),
        "duration_seconds": float(setting("duration_seconds", 150.0)),
        "rest_checkpoint_interval_seconds": float(setting("rest_checkpoint_interval_seconds", 30.0)),
        "request_timeout_seconds": float(setting("request_timeout_seconds", 15.0)),
        "websocket_timeout_seconds": float(setting("websocket_timeout_seconds", 15.0)),
        "websocket_url": str(setting("websocket_url", MARKET_WS_URL)),
        "clob_books_url": str(setting("clob_books_url", CLOB_BOOKS_URL)),
        "max_reconnects": int(setting("max_reconnects", 2)),
        "reconnect_backoff_seconds": float(setting("reconnect_backoff_seconds", 3.0)),
        "market_end_safety_seconds": int(setting("market_end_safety_seconds", 420)),
    }
    # Sanity limits, checked in a fixed order so the first violation is reported.
    violations = (
        (runtime["market_limit"] < 1, "market_limit must be >= 1"),
        (runtime["duration_seconds"] <= 0, "duration_seconds must be > 0"),
        (runtime["rest_checkpoint_interval_seconds"] <= 0, "rest_checkpoint_interval_seconds must be > 0"),
    )
    for violated, message in violations:
        if violated:
            raise ValueError(message)
    return runtime
def build_ws_envelope(
    *,
    run_id: str,
    session_id: str,
    connection_sequence: int,
    message_sequence: int,
    global_message_sequence: int,
    received_at_utc: str,
    websocket_url: str,
    subscription: dict[str, Any],
    tokens: list[dict[str, Any]],
    opcode: int,
    payload_bytes: bytes,
) -> tuple[dict[str, Any], list[str], bool]:
    """Wrap one received websocket payload in its archive envelope.

    The payload is decoded as UTF-8 (falling back to replacement characters,
    in which case the decode error is reported as the JSON error and no JSON
    parsing is attempted), then parsed as JSON and classified.

    Returns:
        ``(envelope, event_types, parsed)``. ``parsed`` is true only when JSON
        decoding produced a non-``None`` value; otherwise ``event_types`` is
        ``["unparseable_text"]``.
    """
    try:
        raw_text = payload_bytes.decode("utf-8")
        decode_error = None
    except UnicodeDecodeError as exc:
        decode_error = str(exc)
        raw_text = payload_bytes.decode("utf-8", errors="replace")

    if decode_error is None:
        parsed_json, json_error = decode_json_maybe(raw_text)
    else:
        parsed_json, json_error = None, decode_error

    is_parsed = parsed_json is not None
    event_types = classify_ws_payload(parsed_json) if is_parsed else ["unparseable_text"]

    envelope = {
        "schema_name": WS_SCHEMA_NAME,
        "schema_version": SCHEMA_VERSION,
        "collector": {"name": COLLECTOR_NAME, "version": COLLECTOR_VERSION},
        "run_id": run_id,
        "session_id": session_id,
        "connection_sequence": connection_sequence,
        "message_sequence": message_sequence,
        "global_message_sequence": global_message_sequence,
        "received_at_utc": received_at_utc,
        "websocket": {"url": websocket_url},
        "subscription": subscription,
        "tokens_tracked": tokens,
        "opcode": opcode,
        "payload_length_bytes": len(payload_bytes),
        "payload_sha256": sha256_bytes(payload_bytes),
        "raw_text": raw_text,
        "json": parsed_json,
        "json_error": json_error,
        "classified_event_types": event_types,
    }
    return envelope, event_types, is_parsed
def write_report(path: Path, manifest: dict[str, Any]) -> None:
"""Render the run manifest as a Markdown checkpoint report and write it to ``path``.

Args:
path: Destination file; its parent directories are created when missing.
manifest: Manifest dict built by ``run_sample``. The report reads its gate
status, end time, tracked markets/tokens, discovery path, websocket and
REST checkpoint statistics, output files, warnings and errors.
"""
# Fixed header plus the websocket and REST evidence sections.
lines = [
"# Checkpoint 10B Raw Websocket And REST Checkpoint Sample",
"",
f"Status: {manifest['gate_status']} ",
f"Created: {manifest['ended_at_utc']} ",
"Production ready: no ",
"Live Kubernetes collector modified: no",
"",
"## Scope",
"",
"Bounded public Polymarket BTC up/down websocket sample only. The live REST collector was not changed.",
"",
"## Markets And Tokens",
"",
f"- Markets tracked: `{len(manifest['markets_tracked'])}`.",
f"- Tokens tracked: `{len(manifest['tokens_tracked'])}`.",
f"- Discovery path: `{manifest['discovery']['path']}`.",
"",
"## Websocket Evidence",
"",
f"- URL: `{manifest['websocket']['url']}`.",
f"- Connected sessions: `{manifest['websocket']['connected_session_count']}`.",
f"- Reconnect count: `{manifest['websocket']['reconnect_count']}`.",
f"- Text messages written: `{manifest['websocket']['message_count']}`.",
f"- Parseable JSON messages: `{manifest['websocket']['parsed_json_count']}`.",
f"- Event type counts: `{json.dumps(manifest['websocket']['event_type_counts'], sort_keys=True)}`.",
f"- Opcode counts: `{json.dumps(manifest['websocket']['opcode_counts'], sort_keys=True)}`.",
f"- Maximum seconds between websocket text messages: `{manifest['websocket']['max_seconds_between_text_messages']}`.",
"",
"## REST Checkpoint Evidence",
"",
f"- Endpoint: `{manifest['rest_checkpoints']['url']}`.",
f"- Interval seconds: `{manifest['rest_checkpoints']['interval_seconds']}`.",
f"- Requests: `{manifest['rest_checkpoints']['request_count']}`.",
f"- Successes: `{manifest['rest_checkpoints']['success_count']}`.",
f"- Failures: `{manifest['rest_checkpoints']['failure_count']}`.",
"",
"## Output Files",
"",
]
# One bullet per output archive.
for output in manifest["output_files"]:
lines.append(
f"- `{output['path']}`: `{output['kind']}`, rows `{output['rows']}`, bytes `{output['bytes']}`, sha256 `{output['sha256']}`"
)
# Warnings and errors each fall back to a "None." bullet when empty.
lines.extend(["", "## Warnings", ""])
if manifest["warnings"]:
lines.extend(f"- {warning}" for warning in manifest["warnings"])
else:
lines.append("- None.")
lines.extend(["", "## Errors", ""])
if manifest["errors"]:
lines.extend(f"- `{item.get('stage')}`: {item.get('error')}" for item in manifest["errors"])
else:
lines.append("- None.")
lines.extend(
[
"",
"## Gate",
"",
manifest["gate_status"],
"",
"## Strongest Fake Progress Risk",
"",
"Receiving websocket traffic is not enough by itself; raw payloads must remain preserved and the offline reconstruction must compare against REST checkpoints before this path can inform the live collector design.",
"",
"## Next Smallest Step",
"",
"If this gate is `WS_RAW_SAMPLE_PASS`, run Checkpoint 10C reconstruction from the raw files referenced by this manifest.",
"",
]
)
# The trailing empty entry above makes the joined text end with a newline.
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text("\n".join(lines), encoding="utf-8")
def run_sample(runtime: dict[str, Any], command: str) -> dict[str, Any]:
"""Record a bounded websocket sample with periodic REST checkpoints.

Selects markets from the discovery file, subscribes to their tokens on the
websocket, writes every received text frame and every REST checkpoint to
gzip JSONL archives, and finally writes the manifest and the Markdown report.

Args:
runtime: Effective settings as returned by ``build_runtime_config``.
command: Command line recorded verbatim in the manifest.

Returns:
The manifest dict that was also written to ``runtime["manifest_path"]``.
"""
# Route SIGINT/SIGTERM to handle_stop so the loops below can end via STOP_REQUESTED.
signal.signal(signal.SIGINT, handle_stop)
signal.signal(signal.SIGTERM, handle_stop)
started = utc_now()
started_at_utc = iso_z(started)
# The compact start timestamp doubles as run id and as part of every output path.
run_id = compact_timestamp(started)
discovery = load_discovery(runtime["discovery_path"])
markets, rejection_counts = select_markets(
discovery,
market_limit=runtime["market_limit"],
market_end_safety_seconds=runtime["market_end_safety_seconds"],
)
tokens = flatten_tokens(markets)
token_ids = [token["token_id"] for token in tokens]
# Subscription message sent as the first text frame on every connection.
subscription = {"assets_ids": token_ids, "type": "market", "custom_feature_enabled": True}
ws_dir = runtime["output_root"] / "polymarket" / "ws_raw" / run_id
rest_dir = runtime["output_root"] / "polymarket" / "rest_checkpoints" / run_id
ws_dir.mkdir(parents=True, exist_ok=True)
rest_dir.mkdir(parents=True, exist_ok=True)
ws_file = ws_dir / f"polymarket_ws_raw_{run_id}.jsonl.gz"
rest_file = rest_dir / f"polymarket_rest_checkpoints_{run_id}.jsonl.gz"
# Run-wide evidence and counters that end up in the manifest.
warnings: list[str] = []
errors: list[dict[str, Any]] = []
sessions: list[dict[str, Any]] = []
event_type_counts: dict[str, int] = {}
opcode_counts: dict[str, int] = {}
message_count = 0
parsed_json_count = 0
rest_request_count = 0
rest_success_count = 0
rest_failure_count = 0
ws_rows = 0
rest_rows = 0
connected_session_count = 0
reconnect_count = 0
last_text_message_monotonic: float | None = None
max_gap_seconds: float | None = None
if not markets or not tokens:
warnings.append("No usable active BTC markets/tokens were selected from discovery input.")
# All scheduling uses the monotonic clock; the first checkpoint is due immediately.
deadline = time.monotonic() + runtime["duration_seconds"]
next_checkpoint_monotonic = time.monotonic()
connection_sequence = 0
global_message_sequence = 0
def write_rest_checkpoint(rest_handle: gzip.GzipFile) -> None:
# Issue one POST to the books endpoint, append its envelope to the REST
# archive (successful or not) and schedule the next checkpoint.
nonlocal rest_request_count, rest_success_count, rest_failure_count, rest_rows, next_checkpoint_monotonic
rest_request_count += 1
checkpoint = http_post_books(
url=runtime["clob_books_url"],
token_ids=token_ids,
timeout_seconds=runtime["request_timeout_seconds"],
)
if checkpoint["ok"]:
rest_success_count += 1
else:
rest_failure_count += 1
envelope = {
"schema_name": REST_SCHEMA_NAME,
"schema_version": SCHEMA_VERSION,
"collector": {"name": COLLECTOR_NAME, "version": COLLECTOR_VERSION},
"run_id": run_id,
"checkpoint_sequence": rest_request_count,
"requested_at_utc": checkpoint["requested_at_utc"],
"received_at_utc": checkpoint["received_at_utc"],
"duration_ms": checkpoint["duration_ms"],
"request": {"method": "POST", "url": runtime["clob_books_url"], "token_ids": token_ids, "json_body": checkpoint["request_body"]},
"response": {
"status_code": checkpoint["status_code"],
"headers": checkpoint["headers"],
"raw_response_json": checkpoint["raw_response_json"],
"json_error": checkpoint["json_error"],
"raw_response_text_sha256": checkpoint["raw_response_text_sha256"],
"raw_response_length_bytes": checkpoint["raw_response_length_bytes"],
"raw_response_text_preview": checkpoint["raw_response_text_preview"],
"error": checkpoint["error"],
},
"ok": checkpoint["ok"],
"tokens_tracked": tokens,
}
write_jsonl(rest_handle, envelope)
rest_handle.flush()
rest_rows += 1
# The schedule advances by a fixed interval from the previous due time, not
# from "now", so a late checkpoint is followed by catch-up requests.
next_checkpoint_monotonic += runtime["rest_checkpoint_interval_seconds"]
with gzip.open(ws_file, "wb") as ws_handle, gzip.open(rest_file, "wb") as rest_handle:
# Baseline REST checkpoint before the first websocket connection.
if token_ids:
write_rest_checkpoint(rest_handle)
# Outer loop: one iteration per websocket connection attempt.
while token_ids and not STOP_REQUESTED and time.monotonic() < deadline:
sock: ssl.SSLSocket | None = None
connection_sequence += 1
session_id = f"{run_id}-ws{connection_sequence}"
session: dict[str, Any] = {
"session_id": session_id,
"connection_sequence": connection_sequence,
"connected_at_utc": None,
"subscribed_at_utc": None,
"closed_at_utc": None,
"handshake": None,
"message_count": 0,
"close_reason": None,
"error": None,
}
sessions.append(session)
try:
sock, handshake = open_websocket(runtime["websocket_url"], runtime["websocket_timeout_seconds"])
connected_session_count += 1
session["connected_at_utc"] = iso_z()
session["handshake"] = handshake
send_ws_frame(sock, 0x1, json.dumps(subscription, separators=(",", ":")).encode("utf-8"))
session["subscribed_at_utc"] = iso_z()
message_sequence = 0
# Inner loop: read frames until stop, deadline or a close frame.
while not STOP_REQUESTED and time.monotonic() < deadline:
# Run every REST checkpoint that has become due.
while token_ids and time.monotonic() >= next_checkpoint_monotonic and time.monotonic() < deadline:
write_rest_checkpoint(rest_handle)
# Wake up at least once per second, earlier when the deadline or the
# next checkpoint is closer, but never wait less than 0.1 s.
timeout = min(1.0, max(0.1, deadline - time.monotonic()))
if next_checkpoint_monotonic > time.monotonic():
timeout = min(timeout, max(0.1, next_checkpoint_monotonic - time.monotonic()))
sock.settimeout(timeout)
try:
opcode, payload = read_ws_frame(sock)
except socket.timeout:
# NOTE(review): retrying here relies on read_ws_frame not raising
# socket.timeout after it has consumed part of a frame - TODO confirm.
continue
opcode_key = str(opcode)
opcode_counts[opcode_key] = opcode_counts.get(opcode_key, 0) + 1
# 0x8 = close frame: end this session.
if opcode == 0x8:
session["close_reason"] = "close_frame"
session["closed_at_utc"] = iso_z()
break
# 0x9 = ping: answer with a pong (0xA) echoing the payload.
if opcode == 0x9:
send_ws_frame(sock, 0xA, payload)
continue
# Only text frames (0x1) are archived; every other opcode is counted and dropped.
if opcode != 0x1:
continue
received_at_utc = iso_z()
now_mono = time.monotonic()
# Track the longest silence between two text messages across all sessions.
if last_text_message_monotonic is not None:
gap = now_mono - last_text_message_monotonic
max_gap_seconds = gap if max_gap_seconds is None else max(max_gap_seconds, gap)
last_text_message_monotonic = now_mono
message_sequence += 1
global_message_sequence += 1
message_count += 1
session["message_count"] += 1
envelope, event_types, parsed_ok = build_ws_envelope(
run_id=run_id,
session_id=session_id,
connection_sequence=connection_sequence,
message_sequence=message_sequence,
global_message_sequence=global_message_sequence,
received_at_utc=received_at_utc,
websocket_url=runtime["websocket_url"],
subscription=subscription,
tokens=tokens,
opcode=opcode,
payload_bytes=payload,
)
if parsed_ok:
parsed_json_count += 1
for event_type in event_types:
event_type_counts[event_type] = event_type_counts.get(event_type, 0) + 1
write_jsonl(ws_handle, envelope)
ws_handle.flush()
ws_rows += 1
# A server-initiated close triggers a reconnect until the budget is used up.
if session.get("close_reason") == "close_frame":
reconnect_count += 1
if reconnect_count > runtime["max_reconnects"]:
warnings.append("Maximum reconnect count reached after websocket close frame.")
break
time.sleep(runtime["reconnect_backoff_seconds"])
continue
# The inner loop ended because of a stop request or the deadline.
break
except Exception as exc: # noqa: BLE001 - preserve websocket failure evidence
# Any connection or protocol failure is recorded and counts as a reconnect.
session["error"] = f"{type(exc).__name__}: {exc}"
session["closed_at_utc"] = iso_z()
errors.append({"stage": "websocket", "session_id": session_id, "error": session["error"]})
reconnect_count += 1
if reconnect_count > runtime["max_reconnects"]:
break
time.sleep(runtime["reconnect_backoff_seconds"])
finally:
# Best-effort shutdown: send a close frame, close the socket, and make
# sure the session has a close timestamp.
if sock is not None:
try:
send_ws_frame(sock, 0x8, b"")
except Exception:
pass
try:
sock.close()
except Exception:
pass
session["closed_at_utc"] = session.get("closed_at_utc") or iso_z()
# If a checkpoint is already due and fewer than two requests were made, issue it now.
while token_ids and time.monotonic() >= next_checkpoint_monotonic and rest_request_count < 2:
write_rest_checkpoint(rest_handle)
ended = utc_now()
ended_at_utc = iso_z(ended)
if STOP_REQUESTED:
warnings.append(f"Stop requested by {STOP_SIGNAL}.")
# No gap can be reported until at least two text messages were seen.
max_gap_seconds_value = None if max_gap_seconds is None and message_count <= 1 else round(max_gap_seconds or 0.0, 3)
output_files = [
summarize_output_file(ws_file, ws_rows, "raw_websocket_messages"),
summarize_output_file(rest_file, rest_rows, "rest_books_checkpoints"),
]
# Gate: the first failing precondition wins, in this order.
if not markets or not tokens:
gate_status = "BLOCKED_DISCOVERY"
elif connected_session_count == 0:
gate_status = "BLOCKED_WS_CONNECTIVITY"
elif rest_success_count < 2:
gate_status = "BLOCKED_REST_CHECKPOINTS"
elif message_count >= 1 and parsed_json_count >= 1:
gate_status = "WS_RAW_SAMPLE_PASS"
else:
gate_status = "WS_RAW_SAMPLE_NEEDS_REVIEW"
warnings.append("Websocket connected/subscribed but did not produce at least one parseable text message.")
manifest = {
"schema_name": "checkpoint_010b_ws_raw_sample",
"schema_version": 1,
"checkpoint_id": "10B",
"checkpoint_name": "Raw Websocket And REST Checkpoint Sample",
"gate_status": gate_status,
"production_ready": False,
"live_kubernetes_collector_modified": False,
"collector": {"name": COLLECTOR_NAME, "version": COLLECTOR_VERSION},
"command": command,
"run_id": run_id,
"started_at_utc": started_at_utc,
"ended_at_utc": ended_at_utc,
"configured_duration_seconds": runtime["duration_seconds"],
"actual_duration_seconds": round((ended - started).total_seconds(), 3),
"config": {
"path": runtime["config_path"].as_posix() if runtime["config_path"] else None,
"sha256": runtime["config_sha256"],
"snapshot": runtime["config_snapshot"],
"effective": {
"discovery_path": runtime["discovery_path"].as_posix(),
"output_root": runtime["output_root"].as_posix(),
"manifest_path": runtime["manifest_path"].as_posix(),
"report_path": runtime["report_path"].as_posix(),
"market_limit": runtime["market_limit"],
"duration_seconds": runtime["duration_seconds"],
"rest_checkpoint_interval_seconds": runtime["rest_checkpoint_interval_seconds"],
"request_timeout_seconds": runtime["request_timeout_seconds"],
"websocket_timeout_seconds": runtime["websocket_timeout_seconds"],
"websocket_url": runtime["websocket_url"],
"clob_books_url": runtime["clob_books_url"],
"max_reconnects": runtime["max_reconnects"],
"reconnect_backoff_seconds": runtime["reconnect_backoff_seconds"],
"market_end_safety_seconds": runtime["market_end_safety_seconds"],
},
},
"discovery": {
"path": runtime["discovery_path"].as_posix(),
"sha256": sha256_file(runtime["discovery_path"]) if runtime["discovery_path"].exists() else None,
"fetched_at_utc": discovery.get("fetched_at_utc"),
"summary": discovery.get("summary"),
"rejection_counts_before_selection": rejection_counts,
},
"markets_tracked": [
{"market_slug": market.get("market_slug"), "condition_id": market.get("condition_id"), "end_time_utc": market.get("end_time_utc")}
for market in markets
],
"tokens_tracked": tokens,
"websocket": {
"url": runtime["websocket_url"],
"subscription": subscription,
"sessions": sessions,
"connected_session_count": connected_session_count,
"reconnect_count": reconnect_count,
"message_count": message_count,
"parsed_json_count": parsed_json_count,
"event_type_counts": dict(sorted(event_type_counts.items())),
"opcode_counts": dict(sorted(opcode_counts.items())),
"max_seconds_between_text_messages": max_gap_seconds_value,
},
"rest_checkpoints": {
"url": runtime["clob_books_url"],
"interval_seconds": runtime["rest_checkpoint_interval_seconds"],
"request_count": rest_request_count,
"success_count": rest_success_count,
"failure_count": rest_failure_count,
},
"output_files": output_files,
"warnings": warnings,
"errors": errors,
"strongest_fake_progress_risk": "Websocket traffic without offline reconstruction and REST comparison is only raw-source evidence, not proof of full-fidelity correctness.",
"next_step": "Run Checkpoint 10C reconstruction from this manifest if gate_status is WS_RAW_SAMPLE_PASS.",
}
# Persist the manifest first, then render the human-readable report from it.
runtime["manifest_path"].parent.mkdir(parents=True, exist_ok=True)
runtime["manifest_path"].write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
write_report(runtime["report_path"], manifest)
return manifest
def parse_args() -> argparse.Namespace:
    """Declare the sample recorder CLI and parse the process arguments.

    Every override option defaults to ``None`` so that an omitted flag can be
    told apart from an explicit value when settings are merged with the config
    file. Only ``--config`` has a real default.
    """
    cli = argparse.ArgumentParser(
        description="Record a bounded Polymarket BTC websocket sample plus REST checkpoints."
    )
    cli.add_argument("--config", type=Path, default=DEFAULT_CONFIG_PATH)
    # (flag, value type) for each optional override, in help-output order.
    overrides = (
        ("--discovery-path", Path),
        ("--output-root", Path),
        ("--manifest-path", Path),
        ("--report-path", Path),
        ("--market-limit", int),
        ("--duration-seconds", float),
        ("--rest-checkpoint-interval-seconds", float),
        ("--request-timeout-seconds", float),
        ("--websocket-timeout-seconds", float),
        ("--websocket-url", str),
        ("--clob-books-url", str),
        ("--max-reconnects", int),
        ("--reconnect-backoff-seconds", float),
        ("--market-end-safety-seconds", int),
    )
    for flag, value_type in overrides:
        cli.add_argument(flag, type=value_type, default=None)
    return cli.parse_args()
def main() -> int:
    """Run one bounded sample and print where its artifacts were written.

    Returns:
        ``0`` when the gate is ``WS_RAW_SAMPLE_PASS`` or
        ``WS_RAW_SAMPLE_NEEDS_REVIEW``; ``1`` for any blocked gate.
    """
    cli_args = parse_args()
    runtime = build_runtime_config(cli_args)
    # The full command line is stored in the manifest for reproducibility.
    manifest = run_sample(runtime, " ".join(sys.argv))
    print(f"WS_SAMPLE_MANIFEST={runtime['manifest_path']}")
    print(f"WS_SAMPLE_REPORT={runtime['report_path']}")
    gate_status = manifest["gate_status"]
    print(f"WS_SAMPLE_GATE={gate_status}")
    if gate_status in {"WS_RAW_SAMPLE_PASS", "WS_RAW_SAMPLE_NEEDS_REVIEW"}:
        return 0
    return 1
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Container entrypoint for the websocket recorder: resolves its settings from
# the environment, builds the collector command line and replaces this shell
# with the Python process.
set -euo pipefail

# Environment overrides with their defaults.
APP_DIR="${ORDERBOOKS_APP_DIR:-/app}"
CONFIG_PATH="${ORDERBOOKS_WS_COLLECTOR_CONFIG:-/etc/orderbooks/polymarket_ws_collector.yaml}"
PYTHON_BIN="${ORDERBOOKS_PYTHON:-python3}"

cd "${APP_DIR}"

args=(scripts/collect_polymarket_ws_orderbooks.py --config "${CONFIG_PATH}")

# Append "<flag> <value>" to args only when the value is non-empty.
append_if_set() {
  local flag="$1"
  local value="$2"
  if [[ -n "${value}" ]]; then
    args+=("${flag}" "${value}")
  fi
}

append_if_set --duration-seconds "${ORDERBOOKS_WS_DURATION_SECONDS:-}"
append_if_set --market-limit "${ORDERBOOKS_WS_MARKET_LIMIT:-}"

echo "orderbooks websocket recorder starting at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
echo "config path: ${CONFIG_PATH}"

# exec so the Python process receives signals directly.
exec "${PYTHON_BIN}" "${args[@]}"

View file

@ -188,6 +188,17 @@ for root, kind in [(raw_dir, "raw"), (source_manifest_dir, "manifest")]:
continue
seen.add(resolved)
rel = rel_for(path)
if path.name.startswith('.') or path.suffix in {'.open', '.tmp', '.partial'} or path.name.endswith(('.open', '.tmp', '.partial')):
skipped.append({
"local_path": str(path),
"relative_path": rel,
"kind": kind,
"bytes": path.stat().st_size,
"mtime_utc": iso_z_from_ts(path.stat().st_mtime),
"age_seconds": max(0, int(now.timestamp() - path.stat().st_mtime)),
"reason": "open_or_temporary_file",
})
continue
stat = path.stat()
age_seconds = max(0, int(now.timestamp() - stat.st_mtime))
base = {