Implement runtime health sentinel and angry dashboard
All checks were successful
deploy / deploy (push) Successful in 26s
All checks were successful
deploy / deploy (push) Successful in 26s
Proof: Runtime health sentinel, alert routing, and anomaly detection for stale/disconnected quote truth, truthful dashboard severity, webhook notifications, and safe executor containment. Assumptions: Existing control APIs remain the service-local truth surface; external notification stays as a generic webhook sink; executor disarm is an allowed non-fund-moving containment action; current dashboard/operator files in the worktree belong to this turn and are intended to ship together. Still fake: No live external receiver is configured; webhook delivery is implemented but unverified end-to-end in production; cluster rollout still depends on deploying the new image; no automatic deployment restart path was added.
This commit is contained in:
parent
af74c48f29
commit
0b7e5e2e6c
18 changed files with 4029 additions and 10 deletions
|
|
@ -95,3 +95,5 @@ OPS_SENTINEL_PRICE_STALE_MS=30000
|
|||
OPS_SENTINEL_INVENTORY_STALE_MS=30000
|
||||
OPS_SENTINEL_FUNDING_CREDIT_PENDING_MS=300000
|
||||
OPS_SENTINEL_FUNDING_STUCK_MS=3600000
|
||||
OPS_SENTINEL_ALERT_WEBHOOK_URL=
|
||||
OPS_SENTINEL_ALERT_WEBHOOK_TIMEOUT_MS=5000
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ const topics = [
|
|||
const portfolioMetricTopics = new Set([
|
||||
config.kafkaTopicRefMarketPrice,
|
||||
config.kafkaTopicStateIntentInventory,
|
||||
config.kafkaTopicOpsLiquidityAction,
|
||||
config.kafkaTopicCmdExecuteTrade,
|
||||
config.kafkaTopicExecTradeResult,
|
||||
]);
|
||||
|
|
@ -141,11 +142,28 @@ const controlApi = startControlApi({
|
|||
stateProvider: {
|
||||
async getState() {
|
||||
const connectivity = await pool.query('SELECT 1').then(() => true).catch(() => false);
|
||||
return {
|
||||
...state,
|
||||
database_connectivity: connectivity,
|
||||
};
|
||||
},
|
||||
return {
|
||||
...state,
|
||||
database_connectivity: connectivity,
|
||||
};
|
||||
},
|
||||
},
|
||||
healthProvider: {
|
||||
async getHealth() {
|
||||
const connectivity = await pool.query('SELECT 1').then(() => true).catch(() => false);
|
||||
const lastTruthAt = state.last_write_at || state.last_metrics_at || null;
|
||||
const freshnessAgeMs = lastTruthAt ? Date.now() - new Date(lastTruthAt).getTime() : null;
|
||||
return {
|
||||
ok: connectivity && (freshnessAgeMs == null || freshnessAgeMs <= config.opsSentinelHistoryWriterStaleMs),
|
||||
paused: state.paused,
|
||||
last_write_at: state.last_write_at,
|
||||
last_alert_write_at: state.last_alert_write_at,
|
||||
last_metrics_at: state.last_metrics_at,
|
||||
freshness_age_ms: Number.isFinite(freshnessAgeMs) ? Math.max(0, freshnessAgeMs) : null,
|
||||
database_connectivity: connectivity,
|
||||
last_error: state.last_error,
|
||||
};
|
||||
},
|
||||
},
|
||||
routes: [
|
||||
{
|
||||
|
|
@ -198,11 +216,15 @@ const controlApi = startControlApi({
|
|||
});
|
||||
|
||||
async function refreshPortfolioMetrics() {
|
||||
const inputs = await loadPortfolioMetricInputs(pool);
|
||||
const inputs = await loadPortfolioMetricInputs(pool, {
|
||||
btcAsset: config.tradingBtc,
|
||||
eureAsset: config.tradingEure,
|
||||
});
|
||||
const payload = computePortfolioMetric({
|
||||
baseline: inputs.baseline,
|
||||
currentInventory: inputs.currentInventory?.payload,
|
||||
currentPrice: inputs.currentPrice?.payload,
|
||||
externalFlows: inputs.externalFlows || [],
|
||||
btcAsset: config.tradingBtc,
|
||||
eureAsset: config.tradingEure,
|
||||
commandCount: inputs.commandCount,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { createLogger } from '../core/log.mjs';
|
|||
import { createPairFilterController } from '../core/pair-filter.mjs';
|
||||
import { loadConfig } from '../lib/config.mjs';
|
||||
import { startNearIntentsWs } from '../venues/near-intents/ws.mjs';
|
||||
import { ageMs } from '../core/runtime-health.mjs';
|
||||
|
||||
const config = loadConfig();
|
||||
const logger = createLogger({
|
||||
|
|
@ -73,7 +74,38 @@ const controlApi = config.nearIntentsControlApiEnabled
|
|||
};
|
||||
},
|
||||
},
|
||||
healthProvider: {
|
||||
getHealth() {
|
||||
const ingest = wsRuntime.getState();
|
||||
const lastTruthAt = ingest.last_published_at || ingest.last_matching_quote_at || ingest.last_message_at;
|
||||
const freshnessAgeMs = ageMs(lastTruthAt);
|
||||
const staleAfterMs = config.opsSentinelIngestQuoteStaleMs;
|
||||
return {
|
||||
ok: Boolean(ingest.connected) && (freshnessAgeMs == null || freshnessAgeMs <= staleAfterMs),
|
||||
connected: ingest.connected,
|
||||
last_message_at: ingest.last_message_at,
|
||||
last_matching_quote_at: ingest.last_matching_quote_at,
|
||||
last_published_at: ingest.last_published_at,
|
||||
freshness_age_ms: freshnessAgeMs,
|
||||
stale_after_ms: staleAfterMs,
|
||||
reason:
|
||||
ingest.connected
|
||||
? freshnessAgeMs != null && freshnessAgeMs > staleAfterMs
|
||||
? 'quote truth stale'
|
||||
: null
|
||||
: 'websocket disconnected',
|
||||
};
|
||||
},
|
||||
},
|
||||
routes: [
|
||||
{
|
||||
method: 'POST',
|
||||
path: '/reconnect',
|
||||
handler: () => {
|
||||
wsRuntime.reconnect();
|
||||
return { ok: true, reconnecting: true };
|
||||
},
|
||||
},
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/pair-filter',
|
||||
|
|
|
|||
617
src/apps/operator-dashboard.mjs
Normal file
617
src/apps/operator-dashboard.mjs
Normal file
|
|
@ -0,0 +1,617 @@
|
|||
import http from 'node:http';
|
||||
import process from 'node:process';
|
||||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
|
||||
import { WebSocketServer } from 'ws';
|
||||
|
||||
import { createConsumer } from '../bus/kafka/consumer.mjs';
|
||||
import { parseEventMessage } from '../core/event-envelope.mjs';
|
||||
import {
|
||||
applyDashboardLiveEvent,
|
||||
buildDashboardBootstrap,
|
||||
buildLiveStatusBar,
|
||||
createDashboardLiveState,
|
||||
listDashboardServices,
|
||||
resolveDashboardControl,
|
||||
} from '../core/operator-dashboard.mjs';
|
||||
import {
|
||||
buildDashboardAuthChallengeHeader,
|
||||
buildDashboardSessionCookie,
|
||||
resolveDashboardRequestAuth,
|
||||
} from '../core/operator-dashboard-auth.mjs';
|
||||
import { createLogger, serializeError } from '../core/log.mjs';
|
||||
import { readJsonBody, sendJson } from '../core/control-api.mjs';
|
||||
import { loadConfig } from '../lib/config.mjs';
|
||||
import { fetchJson } from '../lib/http.mjs';
|
||||
import {
|
||||
createPostgresPool,
|
||||
loadCurrentFundingObservations,
|
||||
loadLatestInventorySnapshot,
|
||||
loadLatestMarketPrice,
|
||||
loadLatestPortfolioMetric,
|
||||
loadRecentAlertTransitions,
|
||||
loadRecentDepositStatuses,
|
||||
loadRecentTradeDecisions,
|
||||
loadRecentQuotes,
|
||||
loadSuccessfulTradeSummary,
|
||||
loadSuccessfulTradesPage,
|
||||
} from '../lib/postgres.mjs';
|
||||
|
||||
const config = loadConfig();
|
||||
const logger = createLogger({
|
||||
service: 'operator-dashboard',
|
||||
component: 'dashboard',
|
||||
namespace: config.projectNamespace,
|
||||
});
|
||||
|
||||
const dashboardRuntimeState = {
|
||||
last_bootstrap_at: null,
|
||||
last_bootstrap_error: null,
|
||||
source_errors: {},
|
||||
last_source_error_at: null,
|
||||
last_live_event_error: null,
|
||||
websocket_clients: 0,
|
||||
};
|
||||
|
||||
if (
|
||||
config.operatorDashboardAuthMode === 'basic'
|
||||
&& (!config.operatorDashboardAuthUsername || !config.operatorDashboardAuthPassword)
|
||||
) {
|
||||
logger.error('dashboard_basic_auth_config_missing', {
|
||||
details: {
|
||||
auth_mode: config.operatorDashboardAuthMode,
|
||||
},
|
||||
});
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const pool = createPostgresPool({
|
||||
connectionString: config.postgresUrl,
|
||||
});
|
||||
|
||||
const staticAssets = await loadStaticAssets();
|
||||
const initialServiceSnapshots = await loadServiceSnapshots();
|
||||
const initialRecentQuotes = await safeSourceLoad(
|
||||
'recent_quotes',
|
||||
() => loadRecentQuotes(pool, {
|
||||
limit: config.operatorDashboardQuoteLimit,
|
||||
}),
|
||||
[],
|
||||
);
|
||||
const initialSuccessfulTradeSummary = await safeSourceLoad(
|
||||
'successful_trade_summary',
|
||||
() => loadSuccessfulTradeSummary(pool),
|
||||
{ total: 0, last_successful_trade_at: null },
|
||||
);
|
||||
const initialMarketPrice = await safeSourceLoad(
|
||||
'latest_market_price',
|
||||
() => loadLatestMarketPrice(pool),
|
||||
null,
|
||||
);
|
||||
const initialInventory = await safeSourceLoad(
|
||||
'latest_inventory',
|
||||
() => loadLatestInventorySnapshot(pool),
|
||||
null,
|
||||
);
|
||||
|
||||
const liveState = createDashboardLiveState({
|
||||
config,
|
||||
recentQuotes: initialRecentQuotes,
|
||||
latestMarketPrice: initialMarketPrice,
|
||||
latestInventory: initialInventory,
|
||||
successfulTradeCount: initialSuccessfulTradeSummary.total,
|
||||
lastSuccessfulTradeAt: initialSuccessfulTradeSummary.last_successful_trade_at,
|
||||
activeAlerts:
|
||||
initialServiceSnapshots.find((snapshot) => snapshot.service === 'ops-sentinel')?.state?.active_alerts
|
||||
|| [],
|
||||
});
|
||||
|
||||
const liveConsumer = await createConsumer({
|
||||
groupId: config.kafkaConsumerGroupOperatorDashboard,
|
||||
brokers: config.kafkaBrokers,
|
||||
clientId: config.kafkaClientId,
|
||||
logger,
|
||||
});
|
||||
|
||||
const liveTopics = [
|
||||
config.kafkaTopicNormSwapDemand,
|
||||
config.kafkaTopicRefMarketPrice,
|
||||
config.kafkaTopicStateIntentInventory,
|
||||
config.kafkaTopicOpsAlert,
|
||||
config.kafkaTopicExecTradeResult,
|
||||
];
|
||||
|
||||
for (const topic of liveTopics) {
|
||||
await liveConsumer.subscribe({ topic, fromBeginning: false });
|
||||
}
|
||||
|
||||
await liveConsumer.run({
|
||||
eachMessage: async ({ topic, message }) => {
|
||||
if (!message.value) return;
|
||||
|
||||
try {
|
||||
const event = parseEventMessage(message.value.toString());
|
||||
const updates = applyDashboardLiveEvent(liveState, { topic, event });
|
||||
for (const update of updates) {
|
||||
broadcast(update);
|
||||
}
|
||||
} catch (error) {
|
||||
dashboardRuntimeState.last_live_event_error = serializeError(error);
|
||||
logger.error('dashboard_live_event_failed', {
|
||||
topic,
|
||||
details: {
|
||||
error: serializeError(error),
|
||||
},
|
||||
});
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
const webSockets = new Set();
|
||||
const webSocketServer = new WebSocketServer({
|
||||
noServer: true,
|
||||
});
|
||||
|
||||
// Registers every authenticated dashboard client in the broadcast set and
// primes it with the current live view (recent quotes + status bar).
webSocketServer.on('connection', (socket, _req, authContext) => {
  // Idempotent: both 'error' and 'close' may fire for the same socket.
  const forgetSocket = () => {
    webSockets.delete(socket);
    dashboardRuntimeState.websocket_clients = webSockets.size;
  };

  // An 'error' event without a listener is rethrown by EventEmitter as an
  // uncaught exception, which would take the whole dashboard down because of
  // one misbehaving client. Attach the listeners before doing any I/O.
  socket.on('error', (error) => {
    logger.error('dashboard_websocket_error', {
      details: {
        error: serializeError(error),
      },
    });
    forgetSocket();
  });
  socket.on('close', forgetSocket);

  webSockets.add(socket);
  dashboardRuntimeState.websocket_clients = webSockets.size;

  socket.send(JSON.stringify({
    type: 'session.ready',
    session: authContext,
    live: {
      recent_quotes: liveState.recent_quotes,
      status_bar: buildLiveStatusBar(liveState),
    },
  }));
});
|
||||
|
||||
/**
 * HTTP entry point of the operator dashboard.
 *
 * Routes, in order:
 *   - GET /healthz and GET /state: unauthenticated runtime introspection.
 *   - everything else requires authentication via authenticateHttpRequest.
 *   - /api/*: delegated to handleApiRequest.
 *   - GET <static asset path>: served from the preloaded staticAssets map.
 *   - anything else: 404 { error: 'not_found' }.
 *
 * Any failure is logged as 'dashboard_request_failed' and answered with a 500.
 */
const server = http.createServer(async (req, res) => {
  try {
    const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    const isGet = req.method === 'GET';

    if (isGet && url.pathname === '/healthz') {
      const sourceErrorCount = Object.keys(dashboardRuntimeState.source_errors).length;
      return sendJson(res, 200, {
        ok: sourceErrorCount === 0 && !dashboardRuntimeState.last_bootstrap_error,
        service: 'operator-dashboard',
        websocket_clients: webSockets.size,
        source_error_count: sourceErrorCount,
        last_source_error_at: dashboardRuntimeState.last_source_error_at,
        last_bootstrap_at: dashboardRuntimeState.last_bootstrap_at,
        last_bootstrap_error: dashboardRuntimeState.last_bootstrap_error,
        last_live_event_error: dashboardRuntimeState.last_live_event_error,
      });
    }

    if (isGet && url.pathname === '/state') {
      return sendJson(res, 200, {
        service: 'operator-dashboard',
        namespace: config.projectNamespace,
        websocket_clients: webSockets.size,
        last_bootstrap_at: dashboardRuntimeState.last_bootstrap_at,
        last_bootstrap_error: dashboardRuntimeState.last_bootstrap_error,
        source_errors: Object.values(dashboardRuntimeState.source_errors),
        source_error_count: Object.keys(dashboardRuntimeState.source_errors).length,
        last_source_error_at: dashboardRuntimeState.last_source_error_at,
        last_live_event_error: dashboardRuntimeState.last_live_event_error,
      });
    }

    // authenticateHttpRequest has already written the 401 when it returns null.
    const auth = authenticateHttpRequest(req, res);
    if (!auth) return undefined;

    if (url.pathname.startsWith('/api/')) {
      // Must be awaited: a bare `return promise` inside try/catch lets the
      // rejection escape this handler as an unhandled rejection, and the
      // client would never receive a response.
      return await handleApiRequest({ req, res, url, auth });
    }

    if (isGet && staticAssets.has(url.pathname)) {
      const asset = staticAssets.get(url.pathname);
      res.statusCode = 200;
      res.setHeader('content-type', asset.contentType);
      res.end(asset.body);
      return undefined;
    }

    return sendJson(res, 404, { error: 'not_found' });
  } catch (error) {
    logger.error('dashboard_request_failed', {
      details: {
        path: req.url,
        error: serializeError(error),
      },
    });

    // Once headers are on the wire a second response cannot be written;
    // tear the connection down instead of throwing from inside the catch.
    if (res.headersSent) {
      res.destroy();
      return undefined;
    }

    return sendJson(res, 500, {
      error: error.message,
    });
  }
});
|
||||
|
||||
// WebSocket upgrade gate: only `/ws` is upgradable, and only for requests that
// pass the same auth resolution used for plain HTTP requests.
server.on('upgrade', (req, socket, head) => {
  // This runs as a bare event listener, so anything thrown here (for example
  // `new URL` rejecting a malformed request target or Host header) would be an
  // uncaught exception. Contain it and drop the offending connection.
  try {
    const url = new URL(req.url || '/', `http://${req.headers.host || 'localhost'}`);
    if (url.pathname !== '/ws') {
      socket.destroy();
      return;
    }

    const auth = resolveDashboardRequestAuth({
      mode: config.operatorDashboardAuthMode,
      authorizationHeader: req.headers.authorization || '',
      cookieHeader: req.headers.cookie || '',
      username: config.operatorDashboardAuthUsername,
      password: config.operatorDashboardAuthPassword,
    });
    if (!auth.authenticated) {
      // The socket is still raw at this point, so the 401 is written by hand.
      socket.write(
        `HTTP/1.1 401 Unauthorized\r\nWWW-Authenticate: ${buildDashboardAuthChallengeHeader({
          realm: config.operatorDashboardAuthRealm,
        })}\r\n\r\n`,
      );
      socket.destroy();
      return;
    }

    webSocketServer.handleUpgrade(req, socket, head, (ws) => {
      webSocketServer.emit('connection', ws, req, auth);
    });
  } catch (error) {
    logger.error('dashboard_upgrade_failed', {
      details: {
        path: req.url,
        error: serializeError(error),
      },
    });
    socket.destroy();
  }
});
|
||||
|
||||
server.listen(config.operatorDashboardControlPort, config.operatorDashboardControlHost, () => {
|
||||
logger.info('operator_dashboard_started', {
|
||||
details: {
|
||||
host: config.operatorDashboardControlHost,
|
||||
port: config.operatorDashboardControlPort,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Dispatches an authenticated `/api/*` request.
 *
 *   GET  /api/session                     -> the resolved auth context
 *   GET  /api/bootstrap?page&page_size    -> full dashboard bootstrap payload
 *   GET  /api/trades?page&page_size       -> one page of successful trades
 *   POST /api/control/:service/:action    -> forwards a control action upstream
 *   anything else                         -> 404 { error: 'not_found' }
 *
 * @param {{ req: object, res: object, url: URL, auth: object }} context
 * @returns {Promise<*>} whatever sendJson returns for the chosen response
 */
async function handleApiRequest({ req, res, url, auth }) {
  const isGet = req.method === 'GET';

  if (isGet && url.pathname === '/api/session') {
    return sendJson(res, 200, auth);
  }

  if (isGet && (url.pathname === '/api/bootstrap' || url.pathname === '/api/trades')) {
    const page = readPositiveIntParam(url.searchParams, 'page', 1);
    const pageSize = readPositiveIntParam(
      url.searchParams,
      'page_size',
      Number(config.operatorDashboardTradePageSize),
    );

    if (url.pathname === '/api/bootstrap') {
      const payload = await loadBootstrapPayload({ auth, page, pageSize });
      return sendJson(res, 200, payload);
    }

    const successfulTrades = await loadSuccessfulTradesPage(pool, { page, pageSize });
    return sendJson(res, 200, successfulTrades);
  }

  const controlMatch = req.method === 'POST'
    ? url.pathname.match(/^\/api\/control\/([^/]+)\/([^/]+)$/)
    : null;
  if (controlMatch) {
    const [, service, action] = controlMatch;
    const body = await readJsonBody(req);
    const control = resolveDashboardControl({ service, action });
    if (!control) {
      return sendJson(res, 404, {
        error: 'unknown_control',
      });
    }

    const result = await invokeControl(control, body || {});
    // Re-read the target service so the caller sees the post-action state.
    const serviceSnapshot = await loadServiceSnapshot(
      listDashboardServices(config).find((definition) => definition.service === control.service),
    );
    return sendJson(res, 200, {
      ok: true,
      control,
      result,
      service_snapshot: serviceSnapshot,
    });
  }

  return sendJson(res, 404, { error: 'not_found' });
}

/**
 * Reads a query parameter that must be a positive integer.
 *
 * Missing, blank, non-numeric, fractional, zero, or negative values yield
 * `fallback`, so values such as NaN never reach the pagination loaders.
 *
 * @param {URLSearchParams} searchParams
 * @param {string} name
 * @param {number} fallback
 * @returns {number}
 */
function readPositiveIntParam(searchParams, name, fallback) {
  const raw = searchParams.get(name);
  if (raw === null || raw.trim() === '') return fallback;
  const parsed = Number(raw);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}
|
||||
|
||||
/**
 * Gathers every data source the dashboard needs for its first render and
 * assembles them via buildDashboardBootstrap.
 *
 * Each database-backed source goes through safeSourceLoad, so a failing source
 * is replaced by its fallback and reported in `sourceErrors` instead of failing
 * the whole bootstrap. All sources are loaded concurrently.
 *
 * @param {{ auth: object, page: number, pageSize: number }} options
 * @returns {Promise<object>} the payload produced by buildDashboardBootstrap
 */
async function loadBootstrapPayload({ auth, page, pageSize }) {
  const sourceErrors = [];
  const guarded = (name, loader, fallback) => safeSourceLoad(name, loader, fallback, sourceErrors);

  const [
    portfolioMetric,
    inventorySnapshot,
    marketPrice,
    recentQuotes,
    successfulTradeSummary,
    successfulTrades,
    fundingObservations,
    recentDepositStatuses,
    recentTradeDecisions,
    recentAlertTransitions,
    serviceSnapshots,
  ] = await Promise.all([
    guarded('portfolio_metric', () => loadLatestPortfolioMetric(pool), null),
    guarded('latest_inventory', () => loadLatestInventorySnapshot(pool), null),
    guarded('latest_market_price', () => loadLatestMarketPrice(pool), null),
    guarded(
      'recent_quotes',
      () => loadRecentQuotes(pool, {
        limit: config.operatorDashboardQuoteLimit,
      }),
      [],
    ),
    guarded(
      'successful_trade_summary',
      () => loadSuccessfulTradeSummary(pool),
      { total: 0, last_successful_trade_at: null },
    ),
    guarded(
      'successful_trades',
      () => loadSuccessfulTradesPage(pool, {
        page,
        pageSize,
      }),
      {
        page,
        page_size: pageSize,
        total: 0,
        total_pages: 1,
        items: [],
      },
    ),
    guarded('funding_observations', () => loadCurrentFundingObservations(pool), []),
    guarded('recent_deposit_statuses', () => loadRecentDepositStatuses(pool, { limit: 20 }), []),
    guarded('recent_trade_decisions', () => loadRecentTradeDecisions(pool, { limit: 20 }), []),
    guarded('recent_alert_transitions', () => loadRecentAlertTransitions(pool, { limit: 20 }), []),
    loadServiceSnapshots(),
  ]);

  const payload = buildDashboardBootstrap({
    config,
    auth,
    portfolioMetric,
    inventorySnapshot,
    marketPrice,
    recentQuotes,
    successfulTrades,
    successfulTradeSummary,
    fundingObservations,
    recentDepositStatuses,
    recentTradeDecisions,
    recentAlertTransitions,
    serviceSnapshots,
    sourceErrors,
  });

  dashboardRuntimeState.last_bootstrap_at = new Date().toISOString();
  // Only report a clean bootstrap when every source actually loaded; clearing
  // the field unconditionally would erase the failure that was just recorded.
  dashboardRuntimeState.last_bootstrap_error = sourceErrors.length > 0
    ? sourceErrors.at(-1).error
    : null;
  return payload;
}
|
||||
|
||||
/**
 * Snapshots every service returned by listDashboardServices(config),
 * querying them concurrently.
 *
 * @returns {Promise<object[]>} one snapshot per service, in listing order
 */
async function loadServiceSnapshots() {
  const pendingSnapshots = [];
  for (const definition of listDashboardServices(config)) {
    pendingSnapshots.push(loadServiceSnapshot(definition));
  }
  return Promise.all(pendingSnapshots);
}
|
||||
|
||||
/**
 * Probes one service's `/state` and `/healthz` endpoints in parallel.
 *
 * Neither probe failing makes this function reject: a failed probe yields
 * `null` for that field, and `error` carries the serialized `/state` failure
 * if there was one, otherwise the `/healthz` failure, otherwise `null`.
 *
 * @param {object} service service definition; `base_url` is read from it
 * @returns {Promise<object>} the definition plus reachable/state/health/error
 */
async function loadServiceSnapshot(service) {
  const outcomes = await Promise.allSettled(
    ['state', 'healthz'].map((endpoint) => fetchUpstreamJson(`${service.base_url}/${endpoint}`)),
  );
  const [statePayload, healthPayload] = outcomes.map(
    (outcome) => (outcome.status === 'fulfilled' ? outcome.value : null),
  );
  const firstFailure = outcomes.find((outcome) => outcome.status === 'rejected');

  return {
    ...service,
    reachable: Boolean(statePayload || healthPayload),
    state: statePayload,
    health: healthPayload,
    error: firstFailure ? serializeError(firstFailure.reason) : null,
  };
}
|
||||
|
||||
/**
 * Fetches JSON from an upstream service URL with an abort signal that fires
 * after `config.operatorDashboardUpstreamTimeoutMs`.
 *
 * @param {string} url
 * @returns {Promise<*>} whatever fetchJson resolves to
 */
async function fetchUpstreamJson(url) {
  const signal = AbortSignal.timeout(config.operatorDashboardUpstreamTimeoutMs);
  return fetchJson(url, { signal });
}
|
||||
|
||||
/**
 * Forwards a resolved control action to the owning service's control API.
 *
 * The target URL is the service base URL followed by `control.path`; the body
 * is always sent as JSON (an empty object when `body` is falsy) and the call
 * is bounded by `config.operatorDashboardUpstreamTimeoutMs`.
 *
 * @param {{ service: string, path: string, method: string }} control
 * @param {object} [body]
 * @returns {Promise<*>} the upstream response as returned by fetchJson
 */
async function invokeControl(control, body) {
  const targetUrl = `${lookupServiceBaseUrl(control.service)}${control.path}`;
  const requestOptions = {
    method: control.method,
    headers: {
      'content-type': 'application/json',
    },
    body: JSON.stringify(body || {}),
    signal: AbortSignal.timeout(config.operatorDashboardUpstreamTimeoutMs),
  };

  return fetchJson(targetUrl, requestOptions);
}
|
||||
|
||||
/**
 * Resolves the base URL of a dashboard-known service by name.
 *
 * @param {string} serviceName
 * @returns {string} the `base_url` of the matching service definition
 * @throws {Error} when no listed service has that name
 */
function lookupServiceBaseUrl(serviceName) {
  for (const definition of listDashboardServices(config)) {
    if (definition.service === serviceName) {
      return definition.base_url;
    }
  }
  throw new Error(`unknown service: ${serviceName}`);
}
|
||||
|
||||
/**
 * Sends one payload to every tracked WebSocket client whose readyState is 1.
 * The payload is JSON-encoded once and the same string is reused per client.
 *
 * @param {*} payload any JSON-serializable value
 */
function broadcast(payload) {
  const message = JSON.stringify(payload);
  [...webSockets]
    .filter((client) => client.readyState === 1)
    .forEach((client) => {
      client.send(message);
    });
}
|
||||
|
||||
/**
 * Authenticates a plain HTTP request against the configured dashboard auth.
 *
 * On failure the 401 (with a WWW-Authenticate challenge) is written here and
 * `null` is returned, so callers must not write to `res` again. On success the
 * resolved auth context is returned, and a session cookie header is set when
 * the context asks for one via `setSessionCookie`.
 *
 * @param {object} req incoming request; `authorization` and `cookie` headers are read
 * @param {object} res response used for the 401 and the Set-Cookie header
 * @returns {object|null} the auth context, or null when the 401 was sent
 */
function authenticateHttpRequest(req, res) {
  const { authorization = '', cookie = '' } = {
    authorization: req.headers.authorization || '',
    cookie: req.headers.cookie || '',
  };
  const auth = resolveDashboardRequestAuth({
    mode: config.operatorDashboardAuthMode,
    authorizationHeader: authorization,
    cookieHeader: cookie,
    username: config.operatorDashboardAuthUsername,
    password: config.operatorDashboardAuthPassword,
  });

  if (auth.authenticated) {
    if (auth.setSessionCookie) {
      const sessionCookie = buildDashboardSessionCookie({
        sessionCookieName: auth.sessionCookieName,
        sessionToken: auth.sessionToken,
      });
      res.setHeader('Set-Cookie', sessionCookie);
    }
    return auth;
  }

  const challenge = buildDashboardAuthChallengeHeader({
    realm: config.operatorDashboardAuthRealm,
  });
  res.statusCode = 401;
  res.setHeader('WWW-Authenticate', challenge);
  res.end('authentication required\n');
  return null;
}
|
||||
|
||||
/**
 * Loads the built dashboard frontend into a request-path -> asset map.
 *
 * Assets are read from `../operator-dashboard/dist/` relative to this module,
 * and `/` is registered as an alias of `/index.html`.
 *
 * @returns {Promise<Map<string, object>>} the populated asset map
 * @throws {Error} when the build output contains no `/index.html`
 */
async function loadStaticAssets() {
  const assetsByPath = new Map();
  const buildOutputUrl = new URL('../operator-dashboard/dist/', import.meta.url);
  await loadStaticAssetDirectory(buildOutputUrl, '', assetsByPath);

  if (!assetsByPath.get('/index.html')) {
    throw new Error('operator dashboard frontend is missing /index.html; run the dashboard build');
  }

  return assetsByPath.set('/', assetsByPath.get('/index.html'));
}
|
||||
|
||||
/**
 * Recursively reads a directory of built frontend files into `assets`.
 *
 * Each file is keyed by its request path (`/` plus its POSIX-style path
 * relative to the build root) and stored with its bytes and content type.
 * Entries are processed one at a time, subdirectories in place.
 *
 * @param {URL} directoryUrl directory to read; expected to end with `/`
 * @param {string} relativeDirectory path of that directory below the build root
 * @param {Map<string, object>} assets map that receives the loaded assets
 * @returns {Promise<void>}
 */
async function loadStaticAssetDirectory(directoryUrl, relativeDirectory, assets) {
  const children = await readdir(directoryUrl, { withFileTypes: true });

  for (const child of children) {
    const childRelativePath = path.posix.join(relativeDirectory, child.name);

    if (child.isDirectory()) {
      const childDirectoryUrl = new URL(`${child.name}/`, directoryUrl);
      await loadStaticAssetDirectory(childDirectoryUrl, childRelativePath, assets);
    } else {
      const fileBytes = await readFile(new URL(child.name, directoryUrl));
      assets.set(`/${childRelativePath}`, {
        contentType: resolveStaticContentType(child.name),
        body: fileBytes,
      });
    }
  }
}
|
||||
|
||||
/**
 * Maps a filename to the Content-Type header used when serving it.
 *
 * The extension is matched case-insensitively, so `LOGO.PNG` is served as an
 * image rather than as an opaque download. Unknown or missing extensions fall
 * back to `application/octet-stream`.
 *
 * The table is kept inside the function as a `switch` on purpose: static
 * assets are loaded by a top-level `await` earlier in the module, before any
 * module-level `const` declared down here would be initialized.
 *
 * @param {string} filename file name or path; only its extension is inspected
 * @returns {string} the content type for that extension
 */
function resolveStaticContentType(filename) {
  switch (path.extname(filename).toLowerCase()) {
    case '.html':
      return 'text/html; charset=utf-8';
    case '.js':
    case '.mjs':
      return 'text/javascript; charset=utf-8';
    case '.css':
      return 'text/css; charset=utf-8';
    case '.json':
    case '.map':
      return 'application/json; charset=utf-8';
    case '.txt':
      return 'text/plain; charset=utf-8';
    case '.svg':
      return 'image/svg+xml';
    case '.png':
      return 'image/png';
    case '.jpg':
    case '.jpeg':
      return 'image/jpeg';
    case '.gif':
      return 'image/gif';
    case '.webp':
      return 'image/webp';
    case '.ico':
      return 'image/x-icon';
    case '.woff':
      return 'font/woff';
    case '.woff2':
      return 'font/woff2';
    default:
      return 'application/octet-stream';
  }
}
|
||||
|
||||
/**
 * Runs a data-source loader without letting its failure propagate.
 *
 * Success clears any error previously recorded for `name` in
 * `dashboardRuntimeState.source_errors` and returns the loader result.
 * Failure records the serialized error in the runtime state, logs
 * 'dashboard_source_load_failed', appends to `sourceErrors` when one was
 * supplied, and returns `fallback`.
 *
 * @param {string} name identifier of the source, used as the error key
 * @param {() => Promise<*>} loader
 * @param {*} fallback value returned when the loader fails
 * @param {object[]|null} [sourceErrors] optional per-call error collector
 * @returns {Promise<*>} the loader result or `fallback`
 */
async function safeSourceLoad(name, loader, fallback, sourceErrors = null) {
  let loaded;
  try {
    loaded = await loader();
    delete dashboardRuntimeState.source_errors[name];
  } catch (error) {
    const failure = serializeError(error);
    const record = () => ({ source: name, error: failure });

    dashboardRuntimeState.source_errors[name] = record();
    dashboardRuntimeState.last_source_error_at = new Date().toISOString();
    logger.error('dashboard_source_load_failed', { details: record() });
    if (sourceErrors) {
      sourceErrors.push(record());
    }
    dashboardRuntimeState.last_bootstrap_error = failure;
    return fallback;
  }
  return loaded;
}
|
||||
|
||||
/**
 * Signal handler: stops accepting HTTP connections, closes every tracked
 * WebSocket, then stops and disconnects the Kafka consumer and ends the
 * Postgres pool before exiting with code 0. Cleanup failures are deliberately
 * ignored so that shutdown always reaches process.exit.
 */
async function shutdown() {
  const ignoreFailure = () => {};

  server.close(() => {});
  webSockets.forEach((socket) => {
    socket.close();
  });

  await liveConsumer.stop().catch(ignoreFailure);
  await liveConsumer.disconnect().catch(ignoreFailure);
  await pool.end().catch(ignoreFailure);
  process.exit(0);
}
|
||||
|
||||
process.on('SIGINT', shutdown);
|
||||
process.on('SIGTERM', shutdown);
|
||||
|
|
@ -4,8 +4,16 @@ import { createConsumer } from '../bus/kafka/consumer.mjs';
|
|||
import { createProducer } from '../bus/kafka/producer.mjs';
|
||||
import { startControlApi } from '../core/control-api.mjs';
|
||||
import { createAlertEngine } from '../core/alert-engine.mjs';
|
||||
import { createAlertNotifier } from '../core/alert-notifier.mjs';
|
||||
import { buildEventEnvelope, parseEventMessage } from '../core/event-envelope.mjs';
|
||||
import { createLogger, serializeError } from '../core/log.mjs';
|
||||
import { listDashboardServices } from '../core/operator-dashboard.mjs';
|
||||
import {
|
||||
ageMs,
|
||||
buildRuntimeAlert,
|
||||
createRuntimeHealthThresholds,
|
||||
evaluateRuntimeHealth,
|
||||
} from '../core/runtime-health.mjs';
|
||||
import {
|
||||
assertFundingObservationEvent,
|
||||
assertInventorySnapshotEvent,
|
||||
|
|
@ -15,8 +23,10 @@ import {
|
|||
assertTradeResult,
|
||||
} from '../core/schemas.mjs';
|
||||
import { loadConfig } from '../lib/config.mjs';
|
||||
import { fetchJson } from '../lib/http.mjs';
|
||||
|
||||
const config = loadConfig();
|
||||
const thresholds = createRuntimeHealthThresholds(config);
|
||||
const logger = createLogger({
|
||||
service: 'ops-sentinel',
|
||||
component: 'alerts',
|
||||
|
|
@ -48,6 +58,17 @@ const state = {
|
|||
last_error: null,
|
||||
last_event_at: null,
|
||||
publish_count: 0,
|
||||
last_runtime_eval_at: null,
|
||||
service_snapshots: [],
|
||||
service_health: [],
|
||||
latest_runtime_alerts: [],
|
||||
containment: {
|
||||
executor_auto_disarmed: false,
|
||||
last_action_at: null,
|
||||
last_action_reason: null,
|
||||
last_action_result: null,
|
||||
},
|
||||
anomaly_samples: [],
|
||||
};
|
||||
|
||||
const alertEngine = createAlertEngine({
|
||||
|
|
@ -59,6 +80,14 @@ const alertEngine = createAlertEngine({
|
|||
evaluationIntervalMs: config.opsSentinelEvaluationMs,
|
||||
});
|
||||
|
||||
const notifier = createAlertNotifier({
|
||||
webhookUrl: config.opsSentinelAlertWebhookUrl,
|
||||
webhookTimeoutMs: config.opsSentinelAlertWebhookTimeoutMs,
|
||||
logger: logger.child({ component: 'webhook-notifier' }),
|
||||
});
|
||||
|
||||
const monitoredServices = listDashboardServices(config);
|
||||
|
||||
for (const topic of topics) {
|
||||
await consumer.subscribe({ topic, fromBeginning: true });
|
||||
}
|
||||
|
|
@ -89,10 +118,9 @@ await consumer.run({
|
|||
const timer = setInterval(() => {
|
||||
if (state.paused) return;
|
||||
|
||||
const transitions = alertEngine.evaluate();
|
||||
publishTransitions(transitions).catch((error) => {
|
||||
evaluateRuntimeHealthLoop().catch((error) => {
|
||||
state.last_error = serializeError(error);
|
||||
logger.error('ops_sentinel_evaluate_failed', {
|
||||
logger.error('ops_sentinel_runtime_evaluate_failed', {
|
||||
topic: config.kafkaTopicOpsAlert,
|
||||
details: {
|
||||
error: serializeError(error),
|
||||
|
|
@ -115,16 +143,28 @@ const controlApi = startControlApi({
|
|||
publish_count: state.publish_count,
|
||||
last_error: state.last_error,
|
||||
last_event_at: state.last_event_at,
|
||||
last_runtime_eval_at: state.last_runtime_eval_at,
|
||||
service_snapshots: state.service_snapshots,
|
||||
service_health: state.service_health,
|
||||
latest_runtime_alerts: state.latest_runtime_alerts,
|
||||
containment: state.containment,
|
||||
notifier: notifier.getState(),
|
||||
anomaly_samples: state.anomaly_samples.slice(-thresholds.anomalyWindowSize),
|
||||
...alertEngine.getState(),
|
||||
};
|
||||
},
|
||||
},
|
||||
healthProvider: {
|
||||
getHealth() {
|
||||
const staleMs = ageMs(state.last_runtime_eval_at);
|
||||
return {
|
||||
ok: !state.paused && (staleMs == null || staleMs <= thresholds.sentinelStaleMs),
|
||||
paused: state.paused,
|
||||
last_event_at: state.last_event_at,
|
||||
last_runtime_eval_at: state.last_runtime_eval_at,
|
||||
last_error: state.last_error,
|
||||
stale: staleMs != null && staleMs > thresholds.sentinelStaleMs,
|
||||
stale_after_ms: thresholds.sentinelStaleMs,
|
||||
};
|
||||
},
|
||||
},
|
||||
|
|
@ -147,18 +187,438 @@ const controlApi = startControlApi({
|
|||
return { ok: true, paused: false };
|
||||
},
|
||||
},
|
||||
{
|
||||
method: 'POST',
|
||||
path: '/evaluate',
|
||||
handler: async () => {
|
||||
await evaluateRuntimeHealthLoop();
|
||||
return { ok: true, evaluated_at: state.last_runtime_eval_at };
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
/**
 * One full runtime-health evaluation pass of the sentinel.
 *
 * Snapshots every monitored service, derives anomaly and deterministic runtime
 * alerts from those snapshots, feeds them to the alert engine, refreshes the
 * state fields exposed by the control API, then publishes the resulting
 * transitions and finally runs maybeContainRisk.
 *
 * @returns {Promise<void>}
 */
async function evaluateRuntimeHealthLoop() {
  const evaluatedAt = new Date().toISOString();
  // Captured before being overwritten below; the deterministic alert builder
  // receives the timestamp of the previous pass.
  const previousRuntimeEvalAt = state.last_runtime_eval_at;

  const snapshots = await Promise.all(
    monitoredServices.map((definition) => loadServiceSnapshot(definition)),
  );
  state.service_snapshots = snapshots;
  state.last_runtime_eval_at = evaluatedAt;

  const servicesByName = Object.fromEntries(
    snapshots.map((snapshot) => [snapshot.service, snapshot]),
  );
  const anomalyAlerts = buildAnomalyAlerts({ servicesByName, now: evaluatedAt });
  const deterministicAlerts = buildDeterministicRuntimeAlerts({
    servicesByName,
    now: evaluatedAt,
    previousRuntimeEvalAt,
  });
  const desiredRuntimeAlerts = deterministicAlerts.concat(anomalyAlerts);

  const transitions = alertEngine.applyRuntimeAlerts(desiredRuntimeAlerts, evaluatedAt);
  const { active_alerts: activeAlerts } = alertEngine.getState(evaluatedAt);
  const healthByService = evaluateRuntimeHealth({
    servicesByName,
    activePair: config.activePair,
    activeAlerts,
    now: evaluatedAt,
  });
  state.service_health = Array.from(healthByService.values());
  state.latest_runtime_alerts = desiredRuntimeAlerts;

  await publishTransitions(transitions);
  await maybeContainRisk({ servicesByName, desiredRuntimeAlerts, now: evaluatedAt });
}
|
||||
|
||||
/**
 * Collects the `/state` and `/healthz` payloads of one monitored service.
 *
 * Both requests run in parallel and neither failure rejects this function.
 * A failed request leaves its field `null`; `error` holds the serialized
 * `/state` failure when there is one, else the `/healthz` failure, else `null`.
 *
 * @param {object} service service definition; `base_url` is read from it
 * @returns {Promise<object>} the definition plus reachable/state/health/error
 */
async function loadServiceSnapshot(service) {
  const settled = await Promise.allSettled([
    fetchUpstreamJson(`${service.base_url}/state`),
    fetchUpstreamJson(`${service.base_url}/healthz`),
  ]);
  const payloadOf = (outcome) => (outcome.status === 'fulfilled' ? outcome.value : null);
  const [stateOutcome, healthOutcome] = settled;

  let error = null;
  if (stateOutcome.status === 'rejected') {
    error = serializeError(stateOutcome.reason);
  } else if (healthOutcome.status === 'rejected') {
    error = serializeError(healthOutcome.reason);
  }

  const snapshot = {
    ...service,
    reachable: Boolean(payloadOf(stateOutcome) || payloadOf(healthOutcome)),
    state: payloadOf(stateOutcome),
    health: payloadOf(healthOutcome),
    error,
  };
  return snapshot;
}
|
||||
|
||||
/**
 * Calls `fetchJson` for an upstream URL with an abort signal that fires after
 * `config.operatorDashboardUpstreamTimeoutMs` milliseconds.
 *
 * @param {string} url - URL handed to `fetchJson` unchanged.
 * @returns {Promise<*>} Whatever `fetchJson` resolves to.
 */
async function fetchUpstreamJson(url) {
  const timeoutSignal = AbortSignal.timeout(config.operatorDashboardUpstreamTimeoutMs);
  const requestOptions = { signal: timeoutSignal };
  return fetchJson(url, requestOptions);
}
|
||||
|
||||
/**
 * Builds the rule-based runtime alerts for the current evaluation pass.
 *
 * Each section inspects one service snapshot (or the sentinel itself) and
 * appends an alert when a fixed condition holds: ingest disconnected, quotes
 * stale, publish stale, executor relay disconnected/stale, history writer
 * stalled, dashboard degraded, sentinel self-staleness, webhook delivery
 * failure, and finally "executor armed while quote truth is critically stale".
 *
 * Reads module state: `thresholds`, `config`, `state.anomaly_samples`,
 * `state.last_runtime_eval_at` and `notifier.getState()`.
 *
 * @param {object} args
 * @param {object} args.servicesByName - Service snapshots keyed by service name.
 * @param {string} args.now - Timestamp all ages are measured against.
 * @param {?string} [args.previousRuntimeEvalAt=null] - Timestamp of the
 *   previous evaluation; `null` disables the self-staleness check.
 * @returns {object[]} Alerts produced by `buildRuntimeAlert`, in check order.
 */
function buildDeterministicRuntimeAlerts({ servicesByName, now, previousRuntimeEvalAt = null }) {
  const alerts = [];

  // --- near-intents-ingest -------------------------------------------------
  const ingest = servicesByName['near-intents-ingest'];
  const ingestState = ingest?.state?.ingest || {};
  const ingestHealth = ingest?.health || {};
  const matchingQuoteAgeMs = ageMs(ingestState.last_matching_quote_at, now);
  const publishedAgeMs = ageMs(ingestState.last_published_at, now);
  const messageAgeMs = ageMs(ingestState.last_message_at, now);

  if (!ingest?.reachable || ingestState.connected === false || ingestHealth.connected === false) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'near_intents_ingest_disconnected',
      severity: 'critical',
      reason: 'near-intents-ingest websocket is disconnected or unreachable',
      service_scope: 'near-intents-ingest',
      pair: config.activePair,
      details: {
        reachable: ingest?.reachable ?? false,
        connected: ingestState.connected ?? ingestHealth.connected ?? null,
        last_message_at: ingestState.last_message_at || null,
        last_connected_at: ingestState.last_connected_at || null,
        last_disconnected_at: ingestState.last_disconnected_at || null,
      },
    }));
  }

  // A missing timestamp (null age) counts as stale.
  if (matchingQuoteAgeMs == null || matchingQuoteAgeMs > thresholds.ingestQuoteStaleMs) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'near_intents_quotes_stale',
      severity: 'critical',
      reason: matchingQuoteAgeMs == null
        ? 'near-intents-ingest has not observed a matching quote'
        : `matching quote freshness ${matchingQuoteAgeMs}ms exceeds ${thresholds.ingestQuoteStaleMs}ms`,
      service_scope: 'near-intents-ingest',
      pair: config.activePair,
      details: {
        last_matching_quote_at: ingestState.last_matching_quote_at || null,
        age_ms: matchingQuoteAgeMs,
        stale_after_ms: thresholds.ingestQuoteStaleMs,
        last_message_at: ingestState.last_message_at || null,
        message_age_ms: messageAgeMs,
      },
    }));
  }

  // The previous form was `stale || (quotesFresh && stale)`, which is
  // logically just `stale`; the redundant second operand has been dropped.
  const publishStale = publishedAgeMs == null || publishedAgeMs > thresholds.ingestPublishStaleMs;
  if (publishStale) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'near_intents_publish_stale',
      severity: 'critical',
      reason: publishedAgeMs == null
        ? 'near-intents-ingest has not published a matching quote'
        : `published quote freshness ${publishedAgeMs}ms exceeds ${thresholds.ingestPublishStaleMs}ms`,
      service_scope: 'near-intents-ingest',
      pair: config.activePair,
      details: {
        last_matching_quote_at: ingestState.last_matching_quote_at || null,
        last_published_at: ingestState.last_published_at || null,
        quote_age_ms: matchingQuoteAgeMs,
        publish_age_ms: publishedAgeMs,
        stale_after_ms: thresholds.ingestPublishStaleMs,
      },
    }));
  }

  // --- trade-executor ------------------------------------------------------
  const executor = servicesByName['trade-executor'];
  const relay = executor?.state?.relay || {};
  const relayAgeMs = ageMs(relay.last_message_at, now);
  // Unlike the ingest checks, a relay that has never reported a message
  // (null age) is not treated as stale here.
  if (!executor?.reachable || relay.connected === false || (relayAgeMs != null && relayAgeMs > thresholds.executorRelayStaleMs)) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'trade_executor_relay_disconnected',
      severity: 'critical',
      reason: !executor?.reachable || relay.connected === false
        ? 'trade-executor solver relay is disconnected or unreachable'
        : `trade-executor relay freshness ${relayAgeMs}ms exceeds ${thresholds.executorRelayStaleMs}ms`,
      service_scope: 'trade-executor',
      pair: config.activePair,
      details: {
        reachable: executor?.reachable ?? false,
        connected: relay.connected ?? null,
        last_message_at: relay.last_message_at || null,
        age_ms: relayAgeMs,
        stale_after_ms: thresholds.executorRelayStaleMs,
      },
    }));
  }

  // --- history-writer ------------------------------------------------------
  const writer = servicesByName['history-writer'];
  const writerState = writer?.state || {};
  const writerAgeMs = ageMs(writerState.last_write_at, now);
  const rawOffset = parseOffset(writerState.offsets?.[config.kafkaTopicRawNearIntentsQuote]?.offset);
  const normOffset = parseOffset(writerState.offsets?.[config.kafkaTopicNormSwapDemand]?.offset);
  const ingestPublishedCount = Number(ingestState.published_count || 0);
  // Most recent entry of the anomaly sample list; with no sample yet the
  // writer is given the benefit of the doubt.
  const lastSample = state.anomaly_samples.at(-1) || null;
  const writerProgressed = lastSample
    ? rawOffset > lastSample.raw_offset || normOffset > lastSample.norm_offset
    : true;
  // Ingest published more than at the last sample, yet neither offset moved.
  const flowStalled = Boolean(
    lastSample && ingestPublishedCount > lastSample.ingest_published_count && !writerProgressed,
  );

  if (
    !writer?.reachable
    || writerState.database_connectivity === false
    || writerAgeMs == null
    || writerAgeMs > thresholds.historyWriterStaleMs
    || flowStalled
  ) {
    let writerReason;
    if (!writer?.reachable) {
      writerReason = 'history-writer is unreachable';
    } else if (writerState.database_connectivity === false) {
      writerReason = 'history-writer lost database connectivity';
    } else if (flowStalled) {
      writerReason = 'ingest published quotes but durable history offsets stopped advancing';
    } else if (writerAgeMs == null) {
      // Without this branch the message below would read "freshness nullms".
      writerReason = 'history-writer has not recorded a durable write';
    } else {
      writerReason = `history-writer freshness ${writerAgeMs}ms exceeds ${thresholds.historyWriterStaleMs}ms`;
    }

    alerts.push(buildRuntimeAlert({
      alert_code: 'history_writer_stalled',
      severity: 'critical',
      reason: writerReason,
      service_scope: 'history-writer',
      pair: config.activePair,
      details: {
        last_write_at: writerState.last_write_at || null,
        age_ms: writerAgeMs,
        stale_after_ms: thresholds.historyWriterStaleMs,
        raw_offset: rawOffset,
        normalized_offset: normOffset,
        ingest_published_count: ingestPublishedCount,
      },
    }));
  }

  // --- operator-dashboard --------------------------------------------------
  const dashboard = servicesByName['operator-dashboard'];
  const dashboardState = dashboard?.state || {};
  const dashboardSourceErrorCount = Number(
    dashboardState.source_error_count
      || dashboard?.health?.source_error_count
      || 0,
  );
  const dashboardBootstrapAgeMs = ageMs(dashboardState.last_bootstrap_at, now);
  if (
    !dashboard?.reachable
    || dashboardSourceErrorCount > 0
    || (dashboardBootstrapAgeMs != null && dashboardBootstrapAgeMs > thresholds.dashboardSourceDegradedMs)
  ) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'operator_dashboard_source_degraded',
      severity: 'warning',
      reason: !dashboard?.reachable
        ? 'operator-dashboard is unreachable'
        : dashboardSourceErrorCount > 0
          ? 'operator-dashboard has upstream source errors'
          : `operator-dashboard bootstrap freshness ${dashboardBootstrapAgeMs}ms exceeds ${thresholds.dashboardSourceDegradedMs}ms`,
      service_scope: 'operator-dashboard',
      pair: config.activePair,
      details: {
        source_error_count: dashboardSourceErrorCount,
        last_source_error_at: dashboardState.last_source_error_at || null,
        last_bootstrap_at: dashboardState.last_bootstrap_at || null,
        bootstrap_age_ms: dashboardBootstrapAgeMs,
      },
    }));
  }

  // --- ops-sentinel (self) -------------------------------------------------
  const selfAgeMs = ageMs(previousRuntimeEvalAt, now);
  if (selfAgeMs != null && selfAgeMs > thresholds.sentinelStaleMs) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'sentinel_stale',
      severity: 'critical',
      reason: `ops-sentinel evaluation freshness ${selfAgeMs}ms exceeds ${thresholds.sentinelStaleMs}ms`,
      service_scope: 'ops-sentinel',
      pair: config.activePair,
      details: {
        last_runtime_eval_at: state.last_runtime_eval_at,
        previous_runtime_eval_at: previousRuntimeEvalAt,
        age_ms: selfAgeMs,
        stale_after_ms: thresholds.sentinelStaleMs,
      },
    }));
  }

  if (notifier.getState().last_delivery_status === 'failed') {
    alerts.push(buildRuntimeAlert({
      alert_code: 'sentinel_alert_delivery_failed',
      severity: 'warning',
      reason: 'external alert delivery failed',
      service_scope: 'ops-sentinel',
      pair: config.activePair,
      details: notifier.getState(),
    }));
  }

  // --- executor armed on top of stale quote truth --------------------------
  const criticalTruthCodes = [
    'near_intents_ingest_disconnected',
    'near_intents_quotes_stale',
    'near_intents_publish_stale',
    'history_writer_stalled',
  ];
  const executorArmed = executor?.state?.armed === true;
  const criticalTruthFailure = alerts.some((alert) => (
    alert.severity === 'critical' && criticalTruthCodes.includes(alert.alert_code)
  ));
  if (executorArmed && criticalTruthFailure) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'executor_armed_with_stale_truth',
      severity: 'critical',
      reason: 'trade-executor remains armed while upstream quote truth is critically stale',
      service_scope: 'trade-executor',
      pair: config.activePair,
      details: {
        armed: true,
        containment_available: true,
        recommended_action: 'disarm',
      },
    }));
  }

  return alerts;
}
|
||||
|
||||
/**
 * Records a counter sample for this evaluation and derives baseline-relative
 * anomaly alerts from the retained samples.
 *
 * Side effect: appends a sample to `state.anomaly_samples` and trims the list
 * to the newest `thresholds.anomalyWindowSize + 1` entries. Consecutive
 * samples are turned into delta "windows"; the newest window is compared with
 * the average of the older ones.
 *
 * @param {object} args
 * @param {object} args.servicesByName - Service snapshots keyed by service name.
 * @param {string} args.now - Timestamp stored on the new sample.
 * @returns {object[]} Warning alerts (possibly empty) for quote-rate collapse,
 *   reconnect spike and ingest/writer flow mismatch.
 */
function buildAnomalyAlerts({ servicesByName, now }) {
  const ingestState = servicesByName['near-intents-ingest']?.state?.ingest || {};
  const writerState = servicesByName['history-writer']?.state || {};
  const nextSample = {
    at: now,
    ingest_published_count: Number(ingestState.published_count || 0),
    ingest_reconnect_count: Number(ingestState.reconnect_count || 0),
    raw_offset: parseOffset(writerState.offsets?.[config.kafkaTopicRawNearIntentsQuote]?.offset),
    norm_offset: parseOffset(writerState.offsets?.[config.kafkaTopicNormSwapDemand]?.offset),
  };

  state.anomaly_samples.push(nextSample);
  state.anomaly_samples = state.anomaly_samples.slice(-(thresholds.anomalyWindowSize + 1));

  // Not enough history yet to form a baseline.
  if (state.anomaly_samples.length < thresholds.anomalyWindowSize) {
    return [];
  }

  const windows = [];
  for (let index = 1; index < state.anomaly_samples.length; index += 1) {
    const previous = state.anomaly_samples[index - 1];
    const current = state.anomaly_samples[index];
    windows.push({
      // Negative deltas are clamped to zero.
      quote_delta: Math.max(0, current.ingest_published_count - previous.ingest_published_count),
      reconnect_delta: Math.max(0, current.ingest_reconnect_count - previous.ingest_reconnect_count),
      durable_delta: Math.max(0, current.norm_offset - previous.norm_offset),
    });
  }

  const currentWindow = windows.at(-1);
  // With a window size of 1 (or less) a single sample passes the length check
  // above but yields no delta window; bail out instead of dereferencing
  // `undefined` below.
  if (!currentWindow) {
    return [];
  }

  const baseline = windows.slice(0, -1);
  const averageQuoteDelta = average(baseline.map((entry) => entry.quote_delta));
  const averageReconnectDelta = average(baseline.map((entry) => entry.reconnect_delta));
  const averageDurableDelta = average(baseline.map((entry) => entry.durable_delta));
  const alerts = [];

  if (
    averageQuoteDelta > 0
    && currentWindow.quote_delta <= averageQuoteDelta * thresholds.anomalyQuoteRateCollapseRatio
  ) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'near_intents_quote_rate_collapse',
      severity: 'warning',
      reason: 'quote publish rate collapsed versus recent baseline',
      service_scope: 'near-intents-ingest',
      pair: config.activePair,
      details: {
        current_window_quote_delta: currentWindow.quote_delta,
        baseline_average_quote_delta: averageQuoteDelta,
        collapse_ratio: thresholds.anomalyQuoteRateCollapseRatio,
      },
    }));
  }

  if (
    // `>= 0` is false only for NaN averages, so it filters out unusable baselines.
    averageReconnectDelta >= 0
    && currentWindow.reconnect_delta > 0
    // At least two reconnects in the window are required regardless of baseline.
    && currentWindow.reconnect_delta >= Math.max(2, averageReconnectDelta * thresholds.anomalyReconnectSpikeMultiplier)
  ) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'near_intents_reconnect_spike',
      severity: 'warning',
      reason: 'near-intents reconnect frequency spiked versus recent baseline',
      service_scope: 'near-intents-ingest',
      pair: config.activePair,
      details: {
        current_window_reconnect_delta: currentWindow.reconnect_delta,
        baseline_average_reconnect_delta: averageReconnectDelta,
        spike_multiplier: thresholds.anomalyReconnectSpikeMultiplier,
      },
    }));
  }

  if (
    currentWindow.quote_delta > 0
    && currentWindow.durable_delta === 0
    // As above: only a NaN baseline makes this operand false.
    && averageDurableDelta >= 0
  ) {
    alerts.push(buildRuntimeAlert({
      alert_code: 'near_intents_pipeline_flow_mismatch',
      severity: 'warning',
      reason: 'ingest quote flow advanced while durable writer progress stalled',
      service_scope: 'history-writer',
      pair: config.activePair,
      details: {
        current_window_quote_delta: currentWindow.quote_delta,
        current_window_durable_delta: currentWindow.durable_delta,
        baseline_average_durable_delta: averageDurableDelta,
      },
    }));
  }

  return alerts;
}
|
||||
|
||||
/**
 * Disarms the trade executor when quote truth is critically broken.
 *
 * A POST to `${config.tradeExecutorControlBaseUrl}/disarm` is issued only when
 * all of the following hold: a critical truth alert is desired, the executor
 * snapshot reports `armed === true`, no auto-disarm is already recorded, and
 * the last containment action is older than `thresholds.containmentCooldownMs`.
 * The outcome (success or serialized error) is recorded on
 * `state.containment`; request failures are not re-thrown.
 *
 * @param {object} args
 * @param {object} args.servicesByName - Service snapshots keyed by service name.
 * @param {object[]} args.desiredRuntimeAlerts - Alerts produced by this evaluation.
 * @param {string} args.now - Timestamp recorded as the action time.
 * @returns {Promise<void>}
 */
async function maybeContainRisk({ servicesByName, desiredRuntimeAlerts, now }) {
  const truthAlertCodes = [
    'near_intents_ingest_disconnected',
    'near_intents_quotes_stale',
    'near_intents_publish_stale',
    'history_writer_stalled',
  ];
  const actionReason = 'critical_quote_truth_stale';
  const containment = state.containment;

  const truthIsBroken = desiredRuntimeAlerts.some(
    ({ severity, alert_code }) => severity === 'critical' && truthAlertCodes.includes(alert_code),
  );

  // Truth recovered: forget the auto-disarm marker so a later incident can act again.
  if (!truthIsBroken) {
    containment.executor_auto_disarmed = false;
    return;
  }

  const isArmed = servicesByName['trade-executor']?.state?.armed === true;
  if (!isArmed) return;
  if (containment.executor_auto_disarmed) return;

  const elapsedSinceActionMs = ageMs(containment.last_action_at, now);
  const coolingDown = elapsedSinceActionMs != null && elapsedSinceActionMs < thresholds.containmentCooldownMs;
  if (coolingDown) return;

  try {
    const response = await fetchJson(`${config.tradeExecutorControlBaseUrl}/disarm`, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
      },
      body: JSON.stringify({ reason: actionReason }),
      signal: AbortSignal.timeout(config.operatorDashboardUpstreamTimeoutMs),
    });
    containment.executor_auto_disarmed = true;
    containment.last_action_at = now;
    containment.last_action_reason = actionReason;
    containment.last_action_result = response;
  } catch (error) {
    // A failed attempt still stamps the action time, so the cooldown applies
    // to retries as well.
    containment.last_action_at = now;
    containment.last_action_reason = actionReason;
    containment.last_action_result = {
      ok: false,
      error: serializeError(error),
    };
  }
}
|
||||
|
||||
async function publishTransitions(transitions) {
|
||||
for (const transition of transitions) {
|
||||
const alertEventId = `${transition.alert_code}-${transition.status}-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`;
|
||||
const event = buildEventEnvelope({
|
||||
source: 'ops-sentinel',
|
||||
venue: 'unrip',
|
||||
eventType: 'ops_alert',
|
||||
observedAt: transition.last_evaluated_at,
|
||||
payload: {
|
||||
alert_event_id: `${transition.alert_code}-${transition.status}-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`,
|
||||
alert_event_id: alertEventId,
|
||||
...transition,
|
||||
},
|
||||
});
|
||||
|
|
@ -167,6 +627,10 @@ async function publishTransitions(transitions) {
|
|||
key: `${transition.alert_code}:${transition.service_scope}:${transition.tx_hash || transition.pair || 'global'}`,
|
||||
});
|
||||
state.publish_count += 1;
|
||||
await notifier.notify({
|
||||
...transition,
|
||||
alert_event_id: alertEventId,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -208,6 +672,17 @@ function normalizePayloadForAlert(topic, event) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Converts an offset value to a finite number.
 *
 * @param {*} value - Raw offset; `null`/`undefined` count as missing.
 * @returns {number} `Number(value)` when that is finite, otherwise 0.
 */
function parseOffset(value) {
  const numeric = value == null ? 0 : Number(value);
  return Number.isFinite(numeric) ? numeric : 0;
}
|
||||
|
||||
/**
 * Arithmetic mean of a list of values.
 *
 * @param {number[]} values - Values to average.
 * @returns {number} The mean, or 0 for an empty list.
 */
function average(values) {
  const count = values.length;
  if (!count) return 0;

  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / count;
}
|
||||
|
||||
async function shutdown() {
|
||||
clearInterval(timer);
|
||||
await controlApi.close().catch(() => {});
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import { loadConfig } from '../lib/config.mjs';
|
|||
import { buildQuoteResponseSubmission } from '../venues/near-intents/signing.mjs';
|
||||
import { startSolverRelayWs } from '../venues/near-intents/solver-relay-ws.mjs';
|
||||
import { createVerifierClient } from '../venues/near-intents/verifier-client.mjs';
|
||||
import { ageMs } from '../core/runtime-health.mjs';
|
||||
|
||||
const config = loadConfig();
|
||||
const logger = createLogger({
|
||||
|
|
@ -215,13 +216,42 @@ const controlApi = startControlApi({
|
|||
account_id: config.nearIntentsAccountId,
|
||||
signer_public_key: signer.getPublicKey().toString(),
|
||||
signer_registered: signerRegistered,
|
||||
relay: relayClient.getState(),
|
||||
...state,
|
||||
durable_control_state: armedStateStore.getState(),
|
||||
durable_state: stateStore.getState(),
|
||||
};
|
||||
},
|
||||
},
|
||||
healthProvider: {
|
||||
getHealth() {
|
||||
const relay = relayClient.getState();
|
||||
const freshnessAgeMs = ageMs(relay.last_message_at);
|
||||
return {
|
||||
ok: relay.connected && (freshnessAgeMs == null || freshnessAgeMs <= config.opsSentinelExecutorRelayStaleMs),
|
||||
connected: relay.connected,
|
||||
relay_last_message_at: relay.last_message_at,
|
||||
relay_freshness_age_ms: freshnessAgeMs,
|
||||
paused: state.paused,
|
||||
armed: state.armed,
|
||||
reason:
|
||||
relay.connected
|
||||
? freshnessAgeMs != null && freshnessAgeMs > config.opsSentinelExecutorRelayStaleMs
|
||||
? 'solver relay stale'
|
||||
: null
|
||||
: 'solver relay disconnected',
|
||||
};
|
||||
},
|
||||
},
|
||||
routes: [
|
||||
{
|
||||
method: 'POST',
|
||||
path: '/reconnect',
|
||||
handler: () => {
|
||||
relayClient.reconnect();
|
||||
return { ok: true, reconnecting: true };
|
||||
},
|
||||
},
|
||||
{
|
||||
method: 'POST',
|
||||
path: '/arm',
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ export function createAlertEngine({
|
|||
latest_trade_result: null,
|
||||
funding_observations: {},
|
||||
active_alerts: {},
|
||||
runtime_alert_keys: new Set(),
|
||||
recent_transitions: [],
|
||||
last_evaluated_at: null,
|
||||
};
|
||||
|
|
@ -67,6 +68,14 @@ export function createAlertEngine({
|
|||
now,
|
||||
});
|
||||
},
|
||||
applyRuntimeAlerts(desiredAlerts = [], now = new Date().toISOString()) {
|
||||
return reconcileRuntimeAlertState({
|
||||
state,
|
||||
desiredAlerts,
|
||||
now,
|
||||
recentTransitionLimit,
|
||||
});
|
||||
},
|
||||
getState(now = new Date().toISOString()) {
|
||||
return summarizeState({
|
||||
state,
|
||||
|
|
@ -329,6 +338,79 @@ function reconcileAlertState({ state, desired, now, recentTransitionLimit }) {
|
|||
return transitions;
|
||||
}
|
||||
|
||||
/**
 * Brings the runtime-owned part of `state.active_alerts` in line with the
 * desired alert list and returns the resulting transitions.
 *
 * - Keys tracked in `state.runtime_alert_keys` that are no longer desired and
 *   still active are removed and reported as `cleared`.
 * - Desired alerts that are not active yet are stored and reported as `raised`.
 * - Desired alerts that are already active are refreshed in place without
 *   producing a transition.
 *
 * Also replaces `state.runtime_alert_keys`, prepends the transitions to
 * `state.recent_transitions` (trimmed to `recentTransitionLimit`) and stamps
 * `state.last_evaluated_at`.
 *
 * @param {object} args
 * @param {object} args.state - Mutable alert-engine state.
 * @param {object[]} args.desiredAlerts - Runtime alerts that should be active.
 * @param {string} args.now - Evaluation timestamp.
 * @param {number} args.recentTransitionLimit - Maximum retained transitions.
 * @returns {object[]} Cleared transitions followed by raised transitions.
 */
function reconcileRuntimeAlertState({
  state,
  desiredAlerts,
  now,
  recentTransitionLimit,
}) {
  // Index the desired alerts by key; a later duplicate replaces an earlier one.
  const desiredByKey = new Map(
    (desiredAlerts || []).map((alert) => [
      buildAlertKey({
        alertCode: alert.alert_code,
        serviceScope: alert.service_scope,
        pair: alert.pair,
        assetId: alert.asset_id,
        txHash: alert.tx_hash,
      }),
      alert,
    ]),
  );

  const clearedTransitions = [];
  for (const key of state.runtime_alert_keys) {
    const active = state.active_alerts[key];
    if (desiredByKey.has(key) || !active) continue;

    const clearedAlert = {
      ...active,
      status: 'cleared',
      raised_at: active.raised_at || active.first_raised_at || now,
      cleared_at: now,
      last_evaluated_at: now,
    };
    delete state.active_alerts[key];
    clearedTransitions.push(clearedAlert);
  }

  const raisedTransitions = [];
  for (const [key, alert] of desiredByKey) {
    const active = state.active_alerts[key];

    if (active) {
      // Already raised: refresh the payload but keep the original timestamps.
      state.active_alerts[key] = {
        ...active,
        ...alert,
        status: 'raised',
        raised_at: active.raised_at || active.first_raised_at || now,
        first_raised_at: active.first_raised_at || active.raised_at || now,
        cleared_at: null,
        last_evaluated_at: now,
      };
      continue;
    }

    const raisedAlert = {
      ...alert,
      status: 'raised',
      first_raised_at: now,
      raised_at: now,
      cleared_at: null,
      last_evaluated_at: now,
    };
    state.active_alerts[key] = raisedAlert;
    raisedTransitions.push(raisedAlert);
  }

  const transitions = [...clearedTransitions, ...raisedTransitions];

  state.runtime_alert_keys = new Set(desiredByKey.keys());
  if (transitions.length > 0) {
    state.recent_transitions.unshift(...transitions);
    state.recent_transitions = state.recent_transitions.slice(0, recentTransitionLimit);
  }
  state.last_evaluated_at = now;
  return transitions;
}
|
||||
|
||||
function summarizeState({ state, evaluationIntervalMs, now }) {
|
||||
const activeAlerts = Object.values(state.active_alerts)
|
||||
.sort((left, right) => timestampValue(right.first_raised_at) - timestampValue(left.first_raised_at));
|
||||
|
|
|
|||
96
src/core/alert-notifier.mjs
Normal file
96
src/core/alert-notifier.mjs
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
import { postJson } from '../lib/http.mjs';
|
||||
import { serializeError } from './log.mjs';
|
||||
|
||||
/**
 * Creates a webhook notifier for alert transitions.
 *
 * The notifier is disabled when no `webhookUrl` is given. Successful
 * deliveries are remembered by delivery key so the same transition is not
 * posted twice; that memory is capped at `dedupeCacheLimit` keys (oldest key
 * evicted first) so it cannot grow without bound in a long-running process.
 *
 * @param {object} [options]
 * @param {string} [options.webhookUrl=''] - Target URL; empty disables delivery.
 * @param {number} [options.webhookTimeoutMs=5000] - Abort timeout per delivery.
 * @param {?object} [options.logger=null] - Optional logger; only `error` is used.
 * @param {number} [options.dedupeCacheLimit=1000] - Maximum number of delivery
 *   keys remembered for deduplication (values below 1 or non-finite values
 *   fall back to sensible bounds).
 * @returns {{notify: function(object): Promise<object>, getState: function(): object}}
 */
export function createAlertNotifier({
  webhookUrl = '',
  webhookTimeoutMs = 5_000,
  logger = null,
  dedupeCacheLimit = 1_000,
} = {}) {
  const maxDedupeKeys = Number.isFinite(dedupeCacheLimit)
    ? Math.max(1, Math.floor(dedupeCacheLimit))
    : 1_000;
  // A Set iterates in insertion order, so its first entry is always the oldest key.
  const delivered = new Set();
  const state = {
    enabled: Boolean(webhookUrl),
    webhook_url_configured: Boolean(webhookUrl),
    last_delivery_at: null,
    last_delivery_error: null,
    last_delivery_status: null,
    last_delivery_key: null,
    sent_count: 0,
    deduped_count: 0,
  };

  /** Remembers a delivered key and evicts the oldest keys beyond the cap. */
  function rememberDelivery(deliveryKey) {
    delivered.add(deliveryKey);
    while (delivered.size > maxDedupeKeys) {
      delivered.delete(delivered.values().next().value);
    }
  }

  return {
    /**
     * Posts one alert transition to the webhook.
     *
     * Never rejects because of a delivery failure: the failure is logged,
     * recorded in the notifier state and returned as `{ ok: false, error }`.
     *
     * @param {object} transition - Alert transition to deliver.
     * @returns {Promise<object>} `{ ok, skipped?, deduped?, reason?, error? }`.
     */
    async notify(transition) {
      if (!state.enabled) return { ok: false, skipped: true, reason: 'webhook_disabled' };

      const deliveryKey = buildDeliveryKey(transition);
      if (delivered.has(deliveryKey)) {
        state.deduped_count += 1;
        state.last_delivery_status = 'deduped';
        state.last_delivery_key = deliveryKey;
        return { ok: true, deduped: true };
      }

      const payload = {
        source: 'unrip',
        alert: {
          alert_code: transition.alert_code,
          status: transition.status,
          severity: transition.severity,
          service_scope: transition.service_scope,
          reason: transition.reason,
          pair: transition.pair || null,
          raised_at: transition.raised_at || transition.first_raised_at || null,
          cleared_at: transition.cleared_at || null,
          last_evaluated_at: transition.last_evaluated_at || null,
          details: transition.details || {},
        },
      };

      try {
        await postJson(webhookUrl, payload, {
          signal: AbortSignal.timeout(webhookTimeoutMs),
        });
        // Only successful deliveries are remembered, so a failed one can be retried.
        rememberDelivery(deliveryKey);
        state.sent_count += 1;
        state.last_delivery_at = new Date().toISOString();
        state.last_delivery_error = null;
        state.last_delivery_status = 'sent';
        state.last_delivery_key = deliveryKey;
        return { ok: true, deduped: false };
      } catch (error) {
        const serialized = serializeError(error);
        state.last_delivery_at = new Date().toISOString();
        state.last_delivery_error = serialized;
        state.last_delivery_status = 'failed';
        state.last_delivery_key = deliveryKey;
        logger?.error('alert_webhook_delivery_failed', {
          details: {
            error: serialized,
            delivery_key: deliveryKey,
            alert_code: transition.alert_code,
            service_scope: transition.service_scope,
          },
        });
        return { ok: false, error: serialized };
      }
    },
    /** @returns {object} Shallow copy of the notifier's delivery state. */
    getState() {
      return {
        ...state,
      };
    },
  };
}
|
||||
|
||||
/**
 * Builds the `|`-joined string that identifies one alert transition for
 * delivery bookkeeping.
 *
 * @param {object} transition - Alert transition.
 * @returns {string} Key made of alert code, service scope, pair, asset id,
 *   tx hash, status, raise time and clear time; missing optional parts
 *   contribute an empty segment.
 */
function buildDeliveryKey(transition) {
  const {
    alert_code: alertCode,
    service_scope: serviceScope,
    pair,
    asset_id: assetId,
    tx_hash: txHash,
    status,
    raised_at: raisedAt,
    first_raised_at: firstRaisedAt,
    cleared_at: clearedAt,
  } = transition;

  const segments = [
    alertCode,
    serviceScope,
    pair || '',
    assetId || '',
    txHash || '',
    status,
    raisedAt || firstRaisedAt || '',
    clearedAt || '',
  ];
  return segments.join('|');
}
|
||||
1200
src/core/operator-dashboard.mjs
Normal file
1200
src/core/operator-dashboard.mjs
Normal file
File diff suppressed because it is too large
Load diff
249
src/core/runtime-health.mjs
Normal file
249
src/core/runtime-health.mjs
Normal file
|
|
@ -0,0 +1,249 @@
|
|||
/** Every status value a service health summary can carry. */
export const SERVICE_HEALTH_LEVELS = ['healthy', 'warning', 'critical', 'offline', 'paused'];

/**
 * Numeric rank per service health status. `paused` shares rank 1 with
 * `warning`; `offline` has the highest rank.
 */
const HEALTH_RANK = {
  healthy: 0,
  warning: 1,
  critical: 2,
  offline: 3,
  paused: 1,
};

/** Numeric rank per alert severity, from `info` (lowest) to `critical`. */
const ALERT_RANK = {
  info: 0,
  warning: 1,
  critical: 2,
};
|
||||
|
||||
/**
 * Resolves the runtime-health thresholds from configuration.
 *
 * Each threshold is read from its `opsSentinel*` config key. A falsy value
 * (missing, empty string, 0) selects the built-in default, and so does a value
 * that does not convert to a number: a `NaN` threshold would make every
 * `age > threshold` comparison false and silently disable the check.
 *
 * @param {object} [config={}] - Configuration object with optional
 *   `opsSentinel*` keys (numbers or numeric strings).
 * @returns {object} Threshold values keyed by their camelCase names.
 */
export function createRuntimeHealthThresholds(config = {}) {
  const resolve = (configured, fallback) => {
    const parsed = Number(configured || fallback);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  return {
    ingestMessageStaleMs: resolve(config.opsSentinelIngestMessageStaleMs, 30_000),
    ingestQuoteStaleMs: resolve(config.opsSentinelIngestQuoteStaleMs, 30_000),
    ingestPublishStaleMs: resolve(config.opsSentinelIngestPublishStaleMs, 30_000),
    executorRelayStaleMs: resolve(config.opsSentinelExecutorRelayStaleMs, 30_000),
    historyWriterStaleMs: resolve(config.opsSentinelHistoryWriterStaleMs, 45_000),
    dashboardSourceDegradedMs: resolve(config.opsSentinelDashboardSourceDegradedMs, 30_000),
    sentinelStaleMs: resolve(config.opsSentinelSelfStaleMs, 20_000),
    anomalyWindowSize: resolve(config.opsSentinelAnomalyWindowSize, 6),
    anomalyQuoteRateCollapseRatio: resolve(config.opsSentinelAnomalyQuoteRateCollapseRatio, 0.25),
    anomalyReconnectSpikeMultiplier: resolve(config.opsSentinelAnomalyReconnectSpikeMultiplier, 2),
    containmentCooldownMs: resolve(config.opsSentinelContainmentCooldownMs, 60_000),
  };
}
|
||||
|
||||
/**
 * Derives a health summary for every service snapshot.
 *
 * Active alerts are grouped through `indexAlertsByService`; each service is
 * then evaluated by `deriveServiceHealth` with only its own group of alerts
 * (an empty list when the index has no entry for it).
 *
 * @param {object} [args]
 * @param {object} [args.servicesByName] - Service snapshots keyed by name.
 * @param {?string} [args.activePair] - Pair forwarded to each evaluation.
 * @param {object[]} [args.activeAlerts=[]] - Currently active alerts.
 * @param {string} [args.now] - Evaluation timestamp; defaults to the current
 *   time as an ISO string.
 * @returns {Map<string, object>} Health summaries keyed by service name, in
 *   the key order of `servicesByName`.
 */
export function evaluateRuntimeHealth({
  servicesByName,
  activePair,
  activeAlerts = [],
  now = new Date().toISOString(),
} = {}) {
  const alertsByService = indexAlertsByService(activeAlerts);

  const summaries = Object.entries(servicesByName || {}).map(([service, snapshot]) => {
    const summary = deriveServiceHealth({
      service,
      snapshot,
      activePair,
      activeAlerts: alertsByService.get(service) || [],
      now,
    });
    return [service, summary];
  });

  return new Map(summaries);
}
|
||||
|
||||
/**
 * Derives the health summary of one service from its snapshot and its alerts.
 *
 * The status starts as `paused`, `healthy` or `offline` and is only ever
 * escalated (through `escalateHealth`) by: a failed health check, the highest
 * active alert severity, and a handful of service-specific checks. Every
 * escalation also appends a human-readable entry to `reasons`.
 *
 * @param {object} [args]
 * @param {string} [args.service] - Service name; selects service-specific checks.
 * @param {object} [args.snapshot] - Snapshot with `state`, `health`, `reachable`.
 * @param {?string} [args.activePair=null] - Pair passed to `hasCriticalTruthAlert`.
 * @param {object[]} [args.activeAlerts=[]] - Alerts active for this service.
 * @param {string} [args.now] - Evaluation timestamp; defaults to the current
 *   time as an ISO string.
 * @returns {object} Summary with `status`, `reasons`, freshness and alert fields.
 */
export function deriveServiceHealth({
  service,
  snapshot,
  activePair = null,
  activeAlerts = [],
  now = new Date().toISOString(),
} = {}) {
  const state = snapshot?.state || {};
  const health = snapshot?.health || {};
  // Only an explicit `reachable: false` marks the service as unreachable.
  const reachable = snapshot?.reachable !== false;
  const paused = state.paused ?? health.paused ?? false;
  const highestAlertSeverity = highestAlertSeverityForService(activeAlerts);
  const freshnessAt = inferServiceFreshnessTimestamp(service, state, health);
  const freshnessAgeMs = ageMs(freshnessAt, now);
  const reasons = [];
  let status = paused ? 'paused' : reachable ? 'healthy' : 'offline';

  if (!reachable) {
    reasons.push('service unreachable');
  }

  if (health.ok === false && reachable) {
    status = escalateHealth(status, 'critical');
    reasons.push(health.reason || 'service health check failed');
  }

  if (highestAlertSeverity === 'critical' || highestAlertSeverity === 'warning') {
    // Name an alert that actually carries the highest severity; the first
    // list entry may be a lower-severity alert.
    const leadingAlert = activeAlerts.find((alert) => alert?.severity === highestAlertSeverity)
      || activeAlerts[0];
    status = escalateHealth(status, highestAlertSeverity);
    reasons.push(`${highestAlertSeverity} alert active (${leadingAlert?.alert_code || 'runtime'})`);
  }

  if (service === 'near-intents-ingest') {
    if (state.ingest?.connected === false) {
      status = escalateHealth(status, 'critical');
      reasons.push('websocket disconnected');
    }
    if (state.ingest?.last_matching_quote_at && state.ingest?.last_published_at) {
      const matchingAgeMs = ageMs(state.ingest.last_matching_quote_at, now);
      const publishedAgeMs = ageMs(state.ingest.last_published_at, now);
      // Publishing lags the latest matching quote by more than five seconds.
      if (matchingAgeMs != null && publishedAgeMs != null && publishedAgeMs > matchingAgeMs + 5_000) {
        status = escalateHealth(status, 'critical');
        reasons.push('quote publish path stalled');
      }
    }
  }

  if (service === 'trade-executor' && state.relay?.connected === false) {
    status = escalateHealth(status, 'critical');
    reasons.push('solver relay disconnected');
  }

  if (service === 'history-writer') {
    if (state.database_connectivity === false) {
      status = escalateHealth(status, 'critical');
      reasons.push('database connectivity failed');
    } else if (freshnessAgeMs != null && freshnessAgeMs > 45_000) {
      // NOTE(review): fixed 45s limit; it is not read from the configurable
      // thresholds, so it can diverge from them.
      status = escalateHealth(status, 'warning');
      reasons.push('writer freshness degraded');
    }
  }

  if (service === 'operator-dashboard') {
    if ((state.source_error_count || 0) > 0 || (health.source_error_count || 0) > 0) {
      status = escalateHealth(status, 'warning');
      reasons.push('dashboard source degraded');
    }
  }

  if (
    ['strategy-engine', 'trade-executor'].includes(service)
    && (state.armed ?? false)
    && hasCriticalTruthAlert(activeAlerts, activePair)
  ) {
    status = escalateHealth(status, 'critical');
    reasons.push('armed while critical upstream truth is stale');
  }

  return {
    service,
    status,
    reachable,
    paused,
    armed: state.armed ?? null,
    health_ok: status === 'healthy' || status === 'paused',
    highest_alert_severity: highestAlertSeverity,
    reasons,
    freshness_at: freshnessAt,
    freshness_age_ms: freshnessAgeMs,
  };
}
|
||||
|
||||
/**
 * Choose the timestamp used to judge how fresh a service is.
 *
 * Every known service has an ordered list of candidate fields; the first
 * truthy candidate is returned. Unknown services, and known services with no
 * truthy candidate, yield `null`.
 *
 * @param {string} service - Service name (for example 'near-intents-ingest').
 * @param {object} [state={}] - State object whose fields are inspected.
 * @param {object} [health={}] - Health object; only read for 'ops-sentinel'.
 * @returns {*} The first truthy candidate value, or `null`.
 */
export function inferServiceFreshnessTimestamp(service, state = {}, health = {}) {
  // Thunks keep the lookups lazy: only the entry matching `service` ever
  // touches `state` or `health`.
  const resolvers = new Map([
    ['near-intents-ingest', () => (
      state.ingest?.last_published_at
      || state.ingest?.last_matching_quote_at
      || state.ingest?.last_message_at
      || null
    )],
    ['market-reference-ingest', () => state.last_published_at || null],
    ['inventory-sync', () => state.last_sync_at || null],
    ['liquidity-manager', () => state.last_refresh_at || null],
    ['history-writer', () => state.last_write_at || state.last_metrics_at || null],
    ['ops-sentinel', () => (
      state.last_runtime_eval_at || state.last_evaluated_at || health.last_event_at || null
    )],
    ['strategy-engine', () => (
      state.latest_decision?.decision_at || state.latest_inventory_event?.ingested_at || null
    )],
    ['trade-executor', () => (
      state.relay?.last_message_at || state.last_quote_status?.created_at || null
    )],
    ['operator-dashboard', () => state.last_bootstrap_at || state.last_source_error_at || null],
  ]);

  const resolve = resolvers.get(service);
  return resolve ? resolve() : null;
}
|
||||
|
||||
/**
 * Assemble a runtime alert record with a fixed key order.
 *
 * @param {object} spec - Alert fields.
 * @param {*} spec.alert_code - Copied through unchanged.
 * @param {*} spec.severity - Copied through unchanged.
 * @param {*} spec.reason - Copied through unchanged.
 * @param {*} spec.service_scope - Copied through unchanged.
 * @param {*} [spec.pair=null] - Defaults to null when undefined.
 * @param {*} [spec.asset_id=null] - Defaults to null when undefined.
 * @param {*} [spec.tx_hash=null] - Defaults to null when undefined.
 * @param {object} [spec.details={}] - Defaults to a fresh empty object when undefined.
 * @returns {object} A new object holding exactly the eight fields above.
 */
export function buildRuntimeAlert(spec) {
  // Optional fields fall back only when undefined, exactly like parameter defaults.
  const {
    pair = null,
    asset_id = null,
    tx_hash = null,
    details = {},
  } = spec;

  return {
    alert_code: spec.alert_code,
    severity: spec.severity,
    reason: spec.reason,
    service_scope: spec.service_scope,
    pair,
    asset_id,
    tx_hash,
    details,
  };
}
|
||||
|
||||
/**
 * Milliseconds elapsed between `value` and `now`, clamped so it is never negative.
 *
 * @param {*} value - Anything accepted by `new Date(...)`; falsy values yield null.
 * @param {*} [now] - Reference instant; defaults to the current time as an ISO string.
 * @returns {number|null} Non-negative age in milliseconds, or null when `value`
 *   is falsy or either instant does not parse to a finite time.
 */
export function ageMs(value, now = new Date().toISOString()) {
  if (!value) return null;

  const [startMs, endMs] = [value, now].map((stamp) => new Date(stamp).getTime());
  const bothValid = [startMs, endMs].every((ms) => Number.isFinite(ms));

  // A timestamp in the future reports an age of zero rather than a negative one.
  return bothValid ? Math.max(0, endMs - startMs) : null;
}
|
||||
|
||||
/**
 * Find the severity with the greatest ALERT_RANK among the given alerts.
 *
 * Severities missing from ALERT_RANK rank as -1 and therefore never win; on a
 * tie the earliest alert's severity is kept.
 *
 * @param {Iterable<object>|null|undefined} alerts - Alerts to scan; falsy means none.
 * @returns {*} The winning severity, or null when no alert has a ranked severity.
 */
function highestAlertSeverityForService(alerts) {
  const unranked = { severity: null, rank: -1 };

  const winner = [...(alerts || [])].reduce((best, { severity }) => {
    const rank = ALERT_RANK[severity] ?? -1;
    // Strictly greater: equal ranks keep the earlier alert.
    return rank > best.rank ? { severity, rank } : best;
  }, unranked);

  return winner.severity;
}
|
||||
|
||||
/**
 * Group alerts by their `service_scope`.
 *
 * @param {Iterable<object>|null|undefined} activeAlerts - Alerts to group; falsy means none.
 * @returns {Map<*, object[]>} Map from service scope to its alerts, in input order.
 */
function indexAlertsByService(activeAlerts) {
  const byScope = new Map();

  for (const alert of activeAlerts || []) {
    const scope = alert.service_scope;
    if (byScope.has(scope)) {
      byScope.get(scope).push(alert);
    } else {
      byScope.set(scope, [alert]);
    }
  }

  return byScope;
}
|
||||
|
||||
/**
 * Report whether any critical alert applies to the active pair.
 *
 * A critical alert counts when it has no pair, when its pair equals
 * `activePair`, or when its alert code mentions 'stale' or 'disconnected'
 * (those count regardless of which pair they name).
 *
 * @param {Array<object>|null|undefined} alerts - Alerts to inspect; falsy means none.
 * @param {*} activePair - Pair identifier compared against `alert.pair`.
 * @returns {boolean} True when at least one alert matches.
 */
function hasCriticalTruthAlert(alerts, activePair) {
  const truthKeywords = ['stale', 'disconnected'];

  return (alerts || []).some((alert) => {
    // Null or undefined entries are skipped instead of throwing.
    if (alert?.severity !== 'critical') return false;
    if (alert.pair == null || alert.pair === activePair) return true;

    // A critical alert for another pair may carry no alert_code; treat that as
    // "no keyword" rather than calling .includes on undefined.
    const code = String(alert.alert_code ?? '');
    return truthKeywords.some((keyword) => code.includes(keyword));
  });
}
|
||||
|
||||
/**
 * Return whichever health status ranks higher in HEALTH_RANK.
 *
 * Statuses missing from HEALTH_RANK rank as 0. `next` replaces `current` only
 * when it ranks strictly higher, so a status never downgrades.
 *
 * @param {*} current - Status held so far.
 * @param {*} next - Candidate status.
 * @returns {*} `next` when it outranks `current`, otherwise `current`.
 */
function escalateHealth(current, next) {
  const rankOf = (level) => HEALTH_RANK[level] ?? 0;

  if (rankOf(current) < rankOf(next)) {
    return next;
  }
  return current;
}
|
||||
|
|
@ -19,6 +19,7 @@ const DEFAULTS = {
|
|||
strategyEngineControlPort: 8086,
|
||||
tradeExecutorControlPort: 8087,
|
||||
opsSentinelControlPort: 8088,
|
||||
operatorDashboardControlPort: 8090,
|
||||
kafkaBrokers: ['127.0.0.1:9092'],
|
||||
kafkaClientId: 'unrip',
|
||||
kafkaTopicRawNearIntentsQuote: 'raw.near_intents.quote',
|
||||
|
|
@ -36,6 +37,7 @@ const DEFAULTS = {
|
|||
kafkaConsumerGroupStrategy: 'strategy-engine-v1',
|
||||
kafkaConsumerGroupExecutor: 'trade-executor-v1',
|
||||
kafkaConsumerGroupOpsSentinel: 'ops-sentinel-v1',
|
||||
kafkaConsumerGroupOperatorDashboard: 'operator-dashboard-v1',
|
||||
strategyStateDir: './var/strategy-state',
|
||||
executorStateDir: './var/executor-state',
|
||||
liquidityStateDir: './var/liquidity-state',
|
||||
|
|
@ -73,6 +75,26 @@ const DEFAULTS = {
|
|||
fundingObservationStuckMs: 60 * 60 * 1000,
|
||||
opsSentinelEvaluationMs: 5_000,
|
||||
opsSentinelFundingCreditPendingMs: 5 * 60 * 1000,
|
||||
opsSentinelIngestMessageStaleMs: 30_000,
|
||||
opsSentinelIngestQuoteStaleMs: 30_000,
|
||||
opsSentinelIngestPublishStaleMs: 30_000,
|
||||
opsSentinelExecutorRelayStaleMs: 30_000,
|
||||
opsSentinelHistoryWriterStaleMs: 45_000,
|
||||
opsSentinelDashboardSourceDegradedMs: 30_000,
|
||||
opsSentinelSelfStaleMs: 20_000,
|
||||
opsSentinelAnomalyWindowSize: 6,
|
||||
opsSentinelAnomalyQuoteRateCollapseRatio: 0.25,
|
||||
opsSentinelAnomalyReconnectSpikeMultiplier: 2,
|
||||
opsSentinelContainmentCooldownMs: 60_000,
|
||||
opsSentinelAlertWebhookUrl: '',
|
||||
opsSentinelAlertWebhookTimeoutMs: 5_000,
|
||||
operatorDashboardAuthMode: 'stub',
|
||||
operatorDashboardAuthUsername: 'admin',
|
||||
operatorDashboardAuthPassword: '',
|
||||
operatorDashboardAuthRealm: 'unrip operator dashboard',
|
||||
operatorDashboardQuoteLimit: 10,
|
||||
operatorDashboardTradePageSize: 20,
|
||||
operatorDashboardUpstreamTimeoutMs: 3_000,
|
||||
};
|
||||
|
||||
function splitCsv(value) {
|
||||
|
|
@ -106,6 +128,13 @@ function buildAsset({ assetId, symbol, decimals, chain, withdrawAddress = '' })
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Build the default base URL of a service's control API.
 *
 * When the KUBERNETES_SERVICE_HOST environment variable is set to a non-empty
 * value, the URL uses the `<service>.<namespace>.svc.cluster.local` host name;
 * otherwise it uses 127.0.0.1.
 *
 * @param {object} target
 * @param {string} target.serviceName - Service name used in the cluster host name.
 * @param {number|string} target.port - Control API port.
 * @param {string} target.namespace - Namespace used in the cluster host name.
 * @returns {string} An `http://host:port` URL.
 */
function defaultControlBaseUrl({ serviceName, port, namespace }) {
  const inCluster = Boolean(process.env.KUBERNETES_SERVICE_HOST);
  const host = inCluster
    ? `${serviceName}.${namespace}.svc.cluster.local`
    : '127.0.0.1';

  return `http://${host}:${port}`;
}
|
||||
|
||||
export function loadConfig({ envPath = '.env' } = {}) {
|
||||
loadDotenv(envPath);
|
||||
|
||||
|
|
@ -157,48 +186,134 @@ export function loadConfig({ envPath = '.env' } = {}) {
|
|||
process.env.NEAR_INTENTS_CONTROL_PORT,
|
||||
DEFAULTS.nearIntentsControlPort,
|
||||
),
|
||||
nearIntentsControlBaseUrl:
|
||||
process.env.NEAR_INTENTS_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'near-intents-ingest',
|
||||
port: parseNumber(
|
||||
process.env.NEAR_INTENTS_CONTROL_PORT,
|
||||
DEFAULTS.nearIntentsControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
marketReferenceControlHost:
|
||||
process.env.MARKET_REFERENCE_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
marketReferenceControlPort: parseNumber(
|
||||
process.env.MARKET_REFERENCE_CONTROL_PORT,
|
||||
DEFAULTS.marketReferenceControlPort,
|
||||
),
|
||||
marketReferenceControlBaseUrl:
|
||||
process.env.MARKET_REFERENCE_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'market-reference-ingest',
|
||||
port: parseNumber(
|
||||
process.env.MARKET_REFERENCE_CONTROL_PORT,
|
||||
DEFAULTS.marketReferenceControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
inventorySyncControlHost:
|
||||
process.env.INVENTORY_SYNC_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
inventorySyncControlPort: parseNumber(
|
||||
process.env.INVENTORY_SYNC_CONTROL_PORT,
|
||||
DEFAULTS.inventorySyncControlPort,
|
||||
),
|
||||
inventorySyncControlBaseUrl:
|
||||
process.env.INVENTORY_SYNC_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'inventory-sync',
|
||||
port: parseNumber(
|
||||
process.env.INVENTORY_SYNC_CONTROL_PORT,
|
||||
DEFAULTS.inventorySyncControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
liquidityManagerControlHost:
|
||||
process.env.LIQUIDITY_MANAGER_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
liquidityManagerControlPort: parseNumber(
|
||||
process.env.LIQUIDITY_MANAGER_CONTROL_PORT,
|
||||
DEFAULTS.liquidityManagerControlPort,
|
||||
),
|
||||
liquidityManagerControlBaseUrl:
|
||||
process.env.LIQUIDITY_MANAGER_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'liquidity-manager',
|
||||
port: parseNumber(
|
||||
process.env.LIQUIDITY_MANAGER_CONTROL_PORT,
|
||||
DEFAULTS.liquidityManagerControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
historyWriterControlHost:
|
||||
process.env.HISTORY_WRITER_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
historyWriterControlPort: parseNumber(
|
||||
process.env.HISTORY_WRITER_CONTROL_PORT,
|
||||
DEFAULTS.historyWriterControlPort,
|
||||
),
|
||||
historyWriterControlBaseUrl:
|
||||
process.env.HISTORY_WRITER_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'history-writer',
|
||||
port: parseNumber(
|
||||
process.env.HISTORY_WRITER_CONTROL_PORT,
|
||||
DEFAULTS.historyWriterControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
opsSentinelControlHost:
|
||||
process.env.OPS_SENTINEL_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
opsSentinelControlPort: parseNumber(
|
||||
process.env.OPS_SENTINEL_CONTROL_PORT,
|
||||
DEFAULTS.opsSentinelControlPort,
|
||||
),
|
||||
opsSentinelControlBaseUrl:
|
||||
process.env.OPS_SENTINEL_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'ops-sentinel',
|
||||
port: parseNumber(
|
||||
process.env.OPS_SENTINEL_CONTROL_PORT,
|
||||
DEFAULTS.opsSentinelControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
strategyEngineControlHost:
|
||||
process.env.STRATEGY_ENGINE_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
strategyEngineControlPort: parseNumber(
|
||||
process.env.STRATEGY_ENGINE_CONTROL_PORT,
|
||||
DEFAULTS.strategyEngineControlPort,
|
||||
),
|
||||
strategyEngineControlBaseUrl:
|
||||
process.env.STRATEGY_ENGINE_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'strategy-engine',
|
||||
port: parseNumber(
|
||||
process.env.STRATEGY_ENGINE_CONTROL_PORT,
|
||||
DEFAULTS.strategyEngineControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
tradeExecutorControlHost:
|
||||
process.env.TRADE_EXECUTOR_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
tradeExecutorControlPort: parseNumber(
|
||||
process.env.TRADE_EXECUTOR_CONTROL_PORT,
|
||||
DEFAULTS.tradeExecutorControlPort,
|
||||
),
|
||||
tradeExecutorControlBaseUrl:
|
||||
process.env.TRADE_EXECUTOR_CONTROL_BASE_URL
|
||||
|| defaultControlBaseUrl({
|
||||
serviceName: 'trade-executor',
|
||||
port: parseNumber(
|
||||
process.env.TRADE_EXECUTOR_CONTROL_PORT,
|
||||
DEFAULTS.tradeExecutorControlPort,
|
||||
),
|
||||
namespace: projectNamespace,
|
||||
}),
|
||||
operatorDashboardControlHost:
|
||||
process.env.OPERATOR_DASHBOARD_CONTROL_HOST || DEFAULTS.nearIntentsControlHost,
|
||||
operatorDashboardControlPort: parseNumber(
|
||||
process.env.OPERATOR_DASHBOARD_CONTROL_PORT,
|
||||
DEFAULTS.operatorDashboardControlPort,
|
||||
),
|
||||
kafkaBrokers: splitCsv(process.env.KAFKA_BROKERS).length
|
||||
? splitCsv(process.env.KAFKA_BROKERS)
|
||||
: DEFAULTS.kafkaBrokers,
|
||||
|
|
@ -233,6 +348,9 @@ export function loadConfig({ envPath = '.env' } = {}) {
|
|||
process.env.KAFKA_CONSUMER_GROUP_EXECUTOR || DEFAULTS.kafkaConsumerGroupExecutor,
|
||||
kafkaConsumerGroupOpsSentinel:
|
||||
process.env.KAFKA_CONSUMER_GROUP_OPS_SENTINEL || DEFAULTS.kafkaConsumerGroupOpsSentinel,
|
||||
kafkaConsumerGroupOperatorDashboard:
|
||||
process.env.KAFKA_CONSUMER_GROUP_OPERATOR_DASHBOARD
|
||||
|| DEFAULTS.kafkaConsumerGroupOperatorDashboard,
|
||||
strategyStateDir: process.env.STRATEGY_STATE_DIR || DEFAULTS.strategyStateDir,
|
||||
executorStateDir: process.env.EXECUTOR_STATE_DIR || DEFAULTS.executorStateDir,
|
||||
liquidityStateDir: process.env.LIQUIDITY_STATE_DIR || DEFAULTS.liquidityStateDir,
|
||||
|
|
@ -333,5 +451,75 @@ export function loadConfig({ envPath = '.env' } = {}) {
|
|||
process.env.OPS_SENTINEL_FUNDING_STUCK_MS,
|
||||
DEFAULTS.fundingObservationStuckMs,
|
||||
),
|
||||
opsSentinelIngestMessageStaleMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_INGEST_MESSAGE_STALE_MS,
|
||||
DEFAULTS.opsSentinelIngestMessageStaleMs,
|
||||
),
|
||||
opsSentinelIngestQuoteStaleMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_INGEST_QUOTE_STALE_MS,
|
||||
DEFAULTS.opsSentinelIngestQuoteStaleMs,
|
||||
),
|
||||
opsSentinelIngestPublishStaleMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_INGEST_PUBLISH_STALE_MS,
|
||||
DEFAULTS.opsSentinelIngestPublishStaleMs,
|
||||
),
|
||||
opsSentinelExecutorRelayStaleMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_EXECUTOR_RELAY_STALE_MS,
|
||||
DEFAULTS.opsSentinelExecutorRelayStaleMs,
|
||||
),
|
||||
opsSentinelHistoryWriterStaleMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_HISTORY_WRITER_STALE_MS,
|
||||
DEFAULTS.opsSentinelHistoryWriterStaleMs,
|
||||
),
|
||||
opsSentinelDashboardSourceDegradedMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_DASHBOARD_SOURCE_DEGRADED_MS,
|
||||
DEFAULTS.opsSentinelDashboardSourceDegradedMs,
|
||||
),
|
||||
opsSentinelSelfStaleMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_SELF_STALE_MS,
|
||||
DEFAULTS.opsSentinelSelfStaleMs,
|
||||
),
|
||||
opsSentinelAnomalyWindowSize: parseNumber(
|
||||
process.env.OPS_SENTINEL_ANOMALY_WINDOW_SIZE,
|
||||
DEFAULTS.opsSentinelAnomalyWindowSize,
|
||||
),
|
||||
opsSentinelAnomalyQuoteRateCollapseRatio: parseNumber(
|
||||
process.env.OPS_SENTINEL_ANOMALY_QUOTE_RATE_COLLAPSE_RATIO,
|
||||
DEFAULTS.opsSentinelAnomalyQuoteRateCollapseRatio,
|
||||
),
|
||||
opsSentinelAnomalyReconnectSpikeMultiplier: parseNumber(
|
||||
process.env.OPS_SENTINEL_ANOMALY_RECONNECT_SPIKE_MULTIPLIER,
|
||||
DEFAULTS.opsSentinelAnomalyReconnectSpikeMultiplier,
|
||||
),
|
||||
opsSentinelContainmentCooldownMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_CONTAINMENT_COOLDOWN_MS,
|
||||
DEFAULTS.opsSentinelContainmentCooldownMs,
|
||||
),
|
||||
opsSentinelAlertWebhookUrl:
|
||||
process.env.OPS_SENTINEL_ALERT_WEBHOOK_URL || DEFAULTS.opsSentinelAlertWebhookUrl,
|
||||
opsSentinelAlertWebhookTimeoutMs: parseNumber(
|
||||
process.env.OPS_SENTINEL_ALERT_WEBHOOK_TIMEOUT_MS,
|
||||
DEFAULTS.opsSentinelAlertWebhookTimeoutMs,
|
||||
),
|
||||
operatorDashboardAuthMode:
|
||||
process.env.OPERATOR_DASHBOARD_AUTH_MODE || DEFAULTS.operatorDashboardAuthMode,
|
||||
operatorDashboardAuthUsername:
|
||||
process.env.OPERATOR_DASHBOARD_AUTH_USERNAME || DEFAULTS.operatorDashboardAuthUsername,
|
||||
operatorDashboardAuthPassword:
|
||||
process.env.OPERATOR_DASHBOARD_AUTH_PASSWORD || DEFAULTS.operatorDashboardAuthPassword,
|
||||
operatorDashboardAuthRealm:
|
||||
process.env.OPERATOR_DASHBOARD_AUTH_REALM || DEFAULTS.operatorDashboardAuthRealm,
|
||||
operatorDashboardQuoteLimit: parseNumber(
|
||||
process.env.OPERATOR_DASHBOARD_QUOTE_LIMIT,
|
||||
DEFAULTS.operatorDashboardQuoteLimit,
|
||||
),
|
||||
operatorDashboardTradePageSize: parseNumber(
|
||||
process.env.OPERATOR_DASHBOARD_TRADE_PAGE_SIZE,
|
||||
DEFAULTS.operatorDashboardTradePageSize,
|
||||
),
|
||||
operatorDashboardUpstreamTimeoutMs: parseNumber(
|
||||
process.env.OPERATOR_DASHBOARD_UPSTREAM_TIMEOUT_MS,
|
||||
DEFAULTS.operatorDashboardUpstreamTimeoutMs,
|
||||
),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
188
src/operator-dashboard/static/App.jsx
Normal file
188
src/operator-dashboard/static/App.jsx
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import { useEffect, useReducer } from 'react';
|
||||
|
||||
import BannerStack from './components/BannerStack.jsx';
|
||||
import NavRail from './components/NavRail.jsx';
|
||||
import StatusBar from './components/StatusBar.jsx';
|
||||
import { fetchJson } from './lib/api.js';
|
||||
import FundsPage from './pages/FundsPage.jsx';
|
||||
import StrategyPage from './pages/StrategyPage.jsx';
|
||||
import SystemPage from './pages/SystemPage.jsx';
|
||||
import { dashboardReducer, initialDashboardState } from './state/dashboardReducer.js';
|
||||
|
||||
const TRADE_PAGE_SIZE = 20;
|
||||
|
||||
function LoadingPanel() {
|
||||
return (
|
||||
<div className="panel">
|
||||
<h2>Loading dashboard</h2>
|
||||
<p className="panel-subtitle">Fetching session, durable history, and live service state.</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default function App() {
|
||||
const [state, dispatch] = useReducer(dashboardReducer, initialDashboardState);
|
||||
const currentPage = state.page || state.dashboard?.default_page || 'funds';
|
||||
const isReadyForSocket = Boolean(state.session && state.dashboard);
|
||||
const criticalBanner = state.dashboard?.status_bar?.highest_alert_severity === 'critical'
|
||||
? 'Critical runtime alerts are active. Dashboard health is degraded until the underlying truth path recovers.'
|
||||
: null;
|
||||
|
||||
async function loadBootstrap(page = 1) {
|
||||
const dashboard = await fetchJson(`/api/bootstrap?page=${page}&page_size=${TRADE_PAGE_SIZE}`);
|
||||
dispatch({ type: 'bootstrap.loaded', dashboard });
|
||||
return dashboard;
|
||||
}
|
||||
|
||||
async function loadTradesPage(page) {
|
||||
if (!Number.isFinite(page) || page < 1) return;
|
||||
|
||||
dispatch({ type: 'notice.changed', notice: 'Loading trade history page...' });
|
||||
dispatch({ type: 'error.changed', error: null });
|
||||
|
||||
try {
|
||||
const successfulTrades = await fetchJson(`/api/trades?page=${page}&page_size=${TRADE_PAGE_SIZE}`);
|
||||
dispatch({ type: 'trades.loaded', successfulTrades });
|
||||
dispatch({ type: 'notice.changed', notice: null });
|
||||
} catch (error) {
|
||||
dispatch({ type: 'error.changed', error: error.message });
|
||||
dispatch({ type: 'notice.changed', notice: null });
|
||||
}
|
||||
}
|
||||
|
||||
async function submitControl(service, action, body = {}, { reload = true } = {}) {
|
||||
dispatch({ type: 'notice.changed', notice: `${action} in progress` });
|
||||
dispatch({ type: 'error.changed', error: null });
|
||||
|
||||
try {
|
||||
const response = await fetchJson(`/api/control/${service}/${action}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'content-type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body || {}),
|
||||
});
|
||||
|
||||
dispatch({ type: 'control.result', result: response.result });
|
||||
dispatch({ type: 'notice.changed', notice: `${action} completed` });
|
||||
|
||||
if (reload) {
|
||||
const page = state.dashboard?.funds?.successful_trades?.page || 1;
|
||||
await loadBootstrap(page);
|
||||
}
|
||||
} catch (error) {
|
||||
dispatch({ type: 'error.changed', error: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false;
|
||||
|
||||
async function boot() {
|
||||
try {
|
||||
const session = await fetchJson('/api/session');
|
||||
if (cancelled) return;
|
||||
dispatch({ type: 'session.loaded', session });
|
||||
await loadBootstrap(1);
|
||||
} catch (error) {
|
||||
if (cancelled) return;
|
||||
dispatch({ type: 'error.changed', error: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
boot();
|
||||
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isReadyForSocket) return undefined;
|
||||
|
||||
let disposed = false;
|
||||
let reconnectTimer = null;
|
||||
let socket = null;
|
||||
|
||||
function connect() {
|
||||
if (disposed) return;
|
||||
|
||||
dispatch({ type: 'websocket.state.changed', websocketState: 'connecting' });
|
||||
const scheme = window.location.protocol === 'https:' ? 'wss' : 'ws';
|
||||
socket = new WebSocket(`${scheme}://${window.location.host}/ws`);
|
||||
|
||||
socket.addEventListener('open', () => {
|
||||
if (disposed) return;
|
||||
dispatch({ type: 'websocket.state.changed', websocketState: 'connected' });
|
||||
dispatch({ type: 'error.changed', error: null });
|
||||
});
|
||||
|
||||
socket.addEventListener('close', () => {
|
||||
if (disposed) return;
|
||||
dispatch({ type: 'websocket.state.changed', websocketState: 'disconnected' });
|
||||
reconnectTimer = window.setTimeout(connect, 2000);
|
||||
});
|
||||
|
||||
socket.addEventListener('error', () => {
|
||||
if (disposed) return;
|
||||
dispatch({ type: 'websocket.state.changed', websocketState: 'degraded' });
|
||||
});
|
||||
|
||||
socket.addEventListener('message', (event) => {
|
||||
if (disposed) return;
|
||||
dispatch({ type: 'socket.message.received', payload: JSON.parse(event.data) });
|
||||
});
|
||||
}
|
||||
|
||||
connect();
|
||||
|
||||
return () => {
|
||||
disposed = true;
|
||||
if (reconnectTimer) window.clearTimeout(reconnectTimer);
|
||||
socket?.close();
|
||||
};
|
||||
}, [isReadyForSocket]);
|
||||
|
||||
return (
|
||||
<div className="shell">
|
||||
<BannerStack
|
||||
criticalBanner={criticalBanner}
|
||||
error={state.error}
|
||||
notice={state.notice}
|
||||
sourceErrors={state.dashboard?.source_errors || []}
|
||||
/>
|
||||
|
||||
{!state.dashboard ? (
|
||||
<LoadingPanel />
|
||||
) : (
|
||||
<>
|
||||
<StatusBar status={state.dashboard.status_bar} websocketState={state.websocketState} />
|
||||
|
||||
<div className="app-grid">
|
||||
<NavRail
|
||||
activePage={currentPage}
|
||||
onPageChange={(page) => dispatch({ type: 'page.changed', page })}
|
||||
/>
|
||||
|
||||
<main className="content">
|
||||
{currentPage === 'funds' ? (
|
||||
<FundsPage
|
||||
funds={state.dashboard.funds}
|
||||
lastControlResult={state.lastControlResult}
|
||||
onControl={submitControl}
|
||||
onTradesPageChange={loadTradesPage}
|
||||
/>
|
||||
) : null}
|
||||
{currentPage === 'strategy' ? (
|
||||
<StrategyPage strategy={state.dashboard.strategy} />
|
||||
) : null}
|
||||
{currentPage === 'system' ? (
|
||||
<SystemPage onControl={submitControl} system={state.dashboard.system} />
|
||||
) : null}
|
||||
</main>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
18
src/operator-dashboard/static/components/BannerStack.jsx
Normal file
18
src/operator-dashboard/static/components/BannerStack.jsx
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
export default function BannerStack({ notice, error, sourceErrors, criticalBanner }) {
|
||||
return (
|
||||
<>
|
||||
{criticalBanner ? <div className="banner error">{criticalBanner}</div> : null}
|
||||
{notice ? <div className="banner ok">{notice}</div> : null}
|
||||
{error ? <div className="banner error">{error}</div> : null}
|
||||
{sourceErrors?.length ? (
|
||||
<div className="banner error">
|
||||
{sourceErrors.map((item) => (
|
||||
<div key={`${item.source}:${item.error?.message || 'unknown'}`}>
|
||||
{`${item.source}: ${item.error?.message || 'source unavailable'}`}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
) : null}
|
||||
</>
|
||||
);
|
||||
}
|
||||
23
src/operator-dashboard/static/components/ServiceCard.jsx
Normal file
23
src/operator-dashboard/static/components/ServiceCard.jsx
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import Pill from './Pill.jsx';
|
||||
import { formatAge, formatBoolean } from '../lib/format.js';
|
||||
|
||||
export default function ServiceCard({ service }) {
|
||||
const healthLabel = service.health_status || (service.health_ok ? 'healthy' : service.reachable ? 'degraded' : 'offline');
|
||||
|
||||
return (
|
||||
<div className="service-card">
|
||||
<div className="service-head">
|
||||
<strong>{service.label}</strong>
|
||||
<Pill label={healthLabel} stateLabel={healthLabel} />
|
||||
</div>
|
||||
<div className="service-detail">
|
||||
<div>{`Paused ${formatBoolean(service.paused)}`}</div>
|
||||
<div>{`Armed ${formatBoolean(service.armed)}`}</div>
|
||||
<div>{`Freshness ${formatAge(service.freshness_age_ms)}`}</div>
|
||||
{service.health_reasons?.length ? <div>{service.health_reasons.join(' | ')}</div> : null}
|
||||
<div className="mono">{service.base_url}</div>
|
||||
{service.last_error ? <div>{JSON.stringify(service.last_error)}</div> : null}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
113
src/operator-dashboard/static/pages/SystemPage.jsx
Normal file
113
src/operator-dashboard/static/pages/SystemPage.jsx
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import AlertsGrid from '../components/AlertsGrid.jsx';
|
||||
import MetricCard from '../components/MetricCard.jsx';
|
||||
import ServiceCard from '../components/ServiceCard.jsx';
|
||||
import TableFrame from '../components/TableFrame.jsx';
|
||||
import { formatBoolean, formatTimestamp } from '../lib/format.js';
|
||||
|
||||
export default function SystemPage({ system, onControl }) {
|
||||
return (
|
||||
<>
|
||||
<section className="panel">
|
||||
<div className="panel-head">
|
||||
<div>
|
||||
<div className="eyebrow">Runtime health</div>
|
||||
<h2>System</h2>
|
||||
<div className="panel-subtitle">
|
||||
Service health, alerting truth, writer freshness, and only safe control actions.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="button-row">
|
||||
{system.controls.map((control) => (
|
||||
<button
|
||||
key={`${control.service}:${control.action}`}
|
||||
className="button secondary"
|
||||
onClick={() => onControl(control.service, control.action)}
|
||||
type="button"
|
||||
>
|
||||
{control.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section className="panel">
|
||||
<div className="panel-head">
|
||||
<div>
|
||||
<div className="eyebrow">Service view</div>
|
||||
<h3>Health and freshness</h3>
|
||||
</div>
|
||||
</div>
|
||||
<div className="service-grid">
|
||||
{system.service_health.map((service) => (
|
||||
<ServiceCard key={service.service} service={service} />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section className="section-grid">
|
||||
<div className="panel">
|
||||
<div className="panel-head">
|
||||
<div>
|
||||
<div className="eyebrow">Alert state</div>
|
||||
<h3>Active alerts</h3>
|
||||
</div>
|
||||
</div>
|
||||
<AlertsGrid items={system.alerts.active} />
|
||||
</div>
|
||||
<div className="panel">
|
||||
<div className="panel-head">
|
||||
<div>
|
||||
<div className="eyebrow">Alert history</div>
|
||||
<h3>Recent transitions</h3>
|
||||
</div>
|
||||
</div>
|
||||
<AlertsGrid emptyMessage="No alert transitions are recorded yet." items={system.alerts.recent} />
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section className="panel">
|
||||
<div className="panel-head">
|
||||
<div>
|
||||
<div className="eyebrow">Persistence</div>
|
||||
<h3>Writer offsets and durability</h3>
|
||||
</div>
|
||||
</div>
|
||||
<div className="metric-grid">
|
||||
<MetricCard label="Database connectivity" meta={formatTimestamp(system.persistence.last_write_at)} value={formatBoolean(system.persistence.database_connectivity)} />
|
||||
<MetricCard
|
||||
label="Last metrics write"
|
||||
meta={system.persistence.metrics_error ? 'Metrics error present' : 'Metrics healthy'}
|
||||
value={formatTimestamp(system.persistence.last_metrics_at)}
|
||||
/>
|
||||
</div>
|
||||
<TableFrame style={{ marginTop: 14 }}>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Topic</th>
|
||||
<th>Partition</th>
|
||||
<th>Offset</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{Object.entries(system.persistence.offsets || {}).length ? (
|
||||
Object.entries(system.persistence.offsets || {}).map(([topic, offset]) => (
|
||||
<tr key={topic}>
|
||||
<td className="mono">{topic}</td>
|
||||
<td>{String(offset.partition ?? '')}</td>
|
||||
<td className="mono">{String(offset.offset ?? '')}</td>
|
||||
</tr>
|
||||
))
|
||||
) : (
|
||||
<tr>
|
||||
<td colSpan="3">No offsets captured yet.</td>
|
||||
</tr>
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
</TableFrame>
|
||||
</section>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
|
@ -101,3 +101,29 @@ test('executor submission failure produces an alert event and clears on recovery
|
|||
assert.equal(transitions[0].alert_code, 'executor_submission_failed');
|
||||
assert.equal(transitions[0].status, 'cleared');
|
||||
});
|
||||
|
||||
// Runtime alerts passed to applyRuntimeAlerts are raised when present and
// cleared once they are absent from a later evaluation.
test('runtime alerts raise and clear independently from event-derived alerts', () => {
  const engine = createEngine();
  const staleQuotesAlert = {
    alert_code: 'near_intents_quotes_stale',
    severity: 'critical',
    reason: 'quotes are stale',
    service_scope: 'near-intents-ingest',
    pair: 'nep141:btc.omft.near->nep141:eure.omft.near',
    asset_id: null,
    tx_hash: null,
    details: {
      age_ms: 100_000,
    },
  };

  // First evaluation reports the alert, so a single "raised" transition is expected.
  const raisedTransitions = engine.applyRuntimeAlerts([staleQuotesAlert], '2026-04-03T08:00:00.000Z');
  assert.equal(raisedTransitions.length, 1);
  assert.equal(raisedTransitions[0].alert_code, 'near_intents_quotes_stale');
  assert.equal(raisedTransitions[0].status, 'raised');

  // Five seconds later the alert is gone, so a single "cleared" transition is expected.
  const clearedTransitions = engine.applyRuntimeAlerts([], '2026-04-03T08:00:05.000Z');
  assert.equal(clearedTransitions.length, 1);
  assert.equal(clearedTransitions[0].alert_code, 'near_intents_quotes_stale');
  assert.equal(clearedTransitions[0].status, 'cleared');
});
|
||||
|
|
|
|||
53
test/alert-notifier.test.mjs
Normal file
53
test/alert-notifier.test.mjs
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import http from 'node:http';
|
||||
import assert from 'node:assert/strict';
|
||||
import test from 'node:test';
|
||||
|
||||
import { createAlertNotifier } from '../src/core/alert-notifier.mjs';
|
||||
|
||||
// Sends the same "raised" alert twice and then its "cleared" form to a local
// HTTP server, and checks that only two webhook requests arrive.
test('alert notifier dedupes repeated deliveries and records clear transitions separately', async () => {
  const requests = [];
  const server = http.createServer(async (req, res) => {
    let body = '';
    for await (const chunk of req) body += chunk;
    requests.push(JSON.parse(body));
    res.statusCode = 200;
    res.end('{}');
  });

  // Port 0 lets the OS pick a free port, read back through server.address().
  await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve));

  try {
    const address = server.address();
    const notifier = createAlertNotifier({
      webhookUrl: `http://127.0.0.1:${address.port}/alerts`,
    });

    const raised = {
      alert_code: 'near_intents_quotes_stale',
      status: 'raised',
      severity: 'critical',
      service_scope: 'near-intents-ingest',
      reason: 'quote truth stale',
      pair: 'btc->eure',
      raised_at: '2026-04-08T10:00:00.000Z',
      cleared_at: null,
      last_evaluated_at: '2026-04-08T10:00:00.000Z',
      details: {},
    };
    const cleared = {
      ...raised,
      status: 'cleared',
      cleared_at: '2026-04-08T10:01:00.000Z',
    };

    const first = await notifier.notify(raised);
    const second = await notifier.notify(raised);
    const third = await notifier.notify(cleared);

    assert.equal(first.ok, true);
    assert.equal(second.deduped, true);
    assert.equal(third.ok, true);
    assert.equal(requests.length, 2);
    assert.equal(requests[0].alert.status, 'raised');
    assert.equal(requests[1].alert.status, 'cleared');
  } finally {
    // Close the server even when an assertion or notify call throws, so a
    // failing run does not leave the listener open.
    await new Promise((resolve, reject) => server.close((error) => (error ? reject(error) : resolve())));
  }
});
|
||||
605
test/operator-dashboard.test.mjs
Normal file
605
test/operator-dashboard.test.mjs
Normal file
|
|
@ -0,0 +1,605 @@
|
|||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import {
|
||||
applyDashboardLiveEvent,
|
||||
buildDashboardBootstrap,
|
||||
buildProfitabilitySummary,
|
||||
createDashboardLiveState,
|
||||
resolveDashboardControl,
|
||||
} from '../src/core/operator-dashboard.mjs';
|
||||
import {
|
||||
buildDashboardSessionToken,
|
||||
parseBasicAuthorizationHeader,
|
||||
resolveDashboardRequestAuth,
|
||||
} from '../src/core/operator-dashboard-auth.mjs';
|
||||
|
||||
/**
 * Build the minimal config fixture used by the dashboard tests: the BTC and
 * EURe trading assets, the active pair derived from their ids, the quote
 * limit, and an asset registry keyed by asset id.
 *
 * @returns {object} Fixture config; registry values are the same objects as
 *   `tradingBtc` and `tradingEure`.
 */
function buildConfig() {
  const makeAsset = (assetId, symbol, decimals, chain) => ({ assetId, symbol, decimals, chain });

  const tradingBtc = makeAsset('nep141:btc.omft.near', 'BTC', 8, 'btc:mainnet');
  const tradingEure = makeAsset('nep141:eure.omft.near', 'EURe', 18, 'eth:100');
  const assets = [tradingBtc, tradingEure];

  return {
    activePair: assets.map((asset) => asset.assetId).join('->'),
    operatorDashboardQuoteLimit: 10,
    tradingBtc,
    tradingEure,
    assetRegistry: new Map(assets.map((asset) => [asset.assetId, asset])),
  };
}
|
||||
|
||||
// Checks the figures buildProfitabilitySummary derives from a metric whose
// portfolio is worth 110 now, 100 at baseline price and 105 at current price.
test('profitability summary separates baseline, hold, market move, and trading contribution', () => {
  const metric = {
    computed_at: '2026-04-04T09:05:00.000Z',
    baseline_anchor_at: '2026-04-04T08:00:00.000Z',
    baseline_status: 'active',
    payload: {
      current_portfolio_value_eure: '110',
      baseline_portfolio_value_eure_at_baseline_price: '100',
      baseline_portfolio_value_eure_at_current_price: '105',
    },
  };
  const successfulTradeSummary = {
    total: 4,
    last_successful_trade_at: '2026-04-04T09:00:00.000Z',
  };

  const summary = buildProfitabilitySummary({ metric, successfulTradeSummary });

  // Expected values are checked in this order, one field at a time.
  const expectations = [
    ['pnl_vs_deposit_baseline_eure', '10'],
    ['pnl_vs_simple_hold_eure', '5'],
    ['market_move_contribution_eure', '5'],
    ['trading_contribution_eure', '5'],
    ['computed_at', '2026-04-04T09:05:00.000Z'],
    ['recent_trade_count', 4],
    ['last_successful_trade_at', '2026-04-04T09:00:00.000Z'],
  ];
  for (const [field, expected] of expectations) {
    assert.equal(summary[field], expected);
  }
});
|
||||
|
||||
// Verifies that a metric payload carrying `external_cash_flows` makes the
// summary report itself as flow-adjusted, expose the flow count, and lead its
// caveats with a message mentioning "external cash flows".
//
// The high-precision decimal strings also pin exact (non-floating-point)
// arithmetic: each expected PnL value is the exact decimal difference of the
// corresponding fixture strings, e.g.
//   144.627100025978799978 - 141.7921998 = 2.834900225978799978
test('profitability summary flags cash-flow-adjusted benchmarks after later funding changes', () => {
  const summary = buildProfitabilitySummary({
    metric: {
      computed_at: '2026-04-07T15:43:30.463Z',
      baseline_anchor_at: '2026-04-02T18:10:43.569Z',
      baseline_status: 'active',
      payload: {
        current_portfolio_value_eure: '144.627100025978799978',
        baseline_portfolio_value_eure_at_baseline_price: '141.7921998',
        baseline_portfolio_value_eure_at_current_price: '142.8458998',
        // One deposit and one withdrawal, both after the baseline anchor.
        external_cash_flows: {
          flow_count: 2,
          deposit_count: 1,
          withdrawal_count: 1,
          latest_effective_at: '2026-04-07T15:20:54.757Z',
          net_value_eure_at_flow_time: '23.9999998',
        },
      },
    },
    successfulTradeSummary: {
      total: 7,
      last_successful_trade_at: '2026-04-02T20:17:44.768Z',
    },
  });

  assert.equal(summary.external_flow_adjusted, true);
  assert.equal(summary.external_flow_count, 2);
  assert.equal(summary.pnl_vs_deposit_baseline_eure, '2.834900225978799978');
  assert.equal(summary.pnl_vs_simple_hold_eure, '1.781200225978799978');
  assert.equal(summary.market_move_contribution_eure, '1.0537');
  assert.match(summary.caveats[0], /external cash flows/);
});
|
||||
|
||||
// Verifies the dashboard control allowlist: a liquidity-manager `refresh`
// resolves to the `/refresh` path with risk class `safe`, while arming the
// strategy engine from the dashboard resolves to null (not routable).
test('control routing only resolves the allowlisted safe dashboard actions', () => {
  const refresh = resolveDashboardControl({
    service: 'liquidity-manager',
    action: 'refresh',
  });
  const risky = resolveDashboardControl({
    service: 'strategy-engine',
    action: 'arm',
  });

  // Optional chaining keeps a null result an assertion failure, not a TypeError.
  assert.equal(refresh?.path, '/refresh');
  assert.equal(refresh?.risk_class, 'safe');
  assert.equal(risky, null);
});
|
||||
|
||||
// Verifies both basic-mode authentication paths:
//   1. a valid `Authorization: Basic ...` header authenticates and asks for a
//      session cookie to be set;
//   2. a later request carrying only the `operator_dashboard_session` cookie,
//      built from the same credentials, authenticates via `session_cookie`.
test('basic auth resolves operator identity and reuses a session cookie', () => {
  // Credentials are declared once so the header, token and resolver agree.
  const credentials = {
    username: 'admin',
    password: 'secret-password',
  };

  const authorizationHeader = `Basic ${Buffer.from('admin:secret-password').toString('base64')}`;
  const first = resolveDashboardRequestAuth({
    mode: 'basic',
    authorizationHeader,
    ...credentials,
  });

  const token = buildDashboardSessionToken({ ...credentials });
  const second = resolveDashboardRequestAuth({
    mode: 'basic',
    cookieHeader: `operator_dashboard_session=${token}`,
    ...credentials,
  });

  assert.equal(parseBasicAuthorizationHeader(authorizationHeader).username, 'admin');
  assert.equal(first.authenticated, true);
  assert.equal(first.setSessionCookie, true);
  assert.equal(second.authenticated, true);
  assert.equal(second.via, 'session_cookie');
});
|
||||
|
||||
// Verifies two live-state behaviours of applyDashboardLiveEvent:
//   - `norm.swap_demand` events feed `recent_quotes`, which holds at most ten
//     entries, newest first (eleven quotes are pushed, so quote-0 drops off);
//   - an `exec.trade_result` event with status `submitted` bumps the
//     successful-trade counter and timestamp, and the first returned update is
//     of type `status_bar.updated`.
test('live quote updates stay capped at ten items and successful trades update live counters', () => {
  const config = buildConfig();
  const state = createDashboardLiveState({
    config,
    successfulTradeCount: 2,
    lastSuccessfulTradeAt: '2026-04-04T08:00:00.000Z',
  });

  // One more quote than the configured limit of ten, one second apart.
  for (let index = 0; index < 11; index += 1) {
    const timestamp = `2026-04-04T08:00:${String(index).padStart(2, '0')}.000Z`;
    applyDashboardLiveEvent(state, {
      topic: 'norm.swap_demand',
      event: {
        observed_at: timestamp,
        ingested_at: timestamp,
        payload: {
          quote_id: `quote-${index}`,
          asset_in: config.tradingBtc.assetId,
          asset_out: config.tradingEure.assetId,
          pair: config.activePair,
          request_kind: 'exact_in',
          amount_in: '100',
          amount_out: '200',
        },
      },
    });
  }

  const updates = applyDashboardLiveEvent(state, {
    topic: 'exec.trade_result',
    event: {
      observed_at: '2026-04-04T08:30:00.000Z',
      ingested_at: '2026-04-04T08:30:00.000Z',
      payload: {
        status: 'submitted',
      },
    },
  });

  // Newest quote first; the oldest surviving entry is quote-1.
  assert.equal(state.recent_quotes.length, 10);
  assert.equal(state.recent_quotes[0].quote_id, 'quote-10');
  assert.equal(state.recent_quotes.at(-1).quote_id, 'quote-1');

  // Counter starts at 2 and the trade result adds one.
  assert.equal(state.successful_trade_count, 3);
  assert.equal(state.last_successful_trade_at, '2026-04-04T08:30:00.000Z');
  assert.equal(updates[0].type, 'status_bar.updated');
});
|
||||
|
||||
// End-to-end check of buildDashboardBootstrap with a fully populated, healthy
// input: portfolio metric, inventory, price, one credited BTC funding
// observation, one rejected trade decision, and snapshots for five services.
//
// Asserted outcomes:
//   - the default page is `funds`;
//   - the funds page carries the profitability timestamp, the liquidity
//     manager's `withdrawals_frozen` flag and the BTC deposit handle;
//   - the status bar reflects the armed flags of strategy engine and executor;
//   - the strategy page's first recent decision has a `decision_at` equal to
//     the `observed_at` of the `recentTradeDecisions` entry (the strategy-engine
//     snapshot's own `recent_decisions` entry carries no timestamp).
test('bootstrap aggregation keeps Funds as default and carries live control state', () => {
  const config = buildConfig();
  const bootstrap = buildDashboardBootstrap({
    config,
    auth: {
      authenticated: true,
      subject: 'local-operator',
      mode: 'stub',
      roles: ['operator'],
    },
    portfolioMetric: {
      computed_at: '2026-04-04T09:05:00.000Z',
      baseline_anchor_at: '2026-04-04T08:00:00.000Z',
      baseline_status: 'active',
      payload: {
        current_portfolio_value_eure: '110',
        baseline_portfolio_value_eure_at_baseline_price: '100',
        baseline_portfolio_value_eure_at_current_price: '105',
      },
    },
    inventorySnapshot: {
      ingested_at: '2026-04-04T09:00:00.000Z',
      payload: {
        synced_at: '2026-04-04T09:00:00.000Z',
        reconciliation_status: 'ok',
        // Amounts are integer strings; with the fixture's decimals (8 and 18)
        // these correspond to 1 BTC and 1 EURe.
        spendable: {
          [config.tradingBtc.assetId]: '100000000',
          [config.tradingEure.assetId]: '1000000000000000000',
        },
        pending_inbound: {
          [config.tradingBtc.assetId]: '0',
          [config.tradingEure.assetId]: '0',
        },
        pending_outbound: {
          [config.tradingBtc.assetId]: '0',
          [config.tradingEure.assetId]: '0',
        },
      },
    },
    marketPrice: {
      ingested_at: '2026-04-04T09:00:00.000Z',
      payload: {
        observed_at: '2026-04-04T09:00:00.000Z',
        eure_per_btc: '100',
      },
    },
    recentQuotes: [],
    successfulTrades: {
      page: 1,
      page_size: 20,
      total: 0,
      total_pages: 1,
      items: [],
    },
    successfulTradeSummary: {
      total: 1,
      last_successful_trade_at: '2026-04-04T09:30:00.000Z',
    },
    fundingObservations: [
      {
        payload: {
          funding_observation_id: 'fund-1',
          asset_id: config.tradingBtc.assetId,
          chain: config.tradingBtc.chain,
          funding_handle: 'btc-address',
          tx_hash: 'tx-1',
          status: 'CREDITED',
          amount: '100000000',
          confirmations: 3,
          first_seen_at: '2026-04-04T07:30:00.000Z',
          last_seen_at: '2026-04-04T07:40:00.000Z',
          credited_at: '2026-04-04T07:45:00.000Z',
        },
      },
    ],
    recentTradeDecisions: [
      {
        observed_at: '2026-04-04T09:10:00.000Z',
        ingested_at: '2026-04-04T09:10:01.000Z',
        payload: {
          decision_id: 'decision-1',
          quote_id: 'quote-1',
          pair: config.activePair,
          decision: 'rejected',
          decision_reason: 'strategy_disarmed',
        },
      },
    ],
    recentAlertTransitions: [],
    serviceSnapshots: [
      {
        service: 'liquidity-manager',
        label: 'Liquidity Manager',
        base_url: 'http://liquidity-manager',
        reachable: true,
        health: { ok: true },
        state: {
          paused: false,
          funding_observer_paused: false,
          withdrawals_frozen: true,
          withdrawal_defaults: {
            [config.tradingBtc.assetId]: 'btc-destination',
          },
          // Keyed by chain, not by asset id.
          deposit_addresses: {
            [config.tradingBtc.chain]: {
              address: 'btc-address',
              refreshed_at: '2026-04-04T09:00:00.000Z',
            },
          },
          tracked_withdrawals: {},
        },
      },
      {
        service: 'ops-sentinel',
        label: 'Ops Sentinel',
        base_url: 'http://ops-sentinel',
        reachable: true,
        health: { ok: true },
        state: {
          active_alerts: [],
          recent_transitions: [],
        },
      },
      {
        service: 'strategy-engine',
        label: 'Strategy Engine',
        base_url: 'http://strategy-engine',
        reachable: true,
        health: { ok: true },
        state: {
          armed: true,
          paused: false,
          threshold_pct: 2,
          max_notional_eure: 5,
          latest_decision: {
            decision_id: 'decision-1',
            quote_id: 'quote-1',
            pair: config.activePair,
            decision: 'rejected',
            decision_reason: 'strategy_disarmed',
          },
          recent_decisions: [{
            decision_id: 'decision-1',
            quote_id: 'quote-1',
            pair: config.activePair,
            decision: 'rejected',
            decision_reason: 'strategy_disarmed',
          }],
          skipped_counts: {},
        },
      },
      {
        service: 'trade-executor',
        label: 'Trade Executor',
        base_url: 'http://trade-executor',
        reachable: true,
        health: { ok: true },
        state: {
          armed: true,
          paused: false,
          draining: false,
          in_flight_count: 0,
          completed_count: 1,
        },
      },
      {
        service: 'history-writer',
        label: 'History Writer',
        base_url: 'http://history-writer',
        reachable: true,
        health: { ok: true },
        state: {
          database_connectivity: true,
          offsets: {},
        },
      },
    ],
  });

  assert.equal(bootstrap.default_page, 'funds');
  assert.equal(bootstrap.funds.profitability.computed_at, '2026-04-04T09:05:00.000Z');
  assert.equal(bootstrap.funds.funding.control_state.withdrawals_frozen, true);
  assert.equal(bootstrap.funds.funding.handles[0].address, 'btc-address');
  assert.equal(bootstrap.status_bar.strategy_armed, true);
  assert.equal(bootstrap.status_bar.executor_armed, true);
  assert.equal(bootstrap.strategy.strategy_state.recent_decisions[0].decision_at, '2026-04-04T09:10:00.000Z');
  assert.equal(bootstrap.strategy.strategy_state.recent_decisions[0].decision_reason, 'strategy_disarmed');
});
|
||||
|
||||
// Verifies that the system page takes service health from the ops-sentinel
// snapshot rather than from the service's own self-report.
//
// The near-intents-ingest snapshot claims to be fine (`reachable: true`,
// `health.ok: true`, `connected: true`), but its `last_published_at` is more
// than a day older than its last message. The sentinel snapshot carries a
// raised critical `near_intents_publish_stale` alert and a `service_health`
// row marking the service critical. The bootstrap must surface the sentinel's
// verdict for the service and as the status bar's highest severity.
test('system service health uses sentinel-derived severity so stale ingest is never shown healthy', () => {
  const config = buildConfig();
  const bootstrap = buildDashboardBootstrap({
    config,
    auth: {
      authenticated: true,
      subject: 'local-operator',
      mode: 'stub',
      roles: ['operator'],
    },
    portfolioMetric: null,
    inventorySnapshot: null,
    marketPrice: null,
    recentQuotes: [],
    successfulTrades: {
      page: 1,
      page_size: 20,
      total: 0,
      total_pages: 1,
      items: [],
    },
    successfulTradeSummary: {
      total: 0,
      last_successful_trade_at: null,
    },
    fundingObservations: [],
    recentTradeDecisions: [],
    recentAlertTransitions: [],
    serviceSnapshots: [
      {
        // Self-reported as healthy despite the stale publish timestamp.
        service: 'near-intents-ingest',
        label: 'Intents Ingest',
        base_url: 'http://near-intents-ingest',
        reachable: true,
        health: { ok: true },
        state: {
          ingest: {
            connected: true,
            last_message_at: '2026-04-04T09:00:00.000Z',
            last_matching_quote_at: '2026-04-04T09:00:00.000Z',
            last_published_at: '2026-04-03T02:12:00.000Z',
          },
        },
      },
      {
        service: 'ops-sentinel',
        label: 'Ops Sentinel',
        base_url: 'http://ops-sentinel',
        reachable: true,
        health: { ok: true },
        state: {
          active_alerts: [{
            alert_code: 'near_intents_publish_stale',
            status: 'raised',
            severity: 'critical',
            reason: 'published quote freshness is stale',
            service_scope: 'near-intents-ingest',
            pair: config.activePair,
            raised_at: '2026-04-04T09:30:00.000Z',
            first_raised_at: '2026-04-04T09:30:00.000Z',
            cleared_at: null,
            last_evaluated_at: '2026-04-04T09:30:00.000Z',
            details: {
              // 110_880_000 ms = 30 h 48 min.
              publish_age_ms: 110_880_000,
            },
          }],
          recent_transitions: [],
          service_health: [{
            service: 'near-intents-ingest',
            status: 'critical',
            reachable: true,
            paused: false,
            armed: null,
            health_ok: false,
            highest_alert_severity: 'critical',
            reasons: ['critical alert active (near_intents_publish_stale)'],
            freshness_at: '2026-04-03T02:12:00.000Z',
            freshness_age_ms: 110_880_000,
          }],
        },
      },
    ],
  });

  const ingest = bootstrap.system.service_health.find((service) => service.service === 'near-intents-ingest');

  assert.equal(ingest.health_ok, false);
  assert.equal(ingest.health_status, 'critical');
  assert.match(ingest.health_reasons.join(' '), /critical alert active/);
  assert.equal(bootstrap.status_bar.highest_alert_severity, 'critical');
});
|
||||
|
||||
// Verifies that a completed bridge deposit shows up in the funding summary
// even when `fundingObservations` is empty.
//
// The deposit is supplied twice, with matching tx hash, chain, asset, amount
// and address: as a `deposit_status_observed` entry in `recentDepositStatuses`
// and under `deposits` in the liquidity-manager snapshot state.
// NOTE(review): which of the two sources the implementation reads is not
// visible from this test; only the combined outcome is asserted.
//
// The raw amount '24999999800000000000' is expected back as '24.9999998',
// i.e. scaled by the EURe fixture's 18 decimals.
test('funding summary includes credited bridge deposits without observer-backed funding observations', () => {
  const config = buildConfig();
  const bootstrap = buildDashboardBootstrap({
    config,
    auth: {
      authenticated: true,
      subject: 'local-operator',
      mode: 'stub',
      roles: ['operator'],
    },
    portfolioMetric: null,
    inventorySnapshot: null,
    marketPrice: null,
    recentQuotes: [],
    successfulTrades: {
      page: 1,
      page_size: 20,
      total: 0,
      total_pages: 1,
      items: [],
    },
    successfulTradeSummary: {
      total: 0,
      last_successful_trade_at: null,
    },
    // Deliberately empty: no observer-backed funding observations exist.
    fundingObservations: [],
    recentDepositStatuses: [
      {
        observed_at: '2026-04-07T15:20:00.000Z',
        ingested_at: '2026-04-07T15:20:01.000Z',
        payload: {
          action_type: 'deposit_status_observed',
          chain: config.tradingEure.chain,
          asset_id: config.tradingEure.assetId,
          status: 'COMPLETED',
          details: {
            tx_hash: 'eth-tx-1',
            address: '0xdeposit',
            amount: '24999999800000000000',
          },
        },
      },
    ],
    recentTradeDecisions: [],
    recentAlertTransitions: [],
    serviceSnapshots: [
      {
        service: 'liquidity-manager',
        label: 'Liquidity Manager',
        base_url: 'http://liquidity-manager',
        reachable: true,
        health: { ok: true },
        state: {
          paused: false,
          funding_observer_paused: false,
          withdrawals_frozen: true,
          withdrawal_defaults: {},
          deposit_addresses: {
            [config.tradingEure.chain]: {
              address: '0xdeposit',
              refreshed_at: '2026-04-07T15:20:10.000Z',
            },
          },
          deposits: {
            eurDeposit: {
              tx_hash: 'eth-tx-1',
              chain: config.tradingEure.chain,
              asset_id: config.tradingEure.assetId,
              amount: '24999999800000000000',
              address: '0xdeposit',
              status: 'COMPLETED',
            },
          },
          tracked_withdrawals: {},
          last_refresh_at: '2026-04-07T15:20:10.000Z',
        },
      },
      {
        service: 'ops-sentinel',
        label: 'Ops Sentinel',
        base_url: 'http://ops-sentinel',
        reachable: true,
        health: { ok: true },
        state: {
          active_alerts: [],
          recent_transitions: [],
        },
      },
      {
        service: 'strategy-engine',
        label: 'Strategy Engine',
        base_url: 'http://strategy-engine',
        reachable: true,
        health: { ok: true },
        state: {
          armed: true,
          paused: false,
          recent_decisions: [],
          skipped_counts: {},
        },
      },
      {
        service: 'trade-executor',
        label: 'Trade Executor',
        base_url: 'http://trade-executor',
        reachable: true,
        health: { ok: true },
        state: {
          armed: true,
          paused: false,
          draining: false,
          in_flight_count: 0,
          completed_count: 0,
        },
      },
      {
        service: 'history-writer',
        label: 'History Writer',
        base_url: 'http://history-writer',
        reachable: true,
        health: { ok: true },
        state: {
          database_connectivity: true,
          offsets: {},
        },
      },
    ],
  });

  // The latest observation time matches the deposit status entry's observed_at.
  assert.equal(bootstrap.funds.funding.latest_observed_at, '2026-04-07T15:20:00.000Z');
  assert.equal(bootstrap.funds.funding.credited_deposits[0].asset_id, config.tradingEure.assetId);
  assert.equal(bootstrap.funds.funding.credited_deposits[0].amount, '24.9999998');
  assert.equal(bootstrap.funds.funding.recent_observations[0].tx_hash, 'eth-tx-1');
  assert.equal(bootstrap.funds.recent_deposits[0].tx_hash, 'eth-tx-1');
});
|
||||
Loading…
Add table
Reference in a new issue