Disable automatic executor containment
All checks were successful
deploy / deploy (push) Successful in 32s

Proof: Remove repo-owned automatic safety disarms and operator alert severity surfaces so arming state is no longer silently reverted by stale-quote alerts.

Assumptions: The operator now wants arming to remain explicit and durable even when quote-truth checks are stale or noisy, and simple reachability/state is a better surface than derived alert severity for now.

Still fake: The upstream quote-truth and health heuristics remain unreliable; this change removes their automatic containment effect instead of fixing their underlying accuracy.
This commit is contained in:
philipp 2026-04-09 23:42:22 +02:00
parent 208be20a1c
commit 65d3cff595
11 changed files with 107 additions and 182 deletions

View file

@ -14,7 +14,6 @@ import {
createRuntimeHealthThresholds,
evaluateRuntimeHealth,
shouldRaiseIngestPublishStale,
shouldContainExecutorForAlerts,
} from '../core/runtime-health.mjs';
import {
assertFundingObservationEvent,
@ -25,7 +24,6 @@ import {
assertTradeResult,
} from '../core/schemas.mjs';
import { loadConfig } from '../lib/config.mjs';
import { fetchJson } from '../lib/http.mjs';
const config = loadConfig();
const thresholds = createRuntimeHealthThresholds(config);
@ -65,7 +63,7 @@ const state = {
service_health: [],
latest_runtime_alerts: [],
containment: {
executor_auto_disarmed: false,
executor_auto_disarmed: null,
last_action_at: null,
last_action_reason: null,
last_action_result: null,
@ -100,11 +98,9 @@ await consumer.run({
try {
const event = parseEventMessage(message.value.toString());
const payload = normalizePayloadForAlert(topic, event);
const transitions = alertEngine.applyEvent(topic, payload);
normalizePayloadForAlert(topic, event);
state.last_error = null;
state.last_event_at = new Date().toISOString();
await publishTransitions(transitions);
} catch (error) {
state.last_error = serializeError(error);
logger.error('ops_sentinel_consume_failed', {
@ -148,11 +144,12 @@ const controlApi = startControlApi({
last_runtime_eval_at: state.last_runtime_eval_at,
service_snapshots: state.service_snapshots,
service_health: state.service_health,
latest_runtime_alerts: state.latest_runtime_alerts,
latest_runtime_alerts: [],
containment: state.containment,
notifier: notifier.getState(),
anomaly_samples: state.anomaly_samples.slice(-thresholds.anomalyWindowSize),
...alertEngine.getState(),
active_alerts: [],
recent_transitions: [],
};
},
},
@ -211,18 +208,20 @@ async function evaluateRuntimeHealthLoop() {
const anomalyAlerts = buildAnomalyAlerts({ servicesByName, now });
const runtimeAlerts = buildDeterministicRuntimeAlerts({ servicesByName, now, previousRuntimeEvalAt });
const desiredRuntimeAlerts = [...runtimeAlerts, ...anomalyAlerts];
const transitions = alertEngine.applyRuntimeAlerts(desiredRuntimeAlerts, now);
const activeAlerts = alertEngine.getState(now).active_alerts;
state.service_health = [...evaluateRuntimeHealth({
servicesByName,
activePair: config.activePair,
activeAlerts,
activeAlerts: [],
now,
}).values()];
state.latest_runtime_alerts = desiredRuntimeAlerts;
await publishTransitions(transitions);
await maybeContainRisk({ servicesByName, desiredRuntimeAlerts, now });
state.latest_runtime_alerts = [];
state.containment.executor_auto_disarmed = null;
state.containment.last_action_at = now;
state.containment.last_action_reason = 'automatic_executor_containment_disabled';
state.containment.last_action_result = {
ok: true,
automatic_containment_enabled: false,
};
}
async function loadServiceSnapshot(service) {
@ -563,46 +562,16 @@ function buildAnomalyAlerts({ servicesByName, now }) {
}
async function maybeContainRisk({ servicesByName, desiredRuntimeAlerts, now }) {
const executor = servicesByName['trade-executor'];
const criticalTruthFailure = shouldContainExecutorForAlerts(desiredRuntimeAlerts);
const executorArmed = executor?.state?.armed === true;
if (!criticalTruthFailure) {
state.containment.executor_auto_disarmed = false;
return;
}
const sinceLastActionMs = ageMs(state.containment.last_action_at, now);
if (
!executorArmed
|| state.containment.executor_auto_disarmed
|| (sinceLastActionMs != null && sinceLastActionMs < thresholds.containmentCooldownMs)
) {
return;
}
try {
const result = await fetchJson(`${config.tradeExecutorControlBaseUrl}/disarm`, {
method: 'POST',
headers: {
'content-type': 'application/json',
},
body: JSON.stringify({ reason: 'critical_quote_truth_stale' }),
signal: AbortSignal.timeout(config.operatorDashboardUpstreamTimeoutMs),
});
state.containment.executor_auto_disarmed = true;
void servicesByName;
void desiredRuntimeAlerts;
state.containment.executor_auto_disarmed = null;
state.containment.last_action_at = now;
state.containment.last_action_reason = 'critical_quote_truth_stale';
state.containment.last_action_result = result;
} catch (error) {
state.containment.last_action_at = now;
state.containment.last_action_reason = 'critical_quote_truth_stale';
state.containment.last_action_reason = 'automatic_executor_containment_disabled';
state.containment.last_action_result = {
ok: false,
error: serializeError(error),
ok: true,
automatic_containment_enabled: false,
};
}
}
async function publishTransitions(transitions) {
for (const transition of transitions) {

View file

@ -1,7 +1,7 @@
import { unitsToNumber } from './assets.mjs';
import { summarizeFundingObservations } from './funding-observations.mjs';
import { resolveDashboardRequestAuth } from './operator-dashboard-auth.mjs';
import { deriveServiceHealth, inferServiceFreshnessTimestamp as inferRuntimeFreshnessTimestamp } from './runtime-health.mjs';
import { inferServiceFreshnessTimestamp as inferRuntimeFreshnessTimestamp } from './runtime-health.mjs';
export const DASHBOARD_LIVE_QUOTE_LIMIT = 10;
@ -170,8 +170,8 @@ const CONTROL_DEFINITIONS = [
action: 'pause',
method: 'POST',
path: '/pause',
label: 'Pause Alerts',
description: 'Pause alert evaluation without changing trade arming state.',
label: 'Pause Sentinel',
description: 'Pause background observation without changing trade arming state.',
page: 'system',
risk_class: 'safe',
},
@ -180,8 +180,8 @@ const CONTROL_DEFINITIONS = [
action: 'resume',
method: 'POST',
path: '/resume',
label: 'Resume Alerts',
description: 'Resume alert evaluation.',
label: 'Resume Sentinel',
description: 'Resume background observation.',
page: 'system',
risk_class: 'safe',
},
@ -286,23 +286,7 @@ export function applyDashboardLiveEvent(state, { topic, event }) {
status_bar: buildLiveStatusBar(state),
}];
case 'ops.alert': {
const alert = normalizeAlert(event.payload);
const key = buildAlertKey(alert);
if (alert.status === 'raised') {
state.active_alerts.set(key, alert);
} else if (alert.status === 'cleared') {
state.active_alerts.delete(key);
}
return [{
type: 'alerts.updated',
alerts: {
active_alert_count: state.active_alerts.size,
highest_alert_severity: highestAlertSeverity([...state.active_alerts.values()]),
},
}, {
type: 'status_bar.updated',
status_bar: buildLiveStatusBar(state),
}];
return [];
}
case 'exec.trade_result':
if (event.payload.status !== 'submitted') return [];
@ -509,8 +493,8 @@ export function buildLiveStatusBar(state) {
btcAsset: state.btc_asset,
eureAsset: state.eure_asset,
}),
active_alert_count: state.active_alerts.size,
highest_alert_severity: highestAlertSeverity([...state.active_alerts.values()]),
active_alert_count: 0,
highest_alert_severity: null,
recent_submission_count: state.recent_submission_count,
last_submission_at: state.last_submission_at,
};
@ -534,8 +518,8 @@ function buildStatusBar({
inventory_freshness_ms: ageMs(
inventorySnapshot?.payload?.synced_at || inventorySnapshot?.ingested_at,
),
active_alert_count: activeAlerts.length,
highest_alert_severity: highestAlertSeverity(activeAlerts),
active_alert_count: 0,
highest_alert_severity: null,
strategy_armed: servicesByName['strategy-engine']?.state?.armed ?? null,
executor_armed: servicesByName['trade-executor']?.state?.armed ?? null,
current_total_portfolio_value_eure: profitability.current_total_portfolio_value_eure,
@ -1060,9 +1044,7 @@ function buildStrategySummary({
account_id: executorState.account_id || null,
signer_public_key: executorState.signer_public_key || null,
},
relevant_alerts: activeAlerts.filter((alert) => (
['strategy-engine', 'trade-executor', 'liquidity-manager'].includes(alert.service_scope)
)),
relevant_alerts: [],
omitted_controls: [
'Strategy arm and disarm are intentionally absent in this turn.',
'Executor drain remains intentionally absent in this turn.',
@ -1073,20 +1055,19 @@ function buildStrategySummary({
function buildSystemSummary({ servicesByName, activeAlerts, recentAlerts }) {
const historyWriterState = servicesByName['history-writer']?.state || {};
const sentinelServiceHealth = new Map(
(servicesByName['ops-sentinel']?.state?.service_health || []).map((entry) => [entry.service, entry]),
);
void activeAlerts;
void recentAlerts;
return {
service_health: Object.values(servicesByName).map((snapshot) => (
summarizeServiceSnapshot(snapshot, {
authoritativeHealth: sentinelServiceHealth.get(snapshot.service) || null,
activeAlerts,
authoritativeHealth: null,
activeAlerts: [],
})
)),
alerts: {
active: activeAlerts,
recent: recentAlerts,
active: [],
recent: [],
},
persistence: {
database_connectivity: historyWriterState.database_connectivity ?? null,
@ -1105,28 +1086,28 @@ function buildSystemSummary({ servicesByName, activeAlerts, recentAlerts }) {
function summarizeServiceSnapshot(snapshot, { authoritativeHealth = null, activeAlerts = [] } = {}) {
const state = snapshot.state || {};
const health = snapshot.health || {};
const derived = authoritativeHealth || deriveServiceHealth({
service: snapshot.service,
snapshot,
activeAlerts: activeAlerts.filter((alert) => alert.service_scope === snapshot.service),
});
const freshnessAt = derived.freshness_at || inferServiceFreshnessTimestamp(snapshot.service, state, health);
void authoritativeHealth;
void activeAlerts;
const freshnessAt = inferServiceFreshnessTimestamp(snapshot.service, state, health);
const reachable = snapshot.reachable !== false;
const online = reachable && health.ok !== false;
const healthStatus = online ? 'online' : reachable ? 'reachable' : 'offline';
return {
service: snapshot.service,
label: snapshot.label,
base_url: snapshot.base_url,
reachable: snapshot.reachable,
health_ok: derived.health_ok,
health_status: derived.status,
health_label: derived.label || derived.status,
health_reasons: derived.reasons || [],
highest_alert_severity: derived.highest_alert_severity || null,
paused: derived.paused ?? state.paused ?? health.paused ?? null,
armed: derived.armed ?? state.armed ?? null,
reachable,
health_ok: online,
health_status: healthStatus,
health_label: healthStatus,
health_reasons: [],
highest_alert_severity: null,
paused: state.paused ?? health.paused ?? null,
armed: state.armed ?? null,
draining: state.draining ?? null,
freshness_at: freshnessAt,
freshness_age_ms: derived.freshness_age_ms ?? ageMs(freshnessAt),
freshness_age_ms: ageMs(freshnessAt),
last_error: state.last_error || health.last_error || null,
summary: buildServiceSummary(snapshot.service, state),
};

View file

@ -319,15 +319,8 @@ export function shouldRaiseIngestPublishStale({
}
export function shouldContainExecutorForAlerts(alerts = []) {
const containmentAlertCodes = new Set([
'near_intents_ingest_disconnected',
'near_intents_publish_stale',
'history_writer_stalled',
]);
return (alerts || []).some((alert) => (
alert?.severity === 'critical' && containmentAlertCodes.has(alert.alert_code)
));
void alerts;
return false;
}
export function ageMs(value, now = new Date().toISOString()) {

View file

@ -24,9 +24,7 @@ export default function App() {
const [state, dispatch] = useReducer(dashboardReducer, initialDashboardState);
const currentPage = state.page || state.dashboard?.default_page || 'funds';
const isReadyForSocket = Boolean(state.session && state.dashboard);
const criticalBanner = state.dashboard?.status_bar?.highest_alert_severity === 'critical'
? 'Critical runtime alerts are active. Dashboard health is degraded until the underlying truth path recovers.'
: null;
const criticalBanner = null;
async function loadBootstrap(page = 1) {
const dashboard = await fetchJson(`/api/bootstrap?page=${page}&page_size=${TRADE_PAGE_SIZE}`);

View file

@ -2,7 +2,7 @@ import Pill from './Pill.jsx';
import { formatAge, formatBoolean } from '../lib/format.js';
export default function ServiceCard({ service }) {
const healthLabel = service.health_label || service.health_status || (service.health_ok ? 'healthy' : service.reachable ? 'degraded' : 'offline');
const healthLabel = service.health_label || service.health_status || (service.reachable ? 'online' : 'offline');
return (
<div className="service-card">
@ -11,10 +11,10 @@ export default function ServiceCard({ service }) {
<Pill label={healthLabel} stateLabel={healthLabel} />
</div>
<div className="service-detail">
<div>{`Reachable ${formatBoolean(service.reachable)}`}</div>
<div>{`Paused ${formatBoolean(service.paused)}`}</div>
<div>{`Armed ${formatBoolean(service.armed)}`}</div>
<div>{`Freshness ${formatAge(service.freshness_age_ms)}`}</div>
{service.health_reasons?.length ? <div>{service.health_reasons.join(' | ')}</div> : null}
<div className="mono">{service.base_url}</div>
{service.last_error ? <div>{JSON.stringify(service.last_error)}</div> : null}
</div>

View file

@ -23,7 +23,6 @@ export default function StatusBar({ status, websocketState }) {
['Reference BTC/EUR', formatEur(status.latest_reference_price_eure_per_btc)],
['Market Freshness', formatAge(status.market_freshness_ms)],
['Inventory Freshness', formatAge(status.inventory_freshness_ms)],
['Alerts', `${status.active_alert_count || 0} ${status.highest_alert_severity ? `(${status.highest_alert_severity})` : ''}`.trim()],
['Strategy Armed', formatBoolean(status.strategy_armed)],
['Executor Armed', formatBoolean(status.executor_armed)],
[SUBMISSION_COPY.statusTileLabel, `${status.recent_submission_count || 0} ${SUBMISSION_COPY.statusTileValueSuffix}`],

View file

@ -1,4 +1,3 @@
import AlertsGrid from '../components/AlertsGrid.jsx';
import EmptyState from '../components/EmptyState.jsx';
import MetricCard from '../components/MetricCard.jsx';
import Pill from '../components/Pill.jsx';
@ -110,7 +109,7 @@ export default function StrategyPage({ strategy }) {
<div className="panel strategy-side-panel">
<div className="panel-head">
<div>
<div className="eyebrow">Guard rails</div>
<div className="eyebrow">Controls</div>
<h3>Omitted risky controls</h3>
</div>
</div>
@ -119,8 +118,6 @@ export default function StrategyPage({ strategy }) {
<div key={item}>{item}</div>
))}
</EmptyState>
<h3 style={{ marginTop: 18 }}>Relevant alerts</h3>
<AlertsGrid items={strategy.relevant_alerts} />
</div>
</section>
</>

View file

@ -1,4 +1,3 @@
import AlertsGrid from '../components/AlertsGrid.jsx';
import MetricCard from '../components/MetricCard.jsx';
import ServiceCard from '../components/ServiceCard.jsx';
import TableFrame from '../components/TableFrame.jsx';
@ -13,7 +12,7 @@ export default function SystemPage({ system, onControl }) {
<div className="eyebrow">Runtime health</div>
<h2>System</h2>
<div className="panel-subtitle">
Service health, alerting truth, writer freshness, and only safe control actions.
Current service reachability, operator controls, and durable writer state.
</div>
</div>
</div>
@ -35,7 +34,7 @@ export default function SystemPage({ system, onControl }) {
<div className="panel-head">
<div>
<div className="eyebrow">Service view</div>
<h3>Health and freshness</h3>
<h3>Current state and freshness</h3>
</div>
</div>
<div className="service-grid">
@ -45,27 +44,6 @@ export default function SystemPage({ system, onControl }) {
</div>
</section>
<section className="section-grid">
<div className="panel">
<div className="panel-head">
<div>
<div className="eyebrow">Alert state</div>
<h3>Active alerts</h3>
</div>
</div>
<AlertsGrid items={system.alerts.active} />
</div>
<div className="panel">
<div className="panel-head">
<div>
<div className="eyebrow">Alert history</div>
<h3>Recent transitions</h3>
</div>
</div>
<AlertsGrid emptyMessage="No alert transitions are recorded yet." items={system.alerts.recent} />
</div>
</section>
<section className="panel">
<div className="panel-head">
<div>

View file

@ -51,18 +51,6 @@ function applySocketMessage(dashboard, payload, session) {
},
},
};
case 'alerts.updated':
return {
session,
dashboard: {
...dashboard,
status_bar: {
...dashboard.status_bar,
active_alert_count: payload.alerts.active_alert_count,
highest_alert_severity: payload.alerts.highest_alert_severity,
},
},
};
default:
return { dashboard, session };
}

View file

@ -4,6 +4,7 @@ import assert from 'node:assert/strict';
import {
applyDashboardLiveEvent,
buildDashboardBootstrap,
buildLiveStatusBar,
buildProfitabilitySummary,
createDashboardLiveState,
deriveQuoteLifecycleRows,
@ -201,6 +202,30 @@ test('live quote updates stay capped at ten items and submitted results update l
assert.equal(updates[0].type, 'status_bar.updated');
});
// Regression guard for the live dashboard state: an 'ops.alert' event must
// return no update messages and must leave the alert-related status bar
// fields at their empty values.
test('live dashboard ignores ops alert events so alert severity cannot re-enter operator state', () => {
const config = buildConfig();
const state = createDashboardLiveState({ config });
// Apply a raised alert carrying critical severity to the fresh live state.
const updates = applyDashboardLiveEvent(state, {
topic: 'ops.alert',
event: {
observed_at: '2026-04-04T08:30:00.000Z',
ingested_at: '2026-04-04T08:30:00.000Z',
payload: {
alert_code: 'near_intents_publish_stale',
status: 'raised',
severity: 'critical',
service_scope: 'near-intents-ingest',
},
},
});
// The alert topic yields no update messages at all.
assert.deepEqual(updates, []);
// The raised alert was not recorded in the live state's alert map.
assert.equal(state.active_alerts.size, 0);
// The status bar built from that state reports zero alerts and no severity.
assert.equal(buildLiveStatusBar(state).active_alert_count, 0);
assert.equal(buildLiveStatusBar(state).highest_alert_severity, null);
});
test('lifecycle derivation keeps executor blocking distinct from strategy rejection', () => {
const rows = deriveQuoteLifecycleRows({
recentQuotes: [{
@ -562,7 +587,7 @@ test('bootstrap normalizes actionable decision vocabulary before exposing it to
assert.doesNotMatch(JSON.stringify(bootstrap), /Actionable/);
});
test('system service health uses sentinel-derived severity so stale ingest is never shown healthy', () => {
test('system service state ignores sentinel alert severity and keeps alert surfaces empty', () => {
const config = buildConfig();
const bootstrap = buildDashboardBootstrap({
config,
@ -648,14 +673,16 @@ test('system service health uses sentinel-derived severity so stale ingest is ne
});
const ingest = bootstrap.system.service_health.find((service) => service.service === 'near-intents-ingest');
assert.equal(ingest.health_ok, false);
assert.equal(ingest.health_status, 'warning');
assert.equal(ingest.health_label, 'no recent quotes');
assert.match(ingest.health_reasons.join(' '), /connected, no recent quotes/);
assert.equal(bootstrap.status_bar.highest_alert_severity, 'critical');
assert.equal(ingest.health_ok, true);
assert.equal(ingest.health_status, 'online');
assert.equal(ingest.health_label, 'online');
assert.deepEqual(ingest.health_reasons, []);
assert.equal(bootstrap.status_bar.highest_alert_severity, null);
assert.deepEqual(bootstrap.system.alerts.active, []);
assert.deepEqual(bootstrap.system.alerts.recent, []);
});
test('ingest disconnected still renders as a critical transport failure', () => {
test('ingest disconnected renders as basic reachability state without alert severity', () => {
const config = buildConfig();
const bootstrap = buildDashboardBootstrap({
config,
@ -737,12 +764,12 @@ test('ingest disconnected still renders as a critical transport failure', () =>
});
const ingest = bootstrap.system.service_health.find((service) => service.service === 'near-intents-ingest');
assert.equal(ingest.health_status, 'critical');
assert.equal(ingest.health_label, 'disconnected');
assert.match(ingest.health_reasons.join(' '), /websocket disconnected/);
assert.equal(ingest.health_status, 'reachable');
assert.equal(ingest.health_label, 'reachable');
assert.deepEqual(ingest.health_reasons, []);
});
test('recent alert history collapses repeated flapping transitions into one readable entry', () => {
test('recent alert history remains empty even when sentinel exposes flapping transitions', () => {
const config = buildConfig();
const bootstrap = buildDashboardBootstrap({
config,
@ -825,12 +852,7 @@ test('recent alert history collapses repeated flapping transitions into one read
],
});
assert.equal(bootstrap.system.alerts.recent.length, 1);
assert.equal(bootstrap.system.alerts.recent[0].alert_code, 'near_intents_quotes_stale');
assert.equal(bootstrap.system.alerts.recent[0].status, 'raised');
assert.equal(bootstrap.system.alerts.recent[0].transition_count, 3);
assert.equal(bootstrap.system.alerts.recent[0].raised_count, 2);
assert.equal(bootstrap.system.alerts.recent[0].cleared_count, 1);
assert.deepEqual(bootstrap.system.alerts.recent, []);
});
test('funding summary includes credited bridge deposits without observer-backed funding observations', () => {

View file

@ -26,24 +26,24 @@ test('publish stale raises after a matching quote exists but no publish follows'
}), true);
});
test('executor containment ignores quote-stale-only conditions', () => {
test('executor containment stays disabled for quote-stale-only conditions', () => {
assert.equal(shouldContainExecutorForAlerts([{
alert_code: 'near_intents_quotes_stale',
severity: 'critical',
}]), false);
});
test('executor containment still triggers on broken truth path alerts', () => {
test('executor containment stays disabled even for broken truth path alerts', () => {
assert.equal(shouldContainExecutorForAlerts([{
alert_code: 'near_intents_ingest_disconnected',
severity: 'critical',
}]), true);
}]), false);
assert.equal(shouldContainExecutorForAlerts([{
alert_code: 'near_intents_publish_stale',
severity: 'critical',
}]), true);
}]), false);
assert.equal(shouldContainExecutorForAlerts([{
alert_code: 'history_writer_stalled',
severity: 'critical',
}]), true);
}]), false);
});