362 lines
12 KiB
Bash
Executable file
362 lines
12 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Soak-test controller configuration.
# Every setting has a default and can be overridden with the matching
# ORDERBOOKS_* environment variable.

# Unset variables are errors and pipelines report their first failure.
# `-e` is not enabled: the script records step exit codes itself.
set -uo pipefail

# --- Tools and locations -----------------------------------------------------
APP_DIR=${ORDERBOOKS_APP_DIR:-$(pwd)}
PYTHON_BIN=${ORDERBOOKS_PYTHON:-python3}
RCLONE_BIN=${ORDERBOOKS_RCLONE_BIN:-/usr/bin/rclone}
RCLONE_DEST_BASE=${ORDERBOOKS_RCLONE_DEST:-gdrive:orderbooks/polymarket/soak-test}

# --- Run identity and timing -------------------------------------------------
# SOAK_DATE defaults to today's UTC date; SOAK_ID is derived from it.
SOAK_DATE=${ORDERBOOKS_SOAK_DATE:-$(date -u +%F)}
SOAK_ID=${ORDERBOOKS_SOAK_ID:-soak_test_${SOAK_DATE}}
SOAK_SECONDS=${ORDERBOOKS_SOAK_SECONDS:-86400}
CYCLE_SECONDS=${ORDERBOOKS_SOAK_CYCLE_SECONDS:-300}

# --- Values handed to the collector script as command-line flags -------------
INTERVAL_SECONDS=${ORDERBOOKS_SOAK_INTERVAL_SECONDS:-30}
MARKET_LIMIT=${ORDERBOOKS_SOAK_MARKET_LIMIT:-2}
MARKET_END_SAFETY_SECONDS=${ORDERBOOKS_SOAK_MARKET_END_SAFETY_SECONDS:-420}
REQUEST_TIMEOUT_SECONDS=${ORDERBOOKS_SOAK_REQUEST_TIMEOUT_SECONDS:-15}
MAX_RETRIES=${ORDERBOOKS_SOAK_MAX_RETRIES:-2}
BACKOFF_SECONDS=${ORDERBOOKS_SOAK_BACKOFF_SECONDS:-2}

# --- Values handed to the discovery script as command-line flags -------------
DISCOVERY_LIMIT=${ORDERBOOKS_SOAK_DISCOVERY_LIMIT:-100}
DISCOVERY_MAX_PAGES=${ORDERBOOKS_SOAK_DISCOVERY_MAX_PAGES:-3}
DISCOVERY_TIMEOUT=${ORDERBOOKS_SOAK_DISCOVERY_TIMEOUT:-15}

# --- Output roots and marker files -------------------------------------------
LOCAL_ROOT=${ORDERBOOKS_SOAK_LOCAL_ROOT:-data/soak_test/${SOAK_DATE}}
MANIFEST_ROOT=${ORDERBOOKS_SOAK_MANIFEST_ROOT:-data/manifests/${SOAK_ID}}
START_MANIFEST=${ORDERBOOKS_SOAK_START_MANIFEST:-data/manifests/${SOAK_ID}_start.json}
FINAL_MANIFEST=${ORDERBOOKS_SOAK_FINAL_MANIFEST:-data/manifests/${SOAK_ID}_final.json}

# --- Paths derived from the roots above --------------------------------------
DISCOVERY_DIR=${LOCAL_ROOT}/discovery
LIVE_DIR=${LOCAL_ROOT}/live_sample
LOG_DIR=${LOCAL_ROOT}/logs
PID_FILE=${LOCAL_ROOT}/soak.pid
CYCLES_JSONL=${MANIFEST_ROOT}/cycles.jsonl
LOG_FILE=${LOG_DIR}/soak.log
# A single trailing slash on the base is dropped before the date is appended.
REMOTE_DEST=${RCLONE_DEST_BASE%/}/${SOAK_DATE}
|
|
|
|
# Mutable controller state shared by the signal handler, the EXIT handler,
# and the main loop.

# Flags (0/1): a stop signal arrived / the start marker step ran / the final
# marker step ran.
STOP_REQUESTED=0 START_WRITTEN=0 FINAL_WRITTEN=0

# Name of the signal that requested the stop, PID of the child currently being
# waited on, and the cycle in progress; empty when not applicable.
STOP_SIGNAL="" CURRENT_CHILD_PID="" CURRENT_CYCLE_ID=""

# Human-readable name of the step the controller is in.
CURRENT_PHASE="initializing"
|
|
|
|
# All relative paths in this script are resolved from the application directory.
if ! cd "${APP_DIR}"; then
  exit 2
fi

# Create every output directory, including the parents of both marker files.
mkdir -p \
  "${DISCOVERY_DIR}" \
  "${LIVE_DIR}" \
  "${LOG_DIR}" \
  "${MANIFEST_ROOT}" \
  "$(dirname "${START_MANIFEST}")" \
  "$(dirname "${FINAL_MANIFEST}")"
|
|
|
|
#######################################
# Format an epoch timestamp as an ISO-8601 UTC string (YYYY-MM-DDTHH:MM:SSZ).
# Tries GNU `date -d @EPOCH` first and falls back to Python when that fails
# (for example on a `date` that does not support `-d @...`).
# Arguments: $1 - seconds since the Unix epoch
# Outputs:   the formatted timestamp on stdout
# Returns:   status of whichever formatter ran last
#######################################
epoch_to_iso_utc() {
  local epoch="$1"
  # timezone.utc is used instead of datetime.UTC, which only exists on
  # Python 3.11+; the epoch is passed as an argument, not spliced into code.
  date -u -d "@${epoch}" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null ||
    python3 -c '
import datetime as dt
import sys

stamp = dt.datetime.fromtimestamp(int(sys.argv[1]), dt.timezone.utc)
print(stamp.strftime("%Y-%m-%dT%H:%M:%SZ"))
' "${epoch}"
}

# Wall-clock start of this run, and the epoch at which the soak window ends.
STARTED_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
START_EPOCH="$(date -u +%s)"
END_EPOCH="$((START_EPOCH + SOAK_SECONDS))"
EXPECTED_COMPLETION_AT="$(epoch_to_iso_utc "${END_EPOCH}")"
|
|
|
|
#######################################
# Append a UTC-timestamped line to the log file without ever failing or
# printing anything. Used from the signal handler.
# Globals:   LOG_FILE (read)
# Arguments: message words, joined into one line
# Returns:   always 0
#######################################
safe_log() {
  # The stderr redirect sits on the group so that a failure to open LOG_FILE
  # is silenced too; placed after `>>` it would only cover printf itself.
  {
    printf '%s %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" >> "${LOG_FILE}"
  } 2>/dev/null || true
}
|
|
|
|
#######################################
# Print a UTC-timestamped line to stdout and append the same line to the log.
# Globals:   LOG_FILE (read)
# Arguments: message words, joined into one line
# Outputs:   the timestamped line on stdout
#######################################
log() {
  local stamp
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf '%s %s\n' "${stamp}" "$*" | tee -a "${LOG_FILE}"
}
|
|
|
|
#######################################
# Signal handler: record the stop request and pass the signal to the child
# that is currently being waited on, if it is still alive.
# Globals:   STOP_REQUESTED, STOP_SIGNAL (written)
#            CURRENT_PHASE, CURRENT_CYCLE_ID, CURRENT_CHILD_PID (read)
# Arguments: $1 - signal name: SIGINT, SIGTERM or SIGHUP
# Returns:   always 0
#######################################
handle_signal() {
  local received="$1"
  local forward=""

  STOP_REQUESTED=1
  STOP_SIGNAL="${received}"
  safe_log "SIGNAL received=${received} phase=${CURRENT_PHASE} cycle_id=${CURRENT_CYCLE_ID:-none}"

  # Nothing to forward when no child is tracked or it has already exited.
  [[ -n "${CURRENT_CHILD_PID}" ]] || return 0
  kill -0 "${CURRENT_CHILD_PID}" 2>/dev/null || return 0

  # Only the three known signal names are forwarded; anything else is just
  # recorded above.
  case "${received}" in
    SIGINT) forward="INT" ;;
    SIGTERM) forward="TERM" ;;
    SIGHUP) forward="HUP" ;;
    *) return 0 ;;
  esac

  # NOTE(review): children started with `&` while job control is off begin
  # with SIGINT ignored, so the INT forward may be a no-op for them - confirm
  # that the child scripts actually react to it.
  kill "-${forward}" "${CURRENT_CHILD_PID}" 2>/dev/null || true
}
|
|
|
|
#######################################
# Write the soak-test start marker as JSON: the content goes to
# "${START_MANIFEST}.tmp" first and is then moved over START_MANIFEST.
# Globals:   START_MANIFEST, STARTED_AT, EXPECTED_COMPLETION_AT, SOAK_SECONDS,
#            CYCLE_SECONDS, PID_FILE, LOG_FILE, LOCAL_ROOT, MANIFEST_ROOT,
#            REMOTE_DEST, LIVE_DIR, DISCOVERY_DIR, CYCLES_JSONL (read)
#            START_WRITTEN (written)
# Returns:   exit status of the Python writer
#######################################
write_start_manifest() {
  local tmp_path="${START_MANIFEST}.tmp"
  local rc=0

  # Values reach Python through the environment and the heredoc is quoted, so
  # quotes or backslashes in a path can no longer corrupt the Python source.
  SOAK_MF_STARTED_AT="${STARTED_AT}" \
  SOAK_MF_EXPECTED_COMPLETION_AT="${EXPECTED_COMPLETION_AT}" \
  SOAK_MF_SOAK_SECONDS="${SOAK_SECONDS}" \
  SOAK_MF_CYCLE_SECONDS="${CYCLE_SECONDS}" \
  SOAK_MF_PID="$$" \
  SOAK_MF_PID_FILE="${PID_FILE}" \
  SOAK_MF_LOG_FILE="${LOG_FILE}" \
  SOAK_MF_LOCAL_ROOT="${LOCAL_ROOT}" \
  SOAK_MF_MANIFEST_ROOT="${MANIFEST_ROOT}" \
  SOAK_MF_REMOTE_DEST="${REMOTE_DEST}" \
  SOAK_MF_LIVE_DIR="${LIVE_DIR}" \
  SOAK_MF_DISCOVERY_DIR="${DISCOVERY_DIR}" \
  SOAK_MF_CYCLES_JSONL="${CYCLES_JSONL}" \
    python3 - "${tmp_path}" "${START_MANIFEST}" <<'PY' || rc=$?
import json
import os
import pathlib
import sys

env = os.environ
tmp_path = pathlib.Path(sys.argv[1])
final_path = pathlib.Path(sys.argv[2])
manifest = {
    "schema_name": "soak_test_start_manifest",
    "schema_version": 1,
    "checkpoint_id": 8,
    "checkpoint_name": "24h Soak Test Plan",
    "status": "STARTED",
    "started_at_utc": env["SOAK_MF_STARTED_AT"],
    "expected_completion_at_utc": env["SOAK_MF_EXPECTED_COMPLETION_AT"],
    "soak_seconds": int(env["SOAK_MF_SOAK_SECONDS"]),
    "cycle_seconds": int(env["SOAK_MF_CYCLE_SECONDS"]),
    "pid": int(env["SOAK_MF_PID"]),
    "pid_file": env["SOAK_MF_PID_FILE"],
    "log_file": env["SOAK_MF_LOG_FILE"],
    "local_root": env["SOAK_MF_LOCAL_ROOT"],
    "manifest_root": env["SOAK_MF_MANIFEST_ROOT"],
    "remote_dest": env["SOAK_MF_REMOTE_DEST"],
    "raw_output_dir": env["SOAK_MF_LIVE_DIR"],
    "discovery_dir": env["SOAK_MF_DISCOVERY_DIR"],
    "cycles_jsonl": env["SOAK_MF_CYCLES_JSONL"],
    "gate_status": "IN_PROGRESS",
    "production_ready": False,
    "notes": [
        "This is a real 24h soak start marker, not a completion report.",
        "Checkpoint 8 cannot pass until 24 real hours elapse and final metrics are validated.",
    ],
}
tmp_path.parent.mkdir(parents=True, exist_ok=True)
tmp_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
os.replace(tmp_path, final_path)
PY

  # Set even when the writer failed: the EXIT handler only records a final
  # marker for runs whose START_WRITTEN flag is 1.
  START_WRITTEN=1
  return "${rc}"
}
|
|
|
|
#######################################
# Append one cycle summary to the cycles JSONL file.
# Globals:   CYCLES_JSONL (read)
# Arguments: $1 - the record, written as a single line
#######################################
write_cycle_record() {
  local json_line="$1"
  printf '%s\n' "${json_line}" >> "${CYCLES_JSONL}"
}
|
|
|
|
#######################################
# Write the soak-test final marker as JSON, embedding every record found in
# the cycles JSONL file plus per-status counts. The content goes to
# "${FINAL_MANIFEST}.tmp" first and is then moved over FINAL_MANIFEST.
# Globals:   FINAL_MANIFEST, CYCLES_JSONL, STARTED_AT, EXPECTED_COMPLETION_AT,
#            SOAK_SECONDS, CYCLE_SECONDS, PID_FILE, LOG_FILE, LOCAL_ROOT,
#            MANIFEST_ROOT, REMOTE_DEST, STOP_REQUESTED, STOP_SIGNAL,
#            CURRENT_PHASE, CURRENT_CYCLE_ID (read)
#            FINAL_WRITTEN (set to 1 only when the marker was written)
# Arguments: $1 - value for "status"
#            $2 - value for "gate_status"
#            $3 - value for "exit_reason"
# Returns:   exit status of the Python writer
#######################################
write_final_manifest() {
  local final_status="$1"
  local gate_status="$2"
  local exit_reason="$3"
  local ended_at
  local tmp_path="${FINAL_MANIFEST}.tmp"
  local rc=0

  ended_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  # Values reach Python through the environment and the heredoc is quoted, so
  # quotes or backslashes in a value can no longer corrupt the Python source.
  SOAK_MF_CYCLES_JSONL="${CYCLES_JSONL}" \
  SOAK_MF_STATUS="${final_status}" \
  SOAK_MF_GATE_STATUS="${gate_status}" \
  SOAK_MF_EXIT_REASON="${exit_reason}" \
  SOAK_MF_STARTED_AT="${STARTED_AT}" \
  SOAK_MF_ENDED_AT="${ended_at}" \
  SOAK_MF_EXPECTED_COMPLETION_AT="${EXPECTED_COMPLETION_AT}" \
  SOAK_MF_SOAK_SECONDS="${SOAK_SECONDS}" \
  SOAK_MF_CYCLE_SECONDS="${CYCLE_SECONDS}" \
  SOAK_MF_PID="$$" \
  SOAK_MF_PID_FILE="${PID_FILE}" \
  SOAK_MF_LOG_FILE="${LOG_FILE}" \
  SOAK_MF_LOCAL_ROOT="${LOCAL_ROOT}" \
  SOAK_MF_MANIFEST_ROOT="${MANIFEST_ROOT}" \
  SOAK_MF_REMOTE_DEST="${REMOTE_DEST}" \
  SOAK_MF_STOP_REQUESTED="${STOP_REQUESTED}" \
  SOAK_MF_STOP_SIGNAL="${STOP_SIGNAL}" \
  SOAK_MF_CURRENT_PHASE="${CURRENT_PHASE}" \
  SOAK_MF_CURRENT_CYCLE_ID="${CURRENT_CYCLE_ID}" \
    python3 - "${tmp_path}" "${FINAL_MANIFEST}" <<'PY' || rc=$?
import json
import os
import pathlib
import sys

env = os.environ
tmp_path = pathlib.Path(sys.argv[1])
final_path = pathlib.Path(sys.argv[2])
cycles_path = pathlib.Path(env["SOAK_MF_CYCLES_JSONL"])
cycles = []
if cycles_path.exists():
    # Blank lines are skipped; every other line must be a JSON document.
    cycles = [
        json.loads(line)
        for line in cycles_path.read_text(encoding="utf-8").splitlines()
        if line.strip()
    ]
manifest = {
    "schema_name": "soak_test_final_manifest",
    "schema_version": 1,
    "checkpoint_id": 8,
    "checkpoint_name": "24h Soak Test Plan",
    "status": env["SOAK_MF_STATUS"],
    "gate_status": env["SOAK_MF_GATE_STATUS"],
    "exit_reason": env["SOAK_MF_EXIT_REASON"],
    "started_at_utc": env["SOAK_MF_STARTED_AT"],
    "ended_at_utc": env["SOAK_MF_ENDED_AT"],
    "expected_completion_at_utc": env["SOAK_MF_EXPECTED_COMPLETION_AT"],
    "soak_seconds": int(env["SOAK_MF_SOAK_SECONDS"]),
    "cycle_seconds": int(env["SOAK_MF_CYCLE_SECONDS"]),
    "cycles": cycles,
    "cycle_count": len(cycles),
    "ok_cycle_count": sum(1 for cycle in cycles if cycle.get("status") == "OK"),
    "error_cycle_count": sum(1 for cycle in cycles if cycle.get("status") == "ERROR"),
    "interrupted_cycle_count": sum(1 for cycle in cycles if cycle.get("status") == "INTERRUPTED"),
    "pid": int(env["SOAK_MF_PID"]),
    "pid_file": env["SOAK_MF_PID_FILE"],
    "log_file": env["SOAK_MF_LOG_FILE"],
    "local_root": env["SOAK_MF_LOCAL_ROOT"],
    "manifest_root": env["SOAK_MF_MANIFEST_ROOT"],
    "remote_dest": env["SOAK_MF_REMOTE_DEST"],
    "stop_requested": bool(int(env["SOAK_MF_STOP_REQUESTED"])),
    "stop_signal": env["SOAK_MF_STOP_SIGNAL"],
    "current_phase_at_exit": env["SOAK_MF_CURRENT_PHASE"],
    "current_cycle_id_at_exit": env["SOAK_MF_CURRENT_CYCLE_ID"],
    "production_ready": False,
    "notes": [
        "This marker is written by the soak controller on completion, interruption, or error.",
        "Checkpoint 8 cannot be PASS until 24 real hours elapse and final metrics are validated.",
    ],
}
tmp_path.parent.mkdir(parents=True, exist_ok=True)
tmp_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
os.replace(tmp_path, final_path)
PY

  # Only claim success when the marker really was written; a failed attempt
  # leaves FINAL_WRITTEN at 0 so a later attempt is not suppressed.
  if [[ "${rc}" -eq 0 ]]; then
    FINAL_WRITTEN=1
  fi
  return "${rc}"
}
|
|
|
|
#######################################
# EXIT handler: make sure a final marker exists for a started run, remove the
# PID file if it belongs to this process, and exit with the original status.
# Globals:   START_WRITTEN, FINAL_WRITTEN, STOP_REQUESTED, STOP_SIGNAL,
#            PID_FILE (read)
#######################################
cleanup_on_exit() {
  # Must be the first statement so that $? is still the script's exit status.
  local exit_code=$?
  local recorded_pid

  # A start marker without a final marker means the normal finalization step
  # was never reached; record why.
  if (( START_WRITTEN == 1 && FINAL_WRITTEN == 0 )); then
    if (( STOP_REQUESTED == 1 )); then
      write_final_manifest "INTERRUPTED" "INTERRUPTED" "${STOP_SIGNAL:-signal}"
    elif (( exit_code != 0 )); then
      write_final_manifest "ERROR" "ERROR" "exit_code_${exit_code}"
    else
      write_final_manifest "ERROR" "ERROR" "exited_without_final_marker"
    fi
  fi

  # Leave the PID file alone when it names a different process.
  if [[ -f "${PID_FILE}" ]]; then
    recorded_pid="$(cat "${PID_FILE}" 2>/dev/null)"
    if [[ "${recorded_pid}" == "$$" ]]; then
      rm -f "${PID_FILE}"
    fi
  fi

  exit "${exit_code}"
}
|
|
|
|
#######################################
# Run a command in the background with stdout and stderr appended to the log,
# track its PID so the signal handler can reach it, and wait for it.
# Globals:   LOG_FILE, STOP_REQUESTED (read); CURRENT_CHILD_PID (written)
# Arguments: the command and its arguments
# Returns:   the status reported by the last `wait` on the child
#######################################
run_logged() {
  local status=0

  "$@" >> "${LOG_FILE}" 2>&1 &
  CURRENT_CHILD_PID="$!"

  wait "${CURRENT_CHILD_PID}" || status=$?

  # If a stop was requested and the child is still running, wait once more
  # and report the status of that second wait instead.
  if (( STOP_REQUESTED == 1 )) && kill -0 "${CURRENT_CHILD_PID}" 2>/dev/null; then
    status=0
    wait "${CURRENT_CHILD_PID}" || status=$?
  fi

  CURRENT_CHILD_PID=""
  return "${status}"
}
|
|
|
|
# Route stop signals through handle_signal and run cleanup_on_exit on any exit.
trap 'handle_signal SIGINT' INT
trap 'handle_signal SIGTERM' TERM
trap 'handle_signal SIGHUP' HUP
trap cleanup_on_exit EXIT

# Publish this controller's PID, then write the start marker.
printf '%s\n' "$$" > "${PID_FILE}"
write_start_manifest

# Abort when the start marker is missing or empty.
if [[ ! -s "${START_MANIFEST}" ]]; then
  exit 3
fi

log "START soak_id=${SOAK_ID} pid=$$ expected_completion=${EXPECTED_COMPLETION_AT}"
|
|
#######################################
# Print the summary of the cycle that just finished as one JSON line with
# sorted keys.
# Values reach Python through the environment and the heredoc is quoted, so
# quotes or backslashes in a path cannot corrupt the Python source.
# Globals (read): cycle_index, cycle_id, cycle_started_at, cycle_ended_at,
#   run_seconds, discovery_manifest, collector_manifest, upload_manifest,
#   discovery_exit, collector_exit, upload_exit, cycle_status, STOP_SIGNAL
# Outputs:  the JSON record on stdout
#######################################
build_cycle_record() {
  CYCLE_REC_INDEX="${cycle_index}" \
  CYCLE_REC_ID="${cycle_id}" \
  CYCLE_REC_STARTED_AT="${cycle_started_at}" \
  CYCLE_REC_ENDED_AT="${cycle_ended_at}" \
  CYCLE_REC_RUN_SECONDS="${run_seconds}" \
  CYCLE_REC_DISCOVERY_MANIFEST="${discovery_manifest}" \
  CYCLE_REC_COLLECTOR_MANIFEST="${collector_manifest}" \
  CYCLE_REC_UPLOAD_MANIFEST="${upload_manifest}" \
  CYCLE_REC_DISCOVERY_EXIT="${discovery_exit}" \
  CYCLE_REC_COLLECTOR_EXIT="${collector_exit}" \
  CYCLE_REC_UPLOAD_EXIT="${upload_exit}" \
  CYCLE_REC_STATUS="${cycle_status}" \
  CYCLE_REC_STOP_SIGNAL="${STOP_SIGNAL}" \
    python3 - <<'PY'
import json
import os

env = os.environ
print(json.dumps({
    "cycle_index": int(env["CYCLE_REC_INDEX"]),
    "cycle_id": env["CYCLE_REC_ID"],
    "started_at_utc": env["CYCLE_REC_STARTED_AT"],
    "ended_at_utc": env["CYCLE_REC_ENDED_AT"],
    "run_seconds": int(env["CYCLE_REC_RUN_SECONDS"]),
    "discovery_manifest": env["CYCLE_REC_DISCOVERY_MANIFEST"],
    "collector_manifest": env["CYCLE_REC_COLLECTOR_MANIFEST"],
    "upload_manifest": env["CYCLE_REC_UPLOAD_MANIFEST"],
    "discovery_exit": int(env["CYCLE_REC_DISCOVERY_EXIT"]),
    "collector_exit": int(env["CYCLE_REC_COLLECTOR_EXIT"]),
    "upload_exit": int(env["CYCLE_REC_UPLOAD_EXIT"]),
    "status": env["CYCLE_REC_STATUS"],
    "stop_signal": env["CYCLE_REC_STOP_SIGNAL"],
}, sort_keys=True))
PY
}

# Main loop: one discovery -> collector -> upload cycle per iteration until the
# soak window ends or a stop is requested.
cycle_index=0
error_seen=0
while true; do
  now_epoch="$(date -u +%s)"
  remaining="$((END_EPOCH - now_epoch))"
  if [[ "${remaining}" -le 0 ]]; then
    break
  fi
  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
    break
  fi
  # Less than 30 seconds left: do not start another cycle.
  if [[ "${remaining}" -lt 30 ]]; then
    log "SKIP final tiny remaining window seconds=${remaining}"
    break
  fi

  cycle_index="$((cycle_index + 1))"
  cycle_id="$(date -u +%Y%m%dT%H%M%SZ)"
  CURRENT_CYCLE_ID="${cycle_id}"

  # The collector runs for a full cycle, or for whatever is left of the window.
  run_seconds="${CYCLE_SECONDS}"
  if [[ "${remaining}" -lt "${run_seconds}" ]]; then
    run_seconds="${remaining}"
  fi

  discovery_json="${DISCOVERY_DIR}/polymarket_btc_markets_${cycle_id}.json"
  discovery_manifest="${DISCOVERY_DIR}/polymarket_btc_markets_manifest_${cycle_id}.json"
  discovery_markdown="${DISCOVERY_DIR}/polymarket_btc_markets_${cycle_id}.md"
  collector_manifest="${MANIFEST_ROOT}/collector_${cycle_id}.json"
  upload_manifest="${MANIFEST_ROOT}/upload_${cycle_id}.json"
  cycle_started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  log "CYCLE ${cycle_index} start id=${cycle_id} run_seconds=${run_seconds}"

  # --- Phase 1: discovery ----------------------------------------------------
  discovery_exit=0
  CURRENT_PHASE="discovery"
  run_logged "${PYTHON_BIN}" scripts/discover_polymarket_btc_markets.py \
    --output-json "${discovery_json}" \
    --manifest "${discovery_manifest}" \
    --markdown "${discovery_markdown}" \
    --limit "${DISCOVERY_LIMIT}" \
    --max-pages "${DISCOVERY_MAX_PAGES}" \
    --timeout "${DISCOVERY_TIMEOUT}" || discovery_exit=$?

  # --- Phase 2: collector ----------------------------------------------------
  # Sentinel exit codes: 98 = skipped because a stop was requested,
  # 99 = skipped because the previous phase failed.
  collector_exit=0
  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
    collector_exit=98
  elif [[ "${discovery_exit}" -eq 0 ]]; then
    CURRENT_PHASE="collector"
    run_logged "${PYTHON_BIN}" scripts/collect_polymarket_orderbooks.py \
      --config config/polymarket_collector.vps.example.yaml \
      --discovery-path "${discovery_json}" \
      --output-dir "${LIVE_DIR}" \
      --manifest-path "${collector_manifest}" \
      --market-limit "${MARKET_LIMIT}" \
      --interval-seconds "${INTERVAL_SECONDS}" \
      --duration-seconds "${run_seconds}" \
      --request-timeout-seconds "${REQUEST_TIMEOUT_SECONDS}" \
      --max-retries "${MAX_RETRIES}" \
      --backoff-seconds "${BACKOFF_SECONDS}" \
      --market-end-safety-seconds "${MARKET_END_SAFETY_SECONDS}" || collector_exit=$?
  else
    collector_exit=99
  fi

  # --- Phase 3: upload (same 98/99 sentinels) --------------------------------
  upload_exit=0
  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
    upload_exit=98
  elif [[ "${collector_exit}" -eq 0 ]]; then
    CURRENT_PHASE="upload"
    run_logged scripts/upload_archive_rclone.sh \
      --execute \
      --data-dir "${LOCAL_ROOT}" \
      --raw-dir "${LIVE_DIR}" \
      --source-manifest-dir "${MANIFEST_ROOT}" \
      --manifest-dir "${MANIFEST_ROOT}" \
      --manifest-path "${upload_manifest}" \
      --dest "${REMOTE_DEST}" \
      --min-age-seconds 0 \
      --rclone-bin "${RCLONE_BIN}" || upload_exit=$?
  else
    upload_exit=99
  fi

  # --- Classify and record the cycle -----------------------------------------
  cycle_ended_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
    cycle_status="INTERRUPTED"
  elif [[ "${discovery_exit}" -eq 0 && "${collector_exit}" -eq 0 && "${upload_exit}" -eq 0 ]]; then
    cycle_status="OK"
  else
    cycle_status="ERROR"
    error_seen=1
  fi

  record="$(build_cycle_record)"
  write_cycle_record "${record}"
  log "CYCLE ${cycle_index} end id=${cycle_id} status=${cycle_status} discovery_exit=${discovery_exit} collector_exit=${collector_exit} upload_exit=${upload_exit}"

  CURRENT_PHASE="sleep"
  CURRENT_CYCLE_ID=""
  if [[ "${STOP_REQUESTED}" -eq 1 ]]; then
    break
  fi

  # Pause between cycles as a tracked background job, so the signal handler
  # can reach it through CURRENT_CHILD_PID.
  sleep 5 &
  CURRENT_CHILD_PID="$!"
  wait "${CURRENT_CHILD_PID}" || true
  CURRENT_CHILD_PID=""
done
|
|
|
|
# Finalization: write the final marker that matches how the loop ended.
CURRENT_PHASE="finalizing"
CURRENT_CYCLE_ID=""

if (( STOP_REQUESTED == 1 )); then
  # A signal ended the run early.
  write_final_manifest "INTERRUPTED" "INTERRUPTED" "${STOP_SIGNAL:-signal}"
elif (( error_seen == 1 )); then
  # The window elapsed but at least one cycle ended in ERROR.
  write_final_manifest "ERROR" "ERROR" "cycle_error"
else
  # The window elapsed with no ERROR cycle; the result still needs review.
  write_final_manifest "COMPLETED_NEEDS_REVIEW" "NEEDS_REVIEW" "elapsed"
fi

log "END soak_id=${SOAK_ID} final_manifest=${FINAL_MANIFEST} status_written=1"
|