#!/usr/bin/env bash
#
# Preflight check for moving the Polymarket order-book collector onto a VPS.
#
# Runs a fixed sequence of checks (Python, required files, Python compile,
# shell syntax, systemd units, rclone, optional target directories, disk
# space, secret-like strings) and prints one PASS/INFO/WARN/FAIL line per
# finding followed by a SUMMARY line.
#
# Exit status: 0 when no check failed, 1 when at least one check failed,
# 2 on a usage error (unknown option, missing or invalid option value).
#
# Environment defaults (each can be overridden by the matching option):
#   ORDERBOOKS_PYTHON, ORDERBOOKS_RCLONE_BIN, ORDERBOOKS_RCLONE_DEST,
#   ORDERBOOKS_PREFLIGHT_MIN_FREE_GIB,
#   ORDERBOOKS_PREFLIGHT_REMOTE_TIMEOUT_SECONDS

# errexit (-e) is deliberately NOT enabled: several checks return non-zero to
# signal a failed check, and the script must keep going so that the SUMMARY
# line reflects every check.
set -uo pipefail

APP_DIR="$(pwd)"
PYTHON_BIN="${ORDERBOOKS_PYTHON:-python3}"
RCLONE_BIN="${ORDERBOOKS_RCLONE_BIN:-rclone}"
RCLONE_REMOTE="${ORDERBOOKS_RCLONE_DEST:-}"
DATA_DIR=""
MANIFEST_DIR=""
LOG_DIR=""
MIN_FREE_GIB="${ORDERBOOKS_PREFLIGHT_MIN_FREE_GIB:-5}"
REMOTE_TIMEOUT_SECONDS="${ORDERBOOKS_PREFLIGHT_REMOTE_TIMEOUT_SECONDS:-30}"

# Running totals, incremented by log_fail / log_warn.
FAILURES=0
WARNINGS=0

# Print the help text to stdout.
usage() {
  cat <<'EOF'
Usage:
  scripts/vps_preflight_check.sh [options]

Read-only VPS cutover preflight for the Polymarket order-book collector.

Default behavior checks the repository, local tooling, unit syntax, disk space,
and rclone availability. It does not print rclone config and does not require
secrets.

Options:
  --app-dir DIR                 Repository checkout path. Default: current directory.
  --python-bin PATH             Python interpreter. Default: ORDERBOOKS_PYTHON or python3.
  --rclone-bin PATH             rclone binary. Default: ORDERBOOKS_RCLONE_BIN or rclone.
  --rclone-remote REMOTE        Optional remote/path to check read-only, e.g.
                                gdrive:orderbooks/polymarket.
  --data-dir DIR                Optional target data directory to create/check writable.
  --manifest-dir DIR            Optional target manifest directory to create/check writable.
  --log-dir DIR                 Optional target log directory to create/check writable.
  --min-free-gib N              Minimum free GiB for checked filesystems. Default: 5.
  --remote-timeout-seconds N    Timeout for rclone remote read check. Default: 30.
  --help                        Show this help.

Directory options intentionally create missing directories before checking
writability. Omit them for a repo-only read-only check.
EOF
}

# Result reporters. Each prints one tagged line to stdout; log_warn and
# log_fail additionally bump the corresponding global counter.
log_pass() {
  printf 'PASS %s\n' "$*"
}

log_info() {
  printf 'INFO %s\n' "$*"
}

log_warn() {
  WARNINGS=$((WARNINGS + 1))
  printf 'WARN %s\n' "$*"
}

log_fail() {
  FAILURES=$((FAILURES + 1))
  printf 'FAIL %s\n' "$*"
}

# Run a command with stdout and stderr discarded; returns its exit status.
run_quiet() {
  "$@" >/dev/null 2>&1
}

# Abort with a usage error (exit 2) when an option that takes a value is the
# last word on the command line. Without this guard, reading "$2" under
# `set -u` kills the script with a bare "unbound variable" message.
# Arguments: $1 - option name, $2 - number of remaining arguments ("$#").
require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    log_fail "missing value for option: $1"
    usage >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --app-dir)
      require_option_value "$1" "$#"
      APP_DIR="$2"
      shift 2
      ;;
    --python-bin)
      require_option_value "$1" "$#"
      PYTHON_BIN="$2"
      shift 2
      ;;
    --rclone-bin)
      require_option_value "$1" "$#"
      RCLONE_BIN="$2"
      shift 2
      ;;
    --rclone-remote)
      require_option_value "$1" "$#"
      RCLONE_REMOTE="$2"
      shift 2
      ;;
    --data-dir)
      require_option_value "$1" "$#"
      DATA_DIR="$2"
      shift 2
      ;;
    --manifest-dir)
      require_option_value "$1" "$#"
      MANIFEST_DIR="$2"
      shift 2
      ;;
    --log-dir)
      require_option_value "$1" "$#"
      LOG_DIR="$2"
      shift 2
      ;;
    --min-free-gib)
      require_option_value "$1" "$#"
      MIN_FREE_GIB="$2"
      shift 2
      ;;
    --remote-timeout-seconds)
      require_option_value "$1" "$#"
      REMOTE_TIMEOUT_SECONDS="$2"
      shift 2
      ;;
    --help)
      usage
      exit 0
      ;;
    *)
      log_fail "unknown argument: $1"
      usage >&2
      exit 2
      ;;
  esac
done

# MIN_FREE_GIB is later used inside $(( ... )). Bash arithmetic evaluates
# arbitrary expressions (including command substitutions in array subscripts),
# so only plain decimal digits are accepted, whether the value came from the
# option or from the environment.
if [[ ! "${MIN_FREE_GIB}" =~ ^[0-9]+$ ]]; then
  log_fail "minimum free GiB must be a non-negative integer: ${MIN_FREE_GIB}"
  usage >&2
  exit 2
fi

# Drop one trailing slash, but leave a bare "/" alone; stripping it would
# produce an empty path and a bogus "does not exist" failure.
if [[ "${APP_DIR}" != "/" ]]; then
  APP_DIR="${APP_DIR%/}"
fi

if [[ ! -d "${APP_DIR}" ]]; then
  log_fail "app directory does not exist: ${APP_DIR}"
  printf 'SUMMARY failures=%s warnings=%s\n' "${FAILURES}" "${WARNINGS}"
  exit 1
fi

# All repository-relative paths below assume the working directory is APP_DIR.
cd "${APP_DIR}" || {
  log_fail "could not cd to app directory: ${APP_DIR}"
  printf 'SUMMARY failures=%s warnings=%s\n' "${FAILURES}" "${WARNINGS}"
  exit 1
}

# Report whether PYTHON_BIN resolves to a command, including its --version
# output when it does.
check_python() {
  local version
  if command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
    version="$("${PYTHON_BIN}" --version 2>&1 || true)"
    log_pass "python available: ${PYTHON_BIN} (${version})"
  else
    log_fail "python not found: ${PYTHON_BIN}"
  fi
}

# Report PASS/FAIL for each repository file the deployment relies on.
# Returns: 0 when every file is present, 1 when at least one is missing.
check_required_files() {
  local missing=0 file
  local required=(
    "scripts/discover_polymarket_btc_markets.py"
    "scripts/collect_polymarket_orderbooks.py"
    "scripts/normalize_polymarket_orderbooks.py"
    "scripts/run_polymarket_collector_cycle.sh"
    "scripts/upload_archive_rclone.sh"
    "scripts/vps_runtime_smoke_check.sh"
    "config/polymarket_collector.vps.example.yaml"
    "docs/VPS_DEPLOYMENT.md"
    "docs/GOOGLE_DRIVE_OFFLOAD.md"
    "systemd/polymarket-orderbook-collector.service"
    "systemd/polymarket-orderbook-uploader.service"
    "systemd/polymarket-orderbook-uploader.timer"
  )

  for file in "${required[@]}"; do
    if [[ -f "${file}" ]]; then
      log_pass "required file exists: ${file}"
    else
      missing=1
      log_fail "required file missing: ${file}"
    fi
  done

  return "${missing}"
}

# Syntax-check the three Python entry points by calling compile() on their
# source text inside the interpreter, so no .pyc files are written.
check_python_compile() {
  if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
    log_fail "cannot compile Python scripts because Python is missing"
    return
  fi

  # The program is fed on stdin ("-"); the heredoc body and its terminator
  # must stay at column 0 because the heredoc is not the tab-stripping form.
  if run_quiet "${PYTHON_BIN}" - <<'PY'
from pathlib import Path

paths = [
    Path("scripts/discover_polymarket_btc_markets.py"),
    Path("scripts/collect_polymarket_orderbooks.py"),
    Path("scripts/normalize_polymarket_orderbooks.py"),
]
for path in paths:
    source = path.read_text(encoding="utf-8")
    compile(source, str(path), "exec")
PY
  then
    log_pass "collector/discovery/normalization Python scripts compile without bytecode writes"
  else
    log_fail "Python no-bytecode compile check failed"
  fi
}

# Run `bash -n` over every scripts/*.sh file.
# Returns: 0 when all parse, 1 when at least one does not.
check_shell_syntax() {
  local failed=0 script

  for script in scripts/*.sh; do
    # An unmatched glob stays literal; skip it instead of reporting a failure.
    [[ -f "${script}" ]] || continue
    if bash -n "${script}" >/dev/null 2>&1; then
      log_pass "bash syntax ok: ${script}"
    else
      failed=1
      log_fail "bash syntax failed: ${script}"
    fi
  done

  return "${failed}"
}

# Verify the shipped unit files with systemd-analyze when it is installed;
# otherwise record a warning and skip.
check_systemd_units() {
  local units=(
    "systemd/polymarket-orderbook-collector.service"
    "systemd/polymarket-orderbook-uploader.service"
    "systemd/polymarket-orderbook-uploader.timer"
  )

  if command -v systemd-analyze >/dev/null 2>&1; then
    if systemd-analyze verify "${units[@]}" >/dev/null 2>&1; then
      log_pass "systemd units parse with systemd-analyze"
    else
      log_fail "systemd-analyze verify failed for one or more units"
    fi
  else
    log_warn "systemd-analyze unavailable; skipped unit parse check"
  fi
}

# Print the "name:" prefix of an rclone destination (everything up to and
# including the first colon), or an empty line when there is no colon.
# Arguments: $1 - destination such as "gdrive:orderbooks/polymarket".
remote_name_from_dest() {
  local dest="$1"
  case "${dest}" in
    *:*) printf '%s:\n' "${dest%%:*}" ;;
    *) printf '\n' ;;
  esac
}

# Run a command under `timeout REMOTE_TIMEOUT_SECONDS` when the timeout
# utility exists; otherwise run it directly with no time limit.
run_with_timeout() {
  if command -v timeout >/dev/null 2>&1; then
    timeout "${REMOTE_TIMEOUT_SECONDS}" "$@"
  else
    "$@"
  fi
}

# Check that rclone is installed and, when RCLONE_REMOTE is set, that the
# remote is listed by `rclone listremotes` and can be listed with `lsf`.
# All rclone output is discarded, so nothing from its config is printed.
check_rclone() {
  local version remote_name

  if [[ -x "${RCLONE_BIN}" ]] || command -v "${RCLONE_BIN}" >/dev/null 2>&1; then
    version="$("${RCLONE_BIN}" version 2>/dev/null | head -n 1 || true)"
    log_pass "rclone available: ${RCLONE_BIN} (${version})"
  else
    log_fail "rclone not found: ${RCLONE_BIN}"
    return
  fi

  if [[ -z "${RCLONE_REMOTE}" ]]; then
    log_info "no rclone remote provided; skipped remote access check"
    return
  fi

  remote_name="$(remote_name_from_dest "${RCLONE_REMOTE}")"
  if [[ -z "${remote_name}" ]]; then
    log_fail "rclone remote must include a remote name ending in ':': ${RCLONE_REMOTE}"
    return
  fi

  # -F -x: the remote name must match a whole listremotes line literally.
  if "${RCLONE_BIN}" listremotes 2>/dev/null | grep -Fxq "${remote_name}"; then
    log_pass "rclone remote is configured: ${remote_name}"
  else
    log_fail "rclone remote is not configured or not visible to this user: ${remote_name}"
    return
  fi

  if run_with_timeout "${RCLONE_BIN}" lsf --max-depth 1 "${RCLONE_REMOTE}" >/dev/null 2>&1; then
    log_pass "rclone remote read check succeeded without printing config: ${RCLONE_REMOTE}"
  else
    log_fail "rclone remote read check failed or timed out: ${RCLONE_REMOTE}"
  fi
}

# Create (mkdir -p) and verify writability of an optional target directory.
# Arguments: $1 - label used in messages, $2 - directory path (may be empty,
# in which case the check is skipped).
check_target_dir() {
  local label="$1" path="$2"

  if [[ -z "${path}" ]]; then
    log_info "no ${label} directory provided; skipped create/write check"
    return
  fi

  if mkdir -p -- "${path}" >/dev/null 2>&1 && [[ -d "${path}" && -w "${path}" ]]; then
    log_pass "${label} directory exists and is writable: ${path}"
  else
    log_fail "${label} directory cannot be created or is not writable: ${path}"
  fi
}

# Compare the free space of the filesystem holding a path against
# MIN_FREE_GIB.
# Arguments: $1 - path to inspect, $2 - label used in messages.
check_disk_free() {
  local target="$1" label="$2" available_kib min_kib

  if [[ ! -e "${target}" ]]; then
    log_warn "disk target does not exist, skipping ${label}: ${target}"
    return
  fi

  # df -P prints a header line followed by a single data line; column 4 is
  # the available space, in 1024-byte units because of -k.
  available_kib="$(df -Pk -- "${target}" | awk 'NR==2 {print $4}')"
  # 10# forces base 10 so a value such as "08" is not rejected as octal.
  min_kib=$((10#${MIN_FREE_GIB} * 1024 * 1024))

  # Only compare when df actually produced a number; anything else is
  # reported as a failure rather than tripping an arithmetic error.
  if [[ "${available_kib}" =~ ^[0-9]+$ ]] && ((10#${available_kib} >= min_kib)); then
    log_pass "disk free ok for ${label}: available_kib=${available_kib} min_gib=${MIN_FREE_GIB}"
  else
    log_fail "disk free below threshold for ${label}: available_kib=${available_kib:-unknown} min_gib=${MIN_FREE_GIB}"
  fi
}

# Scan runtime config, units, and scripts for credential-like keywords.
# A match is a failure; a clean scan is a pass; a scan that could not read
# every file is a warning, because neither outcome can be claimed honestly.
check_secret_requirements() {
  local scan_status=0
  local pattern='(api[_-]?key|private[_-]?key|mnemonic|wallet|password|client[_-]?secret|access[_-]?token|refresh[_-]?token)'
  local files=(
    "config/polymarket_collector.vps.example.yaml"
    "systemd/polymarket-orderbook-collector.service"
    "systemd/polymarket-orderbook-uploader.service"
    "systemd/polymarket-orderbook-uploader.timer"
    "scripts/run_polymarket_collector_cycle.sh"
    "scripts/upload_archive_rclone.sh"
  )

  # -q makes grep return 0 on any match even if another file was unreadable;
  # without it a match can be hidden behind exit status 2. -s silences the
  # per-file error messages.
  grep -q -s -E -i -e "${pattern}" -- "${files[@]}" || scan_status=$?

  case "${scan_status}" in
    0)
      log_fail "secret-like credential requirement found in runtime config, units, or scripts"
      ;;
    1)
      log_pass "no API keys, private keys, mnemonics, wallets, or passwords are required by runtime files"
      ;;
    *)
      log_warn "secret scan incomplete; one or more runtime files could not be read"
      ;;
  esac

  log_info "rclone credentials, if used, must remain machine-local outside the repository"
}

check_python
check_required_files
check_python_compile
check_shell_syntax
check_systemd_units
check_rclone
check_target_dir "data" "${DATA_DIR}"
check_target_dir "manifest" "${MANIFEST_DIR}"
check_target_dir "log" "${LOG_DIR}"
check_disk_free "." "repository"
if [[ -n "${DATA_DIR}" && -d "${DATA_DIR}" ]]; then
  check_disk_free "${DATA_DIR}" "data directory"
fi
check_secret_requirements

printf 'SUMMARY failures=%s warnings=%s\n' "${FAILURES}" "${WARNINGS}"
if [[ "${FAILURES}" -eq 0 ]]; then
  exit 0
fi
exit 1