orderbooks/scripts/build_vps_deploy_bundle.sh
philipp 284e465588
Some checks failed
deploy / deploy (push) Has been cancelled
Prepare Kubernetes orderbooks deployment
2026-04-18 11:23:28 +02:00

366 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Builds a timestamped deployment tarball and a JSON manifest from a source
# working tree. The values below are the defaults; each input can be preset
# through its ORDERBOOKS_* environment variable.
set -o errexit -o nounset -o pipefail

# Inputs.
APP_DIR="${ORDERBOOKS_APP_DIR:-$(pwd)}"
OUTPUT_DIR="${ORDERBOOKS_VPS_BUNDLE_OUTPUT_DIR:-artifacts/vps}"
TIMESTAMP="${ORDERBOOKS_VPS_BUNDLE_TIMESTAMP:-$(date -u '+%Y%m%dT%H%M%SZ')}"

# Artifact names derived from the inputs; a single trailing slash on the
# output directory is dropped so the joined paths never contain "//".
BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"
TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"
MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
#######################################
# Print the command-line help text.
# Outputs: writes the help text to stdout, one argument per output line.
#######################################
usage() {
  printf '%s\n' \
    'Usage: scripts/build_vps_deploy_bundle.sh [options]' \
    'Build a deployable VPS bundle from the current working tree. The bundle is' \
    'intended to be copied to a VPS and unpacked under /opt/orderbooks.' \
    'Options:' \
    '--app-dir DIR Source working tree. Default: ORDERBOOKS_APP_DIR or current directory.' \
    '--output-dir DIR Bundle output directory. Default: artifacts/vps.' \
    '--timestamp TS Override UTC timestamp used in artifact names.' \
    '--help Show this help.' \
    'The bundle uses a narrow allowlist and excludes live data, caches, git metadata,' \
    'virtualenvs, rclone config, private keys, wallets, mnemonics, and generated' \
    'artifacts. It does not print secrets and does not write Python bytecode.'
}
#######################################
# Abort with a usage error unless an option is followed by a value.
# Without this check a trailing option trips `set -u` on "$2" and the user
# only sees an "unbound variable" message.
# Arguments: $1 - option name
#            $2 - number of arguments still unparsed, option included
#######################################
require_option_value() {
  if (( $2 < 2 )); then
    printf 'Missing value for %s\n' "$1" >&2
    usage >&2
    exit 2
  fi
}

#######################################
# Parse command-line options.
# Globals:   APP_DIR, OUTPUT_DIR, TIMESTAMP (written when the matching
#            option is given); BUNDLE_BASENAME, TARBALL, MANIFEST
#            (always re-derived after parsing)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for --help; diagnostics on stderr
# Returns:   exits 0 for --help, 2 for an unknown option or a missing value
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --app-dir)
        require_option_value "$1" "$#"
        APP_DIR="$2"
        shift 2
        ;;
      --output-dir)
        require_option_value "$1" "$#"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      --timestamp)
        require_option_value "$1" "$#"
        TIMESTAMP="$2"
        shift 2
        ;;
      --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        usage >&2
        exit 2
        ;;
    esac
  done

  # Derive the artifact names once, from the final option values.
  BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"
  TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"
  MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
}

parse_args "$@"
# Normalise the source directory. The filesystem root is left alone, because
# stripping its only slash would turn it into an empty string.
if [[ "${APP_DIR}" != "/" ]]; then
  APP_DIR="${APP_DIR%/}"
fi
if [[ ! -d "${APP_DIR}" ]]; then
  echo "Source app directory does not exist: ${APP_DIR}" >&2
  exit 1
fi

# Enter the source tree BEFORE creating the output directory. Every later
# step (the overwrite check, both Python stages and tar) resolves a relative
# OUTPUT_DIR against the source tree, so it must be created there too and not
# in the caller's working directory.
cd -- "${APP_DIR}"
mkdir -p -- "${OUTPUT_DIR}"

# Never clobber an artifact from an earlier run with the same timestamp.
if [[ -e "${TARBALL}" || -e "${MANIFEST}" ]]; then
  echo "Refusing to overwrite existing bundle artifact: ${TARBALL} or ${MANIFEST}" >&2
  exit 1
fi

# Scratch file for the NUL-delimited list of paths; removed on any exit path.
FILELIST="$(mktemp)"
trap 'rm -f -- "${FILELIST}"' EXIT
# Stage 1: choose the files to bundle. The embedded Python program reads the
# file-list, manifest and tarball paths plus the timestamp from argv[1..4],
# writes the NUL-delimited path list to FILELIST and the initial manifest to
# MANIFEST. The quoted delimiter keeps the shell from expanding the program.
PYTHONDONTWRITEBYTECODE=1 python3 - "${FILELIST}" "${MANIFEST}" "${TARBALL}" "${TIMESTAMP}" <<'PY_BUNDLE_SELECT'
import datetime as dt
import fnmatch
import hashlib
import json
import os
import sys
from pathlib import Path
# Positional arguments supplied by the surrounding shell script.
filelist_path = Path(sys.argv[1])  # output: NUL-delimited list of bundled paths
manifest_path = Path(sys.argv[2])  # output: JSON manifest
tarball_path = Path(sys.argv[3])  # only recorded in the manifest by this stage
timestamp = sys.argv[4]
# Absolute working directory; the shell script has already changed into the
# source tree before starting this program.
root = Path.cwd()

# --- Allowlist: the only places bundle candidates are collected from. ---
# Individual files, relative to the working directory.
allowed_files = [
    Path("AGENTS.md"),
    Path("ROADMAP.md"),
]
# Directories that are walked recursively.
allowed_dirs = [
    Path("config"),
    Path("docs"),
    Path("scripts"),
    Path("systemd"),
    Path("reports/checkpoints"),
]
# Glob patterns evaluated against the working directory.
allowed_globs = [
    "data/manifests/checkpoint_*.json",
]

# Human-readable exclusion patterns. Within this program they are only copied
# into the manifest; the rules that are actually enforced live in
# is_forbidden() and the forbidden_* tables below.
excluded_patterns = [
    ".git/",
    ".venv/",
    "artifacts/",
    "data/soak_test/",
    "data/live_sample/",
    "data/normalized_sample/",
    "**/__pycache__/",
    "**/*.pyc",
    "**/*.pyo",
    "**/.pytest_cache/",
    "**/.mypy_cache/",
    "**/.ruff_cache/",
    "**/rclone.conf",
    "**/.env",
    "**/*.pem",
    "**/*.key",
    "**/*.p12",
    "**/*.pfx",
    "**/id_rsa*",
    "**/id_ed25519*",
    "**/*mnemonic*",
    "**/*wallet*",
    "**/*credential*",
    "**/*secret*",
]

# Paths that must end up in the bundle; the program aborts if any is missing
# from the selected files.
required_files = [
    "AGENTS.md",
    "ROADMAP.md",
    "config/polymarket_collector.vps.example.yaml",
    "config/rclone.example.md",
    "docs/VPS_CUTOVER_RUNBOOK.md",
    "docs/VPS_DEPLOYMENT.md",
    "docs/GOOGLE_DRIVE_OFFLOAD.md",
    "scripts/build_vps_deploy_bundle.sh",
    "scripts/vps_preflight_check.sh",
    "scripts/vps_runtime_smoke_check.sh",
    "scripts/run_polymarket_collector_cycle.sh",
    "scripts/upload_archive_rclone.sh",
    "scripts/discover_polymarket_btc_markets.py",
    "scripts/collect_polymarket_orderbooks.py",
    "scripts/normalize_polymarket_orderbooks.py",
    "systemd/polymarket-orderbook-collector.service",
    "systemd/polymarket-orderbook-uploader.service",
    "systemd/polymarket-orderbook-uploader.timer",
]

# --- Deny rules consulted by is_forbidden(). ---
# Substrings matched against the candidate path wrapped in "/" so that a
# fragment can match at the start of the path as well as further down.
forbidden_path_fragments = [
    "/.git/",
    "/.venv/",
    "/__pycache__/",
    "/data/soak_test/",
    "/data/live_sample/",
    "/data/normalized_sample/",
    "/artifacts/",
]
# Exact file names, compared against the lower-cased final path component.
forbidden_names = {
    "rclone.conf",
    ".env",
    "id_rsa",
    "id_ed25519",
}
# File suffixes, compared against the lower-cased suffix.
forbidden_suffixes = {
    ".pyc",
    ".pyo",
    ".pem",
    ".key",
    ".p12",
    ".pfx",
}
# Substrings that mark a lower-cased file name as potentially sensitive.
secretish_name_tokens = [
    "mnemonic",
    "wallet",
    "credential",
    "secret",
]
def as_posix(path: Path) -> str:
    """Render ``path`` as text with forward slashes as separators."""
    posix_text = Path.as_posix(path)
    return posix_text
def is_forbidden(path: Path) -> tuple[bool, str | None]:
    """Decide whether a candidate path must be kept out of the bundle.

    Args:
        path: Candidate path, expected to be relative to the working tree.
            The filesystem is consulted once, to tell directories from files.

    Returns:
        ``(True, reason)`` with a short machine-readable reason when the path
        is rejected, otherwise ``(False, None)``.
    """
    rel = as_posix(path)
    # Wrap in slashes so fragments such as "/.git/" also match a leading
    # component, and a directory matches its own name.
    wrapped = f"/{rel}/" if path.is_dir() else f"/{rel}"
    if path.is_absolute() or ".." in path.parts:
        return True, "absolute_or_parent_path"
    for fragment in forbidden_path_fragments:
        if fragment in wrapped:
            return True, f"forbidden_fragment:{fragment}"
    if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in path.parts):
        return True, "forbidden_cache_or_metadata_dir"
    lower_name = path.name.lower()
    if lower_name in forbidden_names:
        return True, f"forbidden_name:{path.name}"
    # The published exclusion patterns for SSH keys are prefix globs
    # ("id_rsa*", "id_ed25519*"); an exact-name test alone would let
    # variants such as "id_rsa.bak" or "id_ed25519_deploy" through.
    if lower_name.startswith(("id_rsa", "id_ed25519")):
        return True, f"forbidden_name:{path.name}"
    if path.suffix.lower() in forbidden_suffixes:
        return True, f"forbidden_suffix:{path.suffix}"
    if any(token in lower_name for token in secretish_name_tokens):
        return True, f"secretish_name:{path.name}"
    if rel.startswith(("data/soak_test/", "data/live_sample/", "data/normalized_sample/", "artifacts/")):
        return True, "forbidden_prefix"
    return False, None
def iter_allowed_files():
    """Yield every allowlisted regular file exactly once, as a relative path.

    Sources are visited in a fixed order: the explicit ``allowed_files``,
    then each directory in ``allowed_dirs`` (walked recursively, sorted),
    then each pattern in ``allowed_globs`` (sorted).

    Glob matches are converted back to paths relative to ``root``. ``root``
    is absolute, so ``root.glob()`` yields absolute paths; left as-is, each
    match would be rejected by the absolute-path rule and the allowlisted
    glob files could never enter the bundle.
    """
    seen = set()

    def fresh(candidates):
        # Keep only regular files that have not been yielded before.
        for candidate in candidates:
            if candidate.is_file() and candidate not in seen:
                seen.add(candidate)
                yield candidate

    yield from fresh(allowed_files)
    for directory in allowed_dirs:
        if not directory.exists():
            continue
        yield from fresh(sorted(directory.rglob("*")))
    for pattern in allowed_globs:
        yield from fresh(sorted(match.relative_to(root) for match in root.glob(pattern)))
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB blocks so it never has to fit in memory.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if block == b"":
                break
            hasher.update(block)
    return hasher.hexdigest()
# Split the allowlisted candidates into bundled and rejected files, recording
# size and checksum for everything that will be bundled.
included = []
excluded = []
for path in iter_allowed_files():
    forbidden, reason = is_forbidden(path)
    if forbidden:
        excluded.append({"path": as_posix(path), "reason": reason})
        continue
    stat = path.stat()
    included.append({
        "path": as_posix(path),
        "bytes": stat.st_size,
        "sha256": sha256_file(path),
    })

included_paths = sorted(item["path"] for item in included)
# Set lookup keeps the required-file check linear in the number of files.
included_path_set = set(included_paths)
missing_required = sorted(path for path in required_files if path not in included_path_set)
if missing_required:
    raise SystemExit(f"missing required bundle files: {missing_required}")
if not included:
    raise SystemExit("bundle file list is empty")

# NUL-terminated entries, the format the surrounding script passes to
# `tar --null --files-from`.
filelist_path.write_bytes(b"".join(path.encode("utf-8") + b"\0" for path in included_paths))

# dt.timezone.utc is used instead of dt.UTC: the alias only exists from
# Python 3.11, while the rest of this program also runs on 3.10.
created_at = dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
manifest = {
    "schema_name": "vps_deploy_bundle_manifest",
    "schema_version": 1,
    "created_at_utc": created_at,
    "timestamp": timestamp,
    "tarball_path": as_posix(tarball_path),
    "manifest_path": as_posix(manifest_path),
    "source_root": str(root),
    "bundle_intent": "Copy to a VPS and unpack under /opt/orderbooks; VPS execution remains pending.",
    "production_ready": False,
    "vps_deployed": False,
    "included_roots": [str(path) for path in allowed_files + allowed_dirs] + allowed_globs,
    "excluded_patterns": excluded_patterns,
    "required_files": required_files,
    "included_file_count": len(included),
    "included_files": included,
    "excluded_selected_files": excluded,
    "missing_required_files": missing_required,
    "validation": {
        "required_files_present_before_tar": not missing_required,
        # Holds by construction: rejected candidates never reach `included`.
        "forbidden_paths_absent_before_tar": True,
        # Flipped to True by the validation stage once the tarball is checked.
        "tarball_validation_completed": False,
    },
}
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY_BUNDLE_SELECT
# Stage 2: pack exactly the paths listed (NUL-delimited) in FILELIST into a
# gzip-compressed archive whose entries are recorded with owner and group 0.
tar_options=(
  --create
  --gzip
  --file "${TARBALL}"
  --null
  --files-from "${FILELIST}"
  --owner=0
  --group=0
  --numeric-owner
)
tar "${tar_options[@]}"
# Stage 3: re-open the finished tarball and validate its contents. The
# embedded Python program reads the tarball and manifest paths from
# argv[1..2] and rewrites the manifest with the validation outcome.
PYTHONDONTWRITEBYTECODE=1 python3 - "${TARBALL}" "${MANIFEST}" <<'PY_BUNDLE_VALIDATE'
import hashlib
import json
import sys
import tarfile
from pathlib import Path
# Inputs: the two paths handed over by the shell script, and the manifest
# written by the selection stage.
tarball_path, manifest_path = map(Path, (sys.argv[1], sys.argv[2]))
with manifest_path.open("r", encoding="utf-8") as manifest_handle:
    manifest = json.load(manifest_handle)
required_files = set(manifest["required_files"])
def sha256_file(path: Path) -> str:
    """Compute the SHA-256 of a file and return it as a hex string.

    Reads 1 MiB at a time, so memory use does not grow with file size.
    """
    checksum = hashlib.sha256()
    with path.open("rb") as source:
        while piece := source.read(1024 * 1024):
            checksum.update(piece)
    return checksum.hexdigest()
def forbidden_reason(name: str) -> str | None:
parts = name.split("/")
lower_name = parts[-1].lower()
if name.startswith("/") or any(part == ".." for part in parts):
return "absolute_or_parent_path"
if parts[0] in {".git", ".venv", "artifacts"}:
return f"forbidden_top_level:{parts[0]}"
if len(parts) >= 2 and parts[0] == "data" and parts[1] in {"soak_test", "live_sample", "normalized_sample"}:
return f"forbidden_data_dir:data/{parts[1]}"
if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in parts):
return "forbidden_cache_or_metadata_dir"
if lower_name in {"rclone.conf", ".env", "id_rsa", "id_ed25519"}:
return f"forbidden_name:{lower_name}"
if any(lower_name.endswith(suffix) for suffix in (".pyc", ".pyo", ".pem", ".key", ".p12", ".pfx")):
return "forbidden_suffix"
if any(token in lower_name for token in ("mnemonic", "wallet", "credential", "secret")):
return "secretish_name"
return None
# Collect the names of all regular-file members of the archive.
with tarfile.open(tarball_path, "r:gz") as archive:
    names = sorted(member.name for member in archive.getmembers() if member.isfile())

# Check every member name against the deny rules and the required-file list.
forbidden = []
for name in names:
    reason = forbidden_reason(name)
    if reason:
        forbidden.append({"path": name, "reason": reason})
missing_required = sorted(required_files - set(names))
validation_failed = bool(forbidden or missing_required)

# Tarball statistics are only recorded for a bundle that passed.
if not validation_failed:
    manifest["tarball_bytes"] = tarball_path.stat().st_size
    manifest["tarball_sha256"] = sha256_file(tarball_path)
    manifest["tarball_content_count"] = len(names)
    manifest["tarball_contents"] = names

# The outcome is written to the manifest on both the pass and the fail path.
manifest["validation"].update({
    "tarball_validation_completed": True,
    "forbidden_paths_absent_in_tarball": not forbidden,
    "required_files_present_in_tarball": not missing_required,
    "forbidden_paths_in_tarball": forbidden,
    "missing_required_files_in_tarball": missing_required,
})
if not validation_failed:
    manifest["gate_status"] = "PASS"
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")

# Fail only after the manifest has been persisted, so the reasons stay on disk.
if validation_failed:
    raise SystemExit(f"bundle validation failed forbidden={forbidden} missing_required={missing_required}")
PY_BUNDLE_VALIDATE
# Emit a KEY=VALUE summary of the finished bundle on stdout.
printf 'BUNDLE_TARBALL=%s\n' "${TARBALL}"
printf 'BUNDLE_MANIFEST=%s\n' "${MANIFEST}"
# The checksum, size and file count are read back from the validated manifest.
# PYTHONDONTWRITEBYTECODE=1 matches the other two interpreter invocations, so
# no Python run in this script writes bytecode caches.
PYTHONDONTWRITEBYTECODE=1 python3 - "${MANIFEST}" <<'PY_PRINT'
import json
import sys
from pathlib import Path

m = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
print(f"BUNDLE_SHA256={m['tarball_sha256']}")
print(f"BUNDLE_BYTES={m['tarball_bytes']}")
print(f"BUNDLE_FILE_COUNT={m['tarball_content_count']}")
PY_PRINT