#!/usr/bin/env bash
#
# Build a deployable VPS bundle (tarball + JSON manifest) from a working tree.
#
# Steps:
#   1. Select files from a narrow allowlist, dropping anything that looks like
#      live data, caches, VCS metadata, or secrets; write a preliminary manifest.
#   2. Create the gzip tarball from the selected file list.
#   3. Re-open the tarball, re-validate its contents, and finalize the manifest.
#   4. Print BUNDLE_* key=value lines describing the result.
#
# Environment overrides:
#   ORDERBOOKS_APP_DIR                 Source working tree (default: cwd).
#   ORDERBOOKS_VPS_BUNDLE_OUTPUT_DIR   Output directory (default: artifacts/vps).
#   ORDERBOOKS_VPS_BUNDLE_TIMESTAMP    Timestamp used in artifact names.
set -euo pipefail

APP_DIR="${ORDERBOOKS_APP_DIR:-$(pwd)}"
OUTPUT_DIR="${ORDERBOOKS_VPS_BUNDLE_OUTPUT_DIR:-artifacts/vps}"
TIMESTAMP="${ORDERBOOKS_VPS_BUNDLE_TIMESTAMP:-$(date -u +%Y%m%dT%H%M%SZ)}"

# Print the help text to stdout.
usage() {
  cat <<'EOF'
Usage:
  scripts/build_vps_deploy_bundle.sh [options]

Build a deployable VPS bundle from the current working tree. The bundle is
intended to be copied to a VPS and unpacked under /opt/orderbooks.

Options:
  --app-dir DIR       Source working tree. Default: ORDERBOOKS_APP_DIR or current directory.
  --output-dir DIR    Bundle output directory. Default: artifacts/vps.
                      Relative paths are resolved against the source working tree.
  --timestamp TS      Override UTC timestamp used in artifact names.
  --help              Show this help.

The bundle uses a narrow allowlist and excludes live data, caches, git metadata,
virtualenvs, rclone config, private keys, wallets, mnemonics, and generated
artifacts. It does not print secrets and does not write Python bytecode.
EOF
}

# Abort with a usage error when an option is missing its value.
# Arguments: $1 - option name, $2 - number of remaining positional parameters.
require_value() {
  if (( $2 < 2 )); then
    echo "Missing value for $1" >&2
    usage >&2
    exit 2
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --app-dir)
      require_value "$1" "$#"
      APP_DIR="$2"
      shift 2
      ;;
    --output-dir)
      require_value "$1" "$#"
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --timestamp)
      require_value "$1" "$#"
      TIMESTAMP="$2"
      shift 2
      ;;
    --help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

# Artifact paths are derived once, after all overrides have been applied.
BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"
TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"
MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"

APP_DIR="${APP_DIR%/}"
if [[ ! -d "${APP_DIR}" ]]; then
  echo "Source app directory does not exist: ${APP_DIR}" >&2
  exit 1
fi

# Enter the source tree BEFORE creating the output directory: every later step
# (overwrite check, manifest write, tar) resolves relative paths from here, so
# the directory must be created relative to the same location.
cd "${APP_DIR}"
mkdir -p "${OUTPUT_DIR}"

if [[ -e "${TARBALL}" || -e "${MANIFEST}" ]]; then
  echo "Refusing to overwrite existing bundle artifact: ${TARBALL} or ${MANIFEST}" >&2
  exit 1
fi

# NUL-delimited list of files handed from the selection step to tar.
# Kept global so the EXIT trap can still see it when the shell terminates.
FILELIST="$(mktemp)"
trap 'rm -f "${FILELIST}"' EXIT

# --- Step 1: select files and write the preliminary manifest -----------------
PYTHONDONTWRITEBYTECODE=1 python3 - "${FILELIST}" "${MANIFEST}" "${TARBALL}" "${TIMESTAMP}" <<'PY_BUNDLE_SELECT'
import datetime as dt
import hashlib
import json
import sys
from pathlib import Path

filelist_path = Path(sys.argv[1])
manifest_path = Path(sys.argv[2])
tarball_path = Path(sys.argv[3])
timestamp = sys.argv[4]
root = Path.cwd()

allowed_files = [
    Path("AGENTS.md"),
    Path("ROADMAP.md"),
]
allowed_dirs = [
    Path("config"),
    Path("docs"),
    Path("scripts"),
    Path("systemd"),
    Path("reports/checkpoints"),
]
allowed_globs = [
    "data/manifests/checkpoint_*.json",
]
# Recorded in the manifest for humans; enforcement is done by is_forbidden().
excluded_patterns = [
    ".git/",
    ".venv/",
    "artifacts/",
    "data/soak_test/",
    "data/live_sample/",
    "data/normalized_sample/",
    "**/__pycache__/",
    "**/*.pyc",
    "**/*.pyo",
    "**/.pytest_cache/",
    "**/.mypy_cache/",
    "**/.ruff_cache/",
    "**/rclone.conf",
    "**/.env",
    "**/*.pem",
    "**/*.key",
    "**/*.p12",
    "**/*.pfx",
    "**/id_rsa*",
    "**/id_ed25519*",
    "**/*mnemonic*",
    "**/*wallet*",
    "**/*credential*",
    "**/*secret*",
]
required_files = [
    "AGENTS.md",
    "ROADMAP.md",
    "config/polymarket_collector.vps.example.yaml",
    "config/rclone.example.md",
    "docs/VPS_CUTOVER_RUNBOOK.md",
    "docs/VPS_DEPLOYMENT.md",
    "docs/GOOGLE_DRIVE_OFFLOAD.md",
    "scripts/build_vps_deploy_bundle.sh",
    "scripts/vps_preflight_check.sh",
    "scripts/vps_runtime_smoke_check.sh",
    "scripts/run_polymarket_collector_cycle.sh",
    "scripts/upload_archive_rclone.sh",
    "scripts/discover_polymarket_btc_markets.py",
    "scripts/collect_polymarket_orderbooks.py",
    "scripts/normalize_polymarket_orderbooks.py",
    "systemd/polymarket-orderbook-collector.service",
    "systemd/polymarket-orderbook-uploader.service",
    "systemd/polymarket-orderbook-uploader.timer",
]
forbidden_path_fragments = [
    "/.git/",
    "/.venv/",
    "/__pycache__/",
    "/data/soak_test/",
    "/data/live_sample/",
    "/data/normalized_sample/",
    "/artifacts/",
]
forbidden_names = {
    "rclone.conf",
    ".env",
}
# Prefix match so variants such as "id_rsa.bak" are rejected too, in line with
# the "**/id_rsa*" and "**/id_ed25519*" entries in excluded_patterns.
forbidden_name_prefixes = (
    "id_rsa",
    "id_ed25519",
)
forbidden_suffixes = {
    ".pyc",
    ".pyo",
    ".pem",
    ".key",
    ".p12",
    ".pfx",
}
secretish_name_tokens = [
    "mnemonic",
    "wallet",
    "credential",
    "secret",
]
forbidden_dir_parts = {
    ".git",
    ".venv",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
}


def as_posix(path: Path) -> str:
    """Return the path with forward slashes."""
    return path.as_posix()


def is_forbidden(path: Path) -> tuple[bool, str | None]:
    """Return (True, reason) if the root-relative path must not be bundled."""
    rel = as_posix(path)
    wrapped = f"/{rel}/" if path.is_dir() else f"/{rel}"
    if path.is_absolute() or ".." in path.parts:
        return True, "absolute_or_parent_path"
    for fragment in forbidden_path_fragments:
        if fragment in wrapped:
            return True, f"forbidden_fragment:{fragment}"
    if any(part in forbidden_dir_parts for part in path.parts):
        return True, "forbidden_cache_or_metadata_dir"
    lower_name = path.name.lower()
    if lower_name in forbidden_names or lower_name.startswith(forbidden_name_prefixes):
        return True, f"forbidden_name:{path.name}"
    if path.suffix.lower() in forbidden_suffixes:
        return True, f"forbidden_suffix:{path.suffix}"
    if any(token in lower_name for token in secretish_name_tokens):
        return True, f"secretish_name:{path.name}"
    if rel.startswith(("data/soak_test/", "data/live_sample/", "data/normalized_sample/", "artifacts/")):
        return True, "forbidden_prefix"
    return False, None


def iter_allowed_files():
    """Yield each allowlisted regular file once, as a root-relative path."""
    seen = set()
    for path in allowed_files:
        if path.is_file() and path not in seen:
            seen.add(path)
            yield path
    for directory in allowed_dirs:
        if not directory.exists():
            continue
        for path in sorted(directory.rglob("*")):
            if path.is_file() and path not in seen:
                seen.add(path)
                yield path
    for pattern in allowed_globs:
        for match in sorted(root.glob(pattern)):
            # root is absolute, so glob() yields absolute paths; convert back
            # to root-relative or is_forbidden() would reject every match.
            path = match.relative_to(root)
            if path.is_file() and path not in seen:
                seen.add(path)
                yield path


def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 of a file, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


included = []
excluded = []
for path in iter_allowed_files():
    forbidden, reason = is_forbidden(path)
    if forbidden:
        excluded.append({"path": as_posix(path), "reason": reason})
        continue
    stat = path.stat()
    included.append({
        "path": as_posix(path),
        "bytes": stat.st_size,
        "sha256": sha256_file(path),
    })

included_paths = sorted(item["path"] for item in included)
missing_required = sorted(path for path in required_files if path not in included_paths)
if missing_required:
    raise SystemExit(f"missing required bundle files: {missing_required}")
if not included:
    raise SystemExit("bundle file list is empty")

# NUL-delimited so tar --null can consume arbitrary file names safely.
filelist_path.write_bytes(b"".join(path.encode("utf-8") + b"\0" for path in included_paths))

# dt.timezone.utc instead of dt.UTC: the latter only exists on Python >= 3.11.
created_at = dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
manifest = {
    "schema_name": "vps_deploy_bundle_manifest",
    "schema_version": 1,
    "created_at_utc": created_at,
    "timestamp": timestamp,
    "tarball_path": as_posix(tarball_path),
    "manifest_path": as_posix(manifest_path),
    "source_root": str(root),
    "bundle_intent": "Copy to a VPS and unpack under /opt/orderbooks; VPS execution remains pending.",
    "production_ready": False,
    "vps_deployed": False,
    "included_roots": [str(path) for path in allowed_files + allowed_dirs] + allowed_globs,
    "excluded_patterns": excluded_patterns,
    "required_files": required_files,
    "included_file_count": len(included),
    "included_files": included,
    "excluded_selected_files": excluded,
    "missing_required_files": missing_required,
    "validation": {
        "required_files_present_before_tar": not missing_required,
        "forbidden_paths_absent_before_tar": True,
        "tarball_validation_completed": False,
    },
}
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY_BUNDLE_SELECT

# --- Step 2: create the tarball from the selected file list ------------------
tar --create --gzip --file "${TARBALL}" \
  --null --files-from "${FILELIST}" \
  --owner=0 --group=0 --numeric-owner

# --- Step 3: validate the tarball and finalize the manifest ------------------
PYTHONDONTWRITEBYTECODE=1 python3 - "${TARBALL}" "${MANIFEST}" <<'PY_BUNDLE_VALIDATE'
import hashlib
import json
import sys
import tarfile
from pathlib import Path

tarball_path = Path(sys.argv[1])
manifest_path = Path(sys.argv[2])
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
required_files = set(manifest["required_files"])


def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 of a file, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def forbidden_reason(name: str) -> str | None:
    """Return why an archive member name is forbidden, or None if it is allowed."""
    parts = name.split("/")
    lower_name = parts[-1].lower()
    if name.startswith("/") or any(part == ".." for part in parts):
        return "absolute_or_parent_path"
    if parts[0] in {".git", ".venv", "artifacts"}:
        return f"forbidden_top_level:{parts[0]}"
    if len(parts) >= 2 and parts[0] == "data" and parts[1] in {"soak_test", "live_sample", "normalized_sample"}:
        return f"forbidden_data_dir:data/{parts[1]}"
    if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in parts):
        return "forbidden_cache_or_metadata_dir"
    # Key files are matched by prefix so variants such as "id_rsa.bak" are
    # caught, consistent with the "**/id_rsa*" patterns recorded in the manifest.
    if lower_name in {"rclone.conf", ".env"} or lower_name.startswith(("id_rsa", "id_ed25519")):
        return f"forbidden_name:{lower_name}"
    if any(lower_name.endswith(suffix) for suffix in (".pyc", ".pyo", ".pem", ".key", ".p12", ".pfx")):
        return "forbidden_suffix"
    if any(token in lower_name for token in ("mnemonic", "wallet", "credential", "secret")):
        return "secretish_name"
    return None


with tarfile.open(tarball_path, "r:gz") as archive:
    members = [member for member in archive.getmembers() if member.isfile()]
    names = sorted(member.name for member in members)

forbidden = [{"path": name, "reason": forbidden_reason(name)} for name in names if forbidden_reason(name)]
missing_required = sorted(required_files - set(names))

if forbidden or missing_required:
    # Record the failure details in the manifest before aborting.
    manifest["validation"].update({
        "tarball_validation_completed": True,
        "forbidden_paths_absent_in_tarball": not forbidden,
        "required_files_present_in_tarball": not missing_required,
        "forbidden_paths_in_tarball": forbidden,
        "missing_required_files_in_tarball": missing_required,
    })
    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    raise SystemExit(f"bundle validation failed forbidden={forbidden} missing_required={missing_required}")

manifest["tarball_bytes"] = tarball_path.stat().st_size
manifest["tarball_sha256"] = sha256_file(tarball_path)
manifest["tarball_content_count"] = len(names)
manifest["tarball_contents"] = names
manifest["validation"].update({
    "tarball_validation_completed": True,
    "forbidden_paths_absent_in_tarball": True,
    "required_files_present_in_tarball": True,
    "forbidden_paths_in_tarball": [],
    "missing_required_files_in_tarball": [],
})
manifest["gate_status"] = "PASS"
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY_BUNDLE_VALIDATE

# --- Step 4: report the result as key=value lines ----------------------------
printf 'BUNDLE_TARBALL=%s\n' "${TARBALL}"
printf 'BUNDLE_MANIFEST=%s\n' "${MANIFEST}"
python3 - "${MANIFEST}" <<'PY_PRINT'
import json
import sys
from pathlib import Path

m = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
print(f"BUNDLE_SHA256={m['tarball_sha256']}")
print(f"BUNDLE_BYTES={m['tarball_bytes']}")
print(f"BUNDLE_FILE_COUNT={m['tarball_content_count']}")
PY_PRINT