366 lines
12 KiB
Bash
Executable file
366 lines
12 KiB
Bash
Executable file
#!/usr/bin/env bash
# Build a deployable VPS bundle (gzip tarball plus JSON manifest) from a
# working tree.
#
# Environment overrides, each of which can also be set by a command-line
# option (see usage):
#   ORDERBOOKS_APP_DIR                 source working tree
#   ORDERBOOKS_VPS_BUNDLE_OUTPUT_DIR   bundle output directory
#   ORDERBOOKS_VPS_BUNDLE_TIMESTAMP    UTC timestamp used in artifact names

set -euo pipefail

# Source tree to bundle; an unset or empty override means the invocation
# directory.
APP_DIR="${ORDERBOOKS_APP_DIR:-}"
if [[ -z "${APP_DIR}" ]]; then
  APP_DIR="$(pwd)"
fi

OUTPUT_DIR="${ORDERBOOKS_VPS_BUNDLE_OUTPUT_DIR:-artifacts/vps}"

# An unset or empty override means "now", in UTC.
TIMESTAMP="${ORDERBOOKS_VPS_BUNDLE_TIMESTAMP:-}"
if [[ -z "${TIMESTAMP}" ]]; then
  TIMESTAMP="$(date -u +%Y%m%dT%H%M%SZ)"
fi

# Artifact names are derived from the timestamp; one trailing slash on the
# output directory is dropped before the paths are joined.
BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"
TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"
MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
|
|
|
|
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   help text on stdout (callers redirect it to stderr when they
#            report a usage error)
#######################################
usage() {
  printf '%s\n' \
    'Usage: scripts/build_vps_deploy_bundle.sh [options]' \
    '' \
    'Build a deployable VPS bundle from the current working tree. The bundle is' \
    'intended to be copied to a VPS and unpacked under /opt/orderbooks.' \
    '' \
    'Options:' \
    '--app-dir DIR Source working tree. Default: ORDERBOOKS_APP_DIR or current directory.' \
    '--output-dir DIR Bundle output directory. Default: artifacts/vps.' \
    '--timestamp TS Override UTC timestamp used in artifact names.' \
    '--help Show this help.' \
    '' \
    'The bundle uses a narrow allowlist and excludes live data, caches, git metadata,' \
    'virtualenvs, rclone config, private keys, wallets, mnemonics, and generated' \
    'artifacts. It does not print secrets and does not write Python bytecode.'
}
|
|
|
|
#######################################
# Parse the command-line options into the global bundle settings.
# Globals:   APP_DIR, OUTPUT_DIR, TIMESTAMP (written when the matching option
#            is given); BUNDLE_BASENAME, TARBALL, MANIFEST (always re-derived)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for --help; diagnostics on stderr
# Returns:   exits 0 after --help, exits 2 on an unknown option or when an
#            option is missing its value
#######################################
parse_args() {
  local opt
  while (( $# > 0 )); do
    opt="$1"
    case "${opt}" in
      --app-dir | --output-dir | --timestamp)
        # With `set -u` a bare "$2" aborts with an opaque "unbound variable"
        # error, so report a missing value explicitly instead.
        if (( $# < 2 )); then
          echo "Missing value for ${opt}" >&2
          usage >&2
          exit 2
        fi
        case "${opt}" in
          --app-dir) APP_DIR="$2" ;;
          --output-dir) OUTPUT_DIR="$2" ;;
          --timestamp) TIMESTAMP="$2" ;;
        esac
        shift 2
        ;;
      --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        usage >&2
        exit 2
        ;;
    esac
  done

  # Derive the artifact names once, after every override has been applied, so
  # the result does not depend on the order in which options were given.
  BUNDLE_BASENAME="orderbooks_vps_deploy_${TIMESTAMP}"
  TARBALL="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}.tar.gz"
  MANIFEST="${OUTPUT_DIR%/}/${BUNDLE_BASENAME}_manifest.json"
}

parse_args "$@"
|
|
|
|
# Normalise the source directory: drop one trailing slash, but leave a bare
# "/" alone (stripping it would produce an empty path that fails the
# directory check below).
if [[ "${APP_DIR}" != "/" ]]; then
  APP_DIR="${APP_DIR%/}"
fi
if [[ ! -d "${APP_DIR}" ]]; then
  echo "Source app directory does not exist: ${APP_DIR}" >&2
  exit 1
fi

# Enter the source tree before creating the output directory. Every later
# step (the overwrite check, the manifest writer, tar) resolves a relative
# OUTPUT_DIR from inside APP_DIR, so the directory has to be created there;
# creating it before the cd would place it under the caller's directory and
# leave the real destination missing whenever APP_DIR is not the cwd.
cd -- "${APP_DIR}"
mkdir -p -- "${OUTPUT_DIR}"

# Never clobber a previously built bundle with the same timestamp.
if [[ -e "${TARBALL}" || -e "${MANIFEST}" ]]; then
  echo "Refusing to overwrite existing bundle artifact: ${TARBALL} or ${MANIFEST}" >&2
  exit 1
fi
|
|
|
|
# Scratch file that receives the NUL-separated list of paths to archive.
FILELIST="$(mktemp)"

# Remove the scratch file on every exit path.
cleanup_filelist() {
  rm -f "${FILELIST}"
}
trap cleanup_filelist EXIT
|
|
|
|
PYTHONDONTWRITEBYTECODE=1 python3 - "${FILELIST}" "${MANIFEST}" "${TARBALL}" "${TIMESTAMP}" <<'PY_BUNDLE_SELECT'
# Selection stage: pick the files to bundle, then write the NUL-separated file
# list consumed by tar and the initial manifest.
import datetime as dt
import fnmatch
import hashlib
import json
import os
import sys
from pathlib import Path

# Positional arguments supplied by the surrounding shell script:
# file list path, manifest path, tarball path, timestamp.
filelist_path, manifest_path, tarball_path = (Path(arg) for arg in sys.argv[1:4])
timestamp = sys.argv[4]
# The shell script has already changed into the source tree.
root = Path.cwd()
|
|
|
|
# Individual top-level files that may be bundled.
allowed_files = [Path(name) for name in ("AGENTS.md", "ROADMAP.md")]

# Directories whose files (at any depth) are candidates for the bundle.
allowed_dirs = [
    Path(name)
    for name in ("config", "docs", "scripts", "systemd", "reports/checkpoints")
]

# Additional candidates selected by glob pattern.
allowed_globs = ["data/manifests/checkpoint_*.json"]

# Exclusion patterns as recorded in the manifest. In this script the list is
# only written to the manifest; the screening itself is done by is_forbidden().
excluded_patterns = [
    ".git/",
    ".venv/",
    "artifacts/",
    "data/soak_test/",
    "data/live_sample/",
    "data/normalized_sample/",
] + [
    f"**/{pattern}"
    for pattern in (
        "__pycache__/",
        "*.pyc",
        "*.pyo",
        ".pytest_cache/",
        ".mypy_cache/",
        ".ruff_cache/",
        "rclone.conf",
        ".env",
        "*.pem",
        "*.key",
        "*.p12",
        "*.pfx",
        "id_rsa*",
        "id_ed25519*",
        "*mnemonic*",
        "*wallet*",
        "*credential*",
        "*secret*",
    )
]

# Files that must end up in the bundle; selection aborts if any is missing.
required_files = [
    # Top-level documents
    "AGENTS.md",
    "ROADMAP.md",
    # Configuration examples
    "config/polymarket_collector.vps.example.yaml",
    "config/rclone.example.md",
    # Deployment documentation
    "docs/VPS_CUTOVER_RUNBOOK.md",
    "docs/VPS_DEPLOYMENT.md",
    "docs/GOOGLE_DRIVE_OFFLOAD.md",
    # Scripts
    "scripts/build_vps_deploy_bundle.sh",
    "scripts/vps_preflight_check.sh",
    "scripts/vps_runtime_smoke_check.sh",
    "scripts/run_polymarket_collector_cycle.sh",
    "scripts/upload_archive_rclone.sh",
    "scripts/discover_polymarket_btc_markets.py",
    "scripts/collect_polymarket_orderbooks.py",
    "scripts/normalize_polymarket_orderbooks.py",
    # systemd units
    "systemd/polymarket-orderbook-collector.service",
    "systemd/polymarket-orderbook-uploader.service",
    "systemd/polymarket-orderbook-uploader.timer",
]
|
|
|
|
forbidden_path_fragments = [
|
|
"/.git/",
|
|
"/.venv/",
|
|
"/__pycache__/",
|
|
"/data/soak_test/",
|
|
"/data/live_sample/",
|
|
"/data/normalized_sample/",
|
|
"/artifacts/",
|
|
]
|
|
forbidden_names = {
|
|
"rclone.conf",
|
|
".env",
|
|
"id_rsa",
|
|
"id_ed25519",
|
|
}
|
|
forbidden_suffixes = {
|
|
".pyc",
|
|
".pyo",
|
|
".pem",
|
|
".key",
|
|
".p12",
|
|
".pfx",
|
|
}
|
|
secretish_name_tokens = [
|
|
"mnemonic",
|
|
"wallet",
|
|
"credential",
|
|
"secret",
|
|
]
|
|
|
|
def as_posix(path: Path) -> str:
|
|
return path.as_posix()
|
|
|
|
def is_forbidden(path: Path) -> tuple[bool, str | None]:
|
|
rel = as_posix(path)
|
|
wrapped = f"/{rel}/" if path.is_dir() else f"/{rel}"
|
|
if path.is_absolute() or ".." in path.parts:
|
|
return True, "absolute_or_parent_path"
|
|
for fragment in forbidden_path_fragments:
|
|
if fragment in wrapped:
|
|
return True, f"forbidden_fragment:{fragment}"
|
|
if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in path.parts):
|
|
return True, "forbidden_cache_or_metadata_dir"
|
|
lower_name = path.name.lower()
|
|
if lower_name in forbidden_names:
|
|
return True, f"forbidden_name:{path.name}"
|
|
if path.suffix.lower() in forbidden_suffixes:
|
|
return True, f"forbidden_suffix:{path.suffix}"
|
|
if any(token in lower_name for token in secretish_name_tokens):
|
|
return True, f"secretish_name:{path.name}"
|
|
if rel.startswith(("data/soak_test/", "data/live_sample/", "data/normalized_sample/", "artifacts/")):
|
|
return True, "forbidden_prefix"
|
|
return False, None
|
|
|
|
def iter_allowed_files(files=None, dirs=None, globs=None):
    """Yield every allowlisted regular file once, as a cwd-relative path.

    Candidates come from three sources, visited in this order: explicit
    files, files found recursively under the allowed directories (sorted per
    directory), and files matching the allowed glob patterns (sorted per
    pattern). Entries that are not regular files are skipped.

    Args:
        files: Iterable of relative ``Path`` objects; defaults to the
            module-level ``allowed_files``.
        dirs: Iterable of relative directory ``Path`` objects; defaults to
            the module-level ``allowed_dirs``. Missing directories are
            skipped.
        globs: Iterable of glob patterns evaluated relative to the current
            working directory; defaults to the module-level ``allowed_globs``.

    Yields:
        Relative ``Path`` objects, each at most once.
    """
    files = allowed_files if files is None else files
    dirs = allowed_dirs if dirs is None else dirs
    globs = allowed_globs if globs is None else globs

    seen = set()

    def unseen_files(candidates):
        # Keep only regular files that have not been yielded before.
        for candidate in candidates:
            if candidate.is_file() and candidate not in seen:
                seen.add(candidate)
                yield candidate

    yield from unseen_files(files)
    for directory in dirs:
        if directory.exists():
            yield from unseen_files(sorted(directory.rglob("*")))
    for pattern in globs:
        # Glob relative to the cwd rather than from the absolute Path.cwd():
        # absolute results are always rejected by is_forbidden() as
        # "absolute_or_parent_path", which made this allowlist entry
        # impossible to satisfy, and they also defeated the de-duplication
        # against the relative paths collected above.
        yield from unseen_files(sorted(Path().glob(pattern)))
|
|
|
|
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB blocks so large files are never held in memory
    in full.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(1024 * 1024)
            if not block:
                break
            hasher.update(block)
    return hasher.hexdigest()
|
|
|
|
# Screen every allowlisted candidate: rejected paths are recorded with the
# reason, accepted ones with their size and SHA-256.
included = []
excluded = []
for path in iter_allowed_files():
    forbidden, reason = is_forbidden(path)
    if forbidden:
        excluded.append({"path": as_posix(path), "reason": reason})
        continue
    included.append({
        "path": as_posix(path),
        "bytes": path.stat().st_size,
        "sha256": sha256_file(path),
    })

included_paths = sorted(item["path"] for item in included)
# Set membership keeps the required-file check linear in the number of files.
included_path_set = set(included_paths)
missing_required = sorted(path for path in required_files if path not in included_path_set)
# Abort before writing anything if the bundle would be incomplete or empty.
if missing_required:
    raise SystemExit(f"missing required bundle files: {missing_required}")
if not included:
    raise SystemExit("bundle file list is empty")

# NUL-separated list of paths, read by tar via --null --files-from.
filelist_path.write_bytes(b"".join(path.encode("utf-8") + b"\0" for path in included_paths))

# dt.timezone.utc instead of the dt.UTC alias: the alias only exists on
# Python 3.11+, and using it would crash the script on 3.10 after all files
# have already been hashed.
created_at = (
    dt.datetime.now(dt.timezone.utc)
    .replace(microsecond=0)
    .isoformat()
    .replace("+00:00", "Z")
)
manifest = {
    "schema_name": "vps_deploy_bundle_manifest",
    "schema_version": 1,
    "created_at_utc": created_at,
    "timestamp": timestamp,
    "tarball_path": as_posix(tarball_path),
    "manifest_path": as_posix(manifest_path),
    "source_root": str(root),
    "bundle_intent": "Copy to a VPS and unpack under /opt/orderbooks; VPS execution remains pending.",
    "production_ready": False,
    "vps_deployed": False,
    "included_roots": [str(path) for path in allowed_files + allowed_dirs] + allowed_globs,
    "excluded_patterns": excluded_patterns,
    "required_files": required_files,
    "included_file_count": len(included),
    "included_files": included,
    "excluded_selected_files": excluded,
    "missing_required_files": missing_required,
    "validation": {
        "required_files_present_before_tar": not missing_required,
        # Forbidden candidates were filtered out in the loop above.
        "forbidden_paths_absent_before_tar": True,
        # Flipped to True by the validation stage once the tarball exists.
        "tarball_validation_completed": False,
    },
}
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
|
PY_BUNDLE_SELECT
|
|
|
|
# Archive exactly the NUL-separated paths listed in FILELIST. Ownership is
# recorded as numeric uid/gid 0 for every member.
tar_args=(
  --create
  --gzip
  --file "${TARBALL}"
  --null
  --files-from "${FILELIST}"
  --owner=0
  --group=0
  --numeric-owner
)
tar "${tar_args[@]}"
|
|
|
|
PYTHONDONTWRITEBYTECODE=1 python3 - "${TARBALL}" "${MANIFEST}" <<'PY_BUNDLE_VALIDATE'
# Validation stage: inspect the finished tarball and record the outcome in the
# manifest written by the selection stage.
import hashlib
import json
import sys
import tarfile
from pathlib import Path

# Positional arguments supplied by the surrounding shell script:
# tarball path, manifest path.
tarball_path, manifest_path = (Path(arg) for arg in sys.argv[1:3])
with manifest_path.open(encoding="utf-8") as manifest_file:
    manifest = json.load(manifest_file)
required_files = set(manifest["required_files"])
|
|
|
|
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is consumed in 1 MiB pieces rather than being read in one go.
    """
    piece_size = 1024 * 1024
    sha = hashlib.sha256()
    with path.open("rb") as source:
        while piece := source.read(piece_size):
            sha.update(piece)
    return sha.hexdigest()
|
|
|
|
def forbidden_reason(name: str) -> str | None:
|
|
parts = name.split("/")
|
|
lower_name = parts[-1].lower()
|
|
if name.startswith("/") or any(part == ".." for part in parts):
|
|
return "absolute_or_parent_path"
|
|
if parts[0] in {".git", ".venv", "artifacts"}:
|
|
return f"forbidden_top_level:{parts[0]}"
|
|
if len(parts) >= 2 and parts[0] == "data" and parts[1] in {"soak_test", "live_sample", "normalized_sample"}:
|
|
return f"forbidden_data_dir:data/{parts[1]}"
|
|
if any(part in {".git", ".venv", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache"} for part in parts):
|
|
return "forbidden_cache_or_metadata_dir"
|
|
if lower_name in {"rclone.conf", ".env", "id_rsa", "id_ed25519"}:
|
|
return f"forbidden_name:{lower_name}"
|
|
if any(lower_name.endswith(suffix) for suffix in (".pyc", ".pyo", ".pem", ".key", ".p12", ".pfx")):
|
|
return "forbidden_suffix"
|
|
if any(token in lower_name for token in ("mnemonic", "wallet", "credential", "secret")):
|
|
return "secretish_name"
|
|
return None
|
|
|
|
with tarfile.open(tarball_path, "r:gz") as archive:
    all_members = archive.getmembers()

# Regular files are the bundle's payload: they are what the required-file
# check and the recorded content listing are based on.
names = sorted(member.name for member in all_members if member.isfile())

# Screen every non-directory member, not only regular files, so a symlink or
# hard-link entry with a forbidden name cannot slip past validation.
forbidden = []
for member_name in sorted(member.name for member in all_members if not member.isdir()):
    reason = forbidden_reason(member_name)
    if reason:
        forbidden.append({"path": member_name, "reason": reason})

missing_required = sorted(required_files - set(names))
if forbidden or missing_required:
    # Record the failure details in the manifest before aborting.
    manifest["validation"].update({
        "tarball_validation_completed": True,
        "forbidden_paths_absent_in_tarball": not forbidden,
        "required_files_present_in_tarball": not missing_required,
        "forbidden_paths_in_tarball": forbidden,
        "missing_required_files_in_tarball": missing_required,
    })
    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    raise SystemExit(f"bundle validation failed forbidden={forbidden} missing_required={missing_required}")

# Validation passed: add the tarball's size, digest and content listing.
manifest["tarball_bytes"] = tarball_path.stat().st_size
manifest["tarball_sha256"] = sha256_file(tarball_path)
manifest["tarball_content_count"] = len(names)
manifest["tarball_contents"] = names
manifest["validation"].update({
    "tarball_validation_completed": True,
    "forbidden_paths_absent_in_tarball": True,
    "required_files_present_in_tarball": True,
    "forbidden_paths_in_tarball": [],
    "missing_required_files_in_tarball": [],
})
manifest["gate_status"] = "PASS"
manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
|
PY_BUNDLE_VALIDATE
|
|
|
|
# Report the artifact locations, then the digest, size and file count that the
# validation stage stored in the manifest, as KEY=value lines on stdout.
printf 'BUNDLE_TARBALL=%s\n' "${TARBALL}"
printf 'BUNDLE_MANIFEST=%s\n' "${MANIFEST}"
# PYTHONDONTWRITEBYTECODE is set here as well, matching the other two Python
# invocations and the help text's promise not to write Python bytecode.
PYTHONDONTWRITEBYTECODE=1 python3 - "${MANIFEST}" <<'PY_PRINT'
import json
import sys
from pathlib import Path

m = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
print(f"BUNDLE_SHA256={m['tarball_sha256']}")
print(f"BUNDLE_BYTES={m['tarball_bytes']}")
print(f"BUNDLE_FILE_COUNT={m['tarball_content_count']}")
PY_PRINT
|