299 lines
9.4 KiB
Bash
Executable file
299 lines
9.4 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Shell options and default configuration for the verified-upload purger.
# Every default below can be overridden by the environment variable named in
# its expansion; command-line options parsed later take precedence over both.

# -u: expanding an unset variable is an error.
# pipefail: a pipeline fails when any of its stages fails.
# errexit (-e) is NOT enabled, so failures that matter must be checked explicitly.
set -uo pipefail

# Identity of this tool, recorded in the purge manifest.
readonly SCRIPT_NAME="orderbooks_verified_file_purger"
readonly SCRIPT_VERSION="0.1.0"

# Run mode: "dry-run" only plans the purge, "execute" deletes eligible files.
MODE="dry-run"

# Base data directory. ORDERBOOKS_UPLOAD_DATA_DIR wins over ORDERBOOKS_DATA_DIR.
DATA_DIR="${ORDERBOOKS_UPLOAD_DATA_DIR:-${ORDERBOOKS_DATA_DIR:-/var/lib/orderbooks}}"

# The three paths below default to empty here; empty values are derived from
# DATA_DIR / MANIFEST_DIR once argument parsing has finished.
MANIFEST_DIR="${ORDERBOOKS_UPLOAD_MANIFEST_DIR:-}"
MANIFEST_PATH="${ORDERBOOKS_PURGE_MANIFEST_PATH:-}"
VERIFIED_INDEX_PATH="${ORDERBOOKS_UPLOAD_VERIFIED_INDEX_PATH:-}"

# Minimum age, in days, a file must reach before it may be purged locally.
RETENTION_DAYS="${ORDERBOOKS_UPLOAD_RETENTION_DAYS:-7}"
|
|
|
|
#######################################
# Print command-line help.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr for usage errors)
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, nothing is expanded.
  cat <<'EOF'
Usage: scripts/purge_uploaded_local_files.sh [options]

Deletes local files only when they have prior verified-upload evidence in the
verified-upload index and are older than the retention window.

Options:
  --dry-run                    Plan purge only (default).
  --execute                    Delete eligible local files.
  --data-dir DIR               Base data directory. Default: /var/lib/orderbooks.
  --manifest-dir DIR           Purge manifest output directory. Default: DATA_DIR/manifests.
  --manifest-path PATH         Exact purge manifest path.
  --verified-index-path PATH   Verified-upload index path. Default: MANIFEST_DIR/upload_verified_index.json.
  --retention-days N           Keep at least N days locally. Default: 7.
  --help                       Show this help.

Environment (used when the matching option is not given):
  ORDERBOOKS_UPLOAD_DATA_DIR              Base data directory (checked first).
  ORDERBOOKS_DATA_DIR                     Base data directory (fallback).
  ORDERBOOKS_UPLOAD_MANIFEST_DIR          Purge manifest output directory.
  ORDERBOOKS_PURGE_MANIFEST_PATH          Exact purge manifest path.
  ORDERBOOKS_UPLOAD_VERIFIED_INDEX_PATH   Verified-upload index path.
  ORDERBOOKS_UPLOAD_RETENTION_DAYS        Retention window in days.
EOF
}
|
|
|
|
#######################################
# Abort with a usage error when a value-taking option has no value.
# Without this check the missing "$2" trips `set -u` and the user only sees
# an "unbound variable" message.
# Arguments: $1 - option name as typed
#            $2 - number of words still unparsed, the option itself included
# Outputs:   error message and usage on stderr
# Returns:   0 when a value is present; exits 2 otherwise
#######################################
require_value() {
  if (( $2 < 2 )); then
    printf 'Missing value for %s\n' "$1" >&2
    usage >&2
    exit 2
  fi
}

#######################################
# Parse command-line options into the configuration globals.
# Globals:   MODE, DATA_DIR, MANIFEST_DIR, MANIFEST_PATH, VERIFIED_INDEX_PATH,
#            RETENTION_DAYS (written)
# Arguments: the script's command-line words
# Outputs:   usage on stdout for --help; diagnostics on stderr for bad input
# Returns:   0 on success; exits 0 after --help, 2 on unknown or incomplete
#            options
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --dry-run)
        MODE="dry-run"
        shift
        ;;
      --execute)
        MODE="execute"
        shift
        ;;
      --data-dir)
        require_value "$1" "$#"
        DATA_DIR="$2"
        shift 2
        ;;
      --manifest-dir)
        require_value "$1" "$#"
        MANIFEST_DIR="$2"
        shift 2
        ;;
      --manifest-path)
        require_value "$1" "$#"
        MANIFEST_PATH="$2"
        shift 2
        ;;
      --verified-index-path)
        require_value "$1" "$#"
        VERIFIED_INDEX_PATH="$2"
        shift 2
        ;;
      --retention-days)
        require_value "$1" "$#"
        RETENTION_DAYS="$2"
        shift 2
        ;;
      --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# The retention window guards deletion, so validate it before anything else.
# A negative value would move the cutoff into the future and make every
# verified file eligible at once; a non-numeric value would only surface later
# as a Python traceback with no manifest written.
if ! [[ "${RETENTION_DAYS}" =~ ^[0-9]+$ ]]; then
  printf 'Invalid retention days (expected a non-negative integer): %s\n' \
    "${RETENTION_DAYS}" >&2
  exit 2
fi

# Derive the paths that were not given explicitly. "%/" strips one trailing
# slash so the joined path never contains "//".
if [[ -z "${MANIFEST_DIR}" ]]; then
  MANIFEST_DIR="${DATA_DIR%/}/manifests"
fi
if [[ -z "${VERIFIED_INDEX_PATH}" ]]; then
  VERIFIED_INDEX_PATH="${MANIFEST_DIR%/}/upload_verified_index.json"
fi

# One clock reading feeds both values so the run id always matches the start
# time: dropping "-" and ":" turns 2024-01-02T03:04:05Z into 20240102T030405Z.
STARTED_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
RUN_ID="${STARTED_AT//[-:]/}"
if [[ -z "${MANIFEST_PATH}" ]]; then
  MANIFEST_PATH="${MANIFEST_DIR%/}/purge_uploaded_local_${RUN_ID}.json"
fi

# errexit is off, so a failed mkdir has to be caught here.
if ! mkdir -p -- "$(dirname -- "${MANIFEST_PATH}")"; then
  printf 'Cannot create manifest directory for: %s\n' "${MANIFEST_PATH}" >&2
  exit 1
fi

# The embedded Python program reads its whole configuration from the environment.
export SCRIPT_NAME SCRIPT_VERSION MODE DATA_DIR MANIFEST_DIR MANIFEST_PATH VERIFIED_INDEX_PATH RETENTION_DAYS STARTED_AT
|
|
|
|
python3 - <<'PY'
|
|
import datetime as dt
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
# Run configuration handed over by the bash wrapper through exported variables.
# A missing variable raises KeyError: the wrapper is expected to export all of them.
script_name = os.environ["SCRIPT_NAME"]
script_version = os.environ["SCRIPT_VERSION"]
mode = os.environ["MODE"]

# Paths are resolved once so later comparisons work on absolute, normalized paths.
data_dir = Path(os.environ["DATA_DIR"]).resolve()
manifest_dir = Path(os.environ["MANIFEST_DIR"]).resolve()
manifest_path = Path(os.environ["MANIFEST_PATH"]).resolve()
verified_index_path = Path(os.environ["VERIFIED_INDEX_PATH"]).resolve()

# A negative window would move the cutoff into the future and make every
# verified file eligible immediately, so accept only integers >= 0.
try:
    retention_days = int(os.environ["RETENTION_DAYS"])
except ValueError:
    retention_days = -1
if retention_days < 0:
    print(
        f"RETENTION_DAYS must be a non-negative integer, got {os.environ['RETENTION_DAYS']!r}",
        file=sys.stderr,
    )
    sys.exit(2)

started_at = os.environ["STARTED_AT"]

# dt.timezone.utc instead of dt.UTC: the alias only exists on Python >= 3.11.
now = dt.datetime.now(dt.timezone.utc)
# Second-precision ISO-8601 timestamp with a "Z" suffix, taken at program start.
ended_at = now.replace(microsecond=0).isoformat().replace("+00:00", "Z")
# Files modified at or after this instant are still inside the retention window.
cutoff = now - dt.timedelta(days=retention_days)
|
|
|
|
|
|
def iso_z_from_ts(ts: float) -> str:
    """Format a POSIX timestamp as a UTC ISO-8601 string ending in "Z".

    Sub-second precision is dropped, e.g. ``0`` -> ``"1970-01-01T00:00:00Z"``.
    """
    # dt.timezone.utc instead of dt.UTC: the alias only exists on Python >= 3.11.
    moment = dt.datetime.fromtimestamp(ts, dt.timezone.utc).replace(microsecond=0)
    return moment.isoformat().replace("+00:00", "Z")
|
|
|
|
|
|
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is consumed in 1 MiB pieces so it is never held in memory whole.
    """
    piece_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if piece == b"":
                # An empty read marks end of file.
                break
            hasher.update(piece)
    return hasher.hexdigest()
|
|
|
|
|
|
def write_atomic_json(path: Path, payload: dict) -> None:
    """Write ``payload`` as pretty-printed JSON to ``path`` via rename.

    The document is first written to a temporary file in the destination
    directory and then moved over ``path`` with ``os.replace``, so readers see
    either the previous file or the complete new one. Parent directories are
    created as needed.

    Raises whatever serialization or the filesystem raises (for example
    ``TypeError`` for a non-serializable payload, ``OSError`` for I/O errors).
    On failure the temporary file is removed and ``path`` is left untouched.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "w", encoding="utf-8", dir=str(path.parent), delete=False
        ) as tmp:
            tmp_path = Path(tmp.name)
            json.dump(payload, tmp, indent=2, sort_keys=True)
            tmp.write("\n")
            # Push the bytes to disk before the rename makes them visible.
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave half-written temp files behind in the target directory.
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except FileNotFoundError:
                pass
        raise
|
|
|
|
|
|
def resolve_inside(base: Path, relative_path: str):
    """Resolve ``relative_path`` under ``base``; return None if it escapes.

    ``base`` must already be resolved. Absolute paths, ``..`` segments and
    symlinks that lead outside ``base`` all yield None, as do paths that cannot
    be resolved at all.
    """
    try:
        candidate = (base / relative_path).resolve()
        candidate.relative_to(base)
    except (ValueError, OSError, RuntimeError):
        return None
    return candidate


def build_manifest() -> dict:
    """Return the initial purge manifest for this run, with all counters at zero."""
    return {
        "schema_name": "purge_uploaded_local_manifest",
        "schema_version": 1,
        "purger": {"name": script_name, "version": script_version},
        "started_at_utc": started_at,
        "ended_at_utc": ended_at,
        "command_mode": mode,
        "operation_status": None,
        "gate_status": "PASS",
        "config": {
            "data_dir": str(data_dir),
            "manifest_dir": str(manifest_dir),
            "manifest_path": str(manifest_path),
            "verified_index_path": str(verified_index_path),
            "retention_days": retention_days,
        },
        "verified_index": {
            "path": str(verified_index_path),
            "exists": verified_index_path.exists(),
            "record_count_before": 0,
            "record_count_after": 0,
        },
        "candidate_files": [],
        "deleted_local_files": [],
        "skipped_files": [],
        "counts": {
            "eligible": 0,
            "deleted": 0,
            "within_retention": 0,
            "already_absent": 0,
            "protected": 0,
            "sha256_mismatch": 0,
            "invalid_records": 0,
            "failed": 0,
        },
        "warnings": [],
        "known_gaps": [
            "Purge trusts prior verified-upload evidence in the local verified index and does not re-run rclone copy/check during deletion.",
            "Protected local state files, including the verified-upload index itself, are not deleted by this script.",
        ],
    }


def finish(manifest: dict, exit_code: int) -> int:
    """Stamp the end time, persist the manifest, print the summary, return ``exit_code``."""
    manifest["ended_at_utc"] = (
        dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
    )
    write_atomic_json(manifest_path, manifest)
    print(json.dumps({
        "gate_status": manifest["gate_status"],
        "operation_status": manifest["operation_status"],
        "manifest_path": str(manifest_path),
        "eligible_files": manifest["counts"]["eligible"],
        "deleted_files": manifest["counts"]["deleted"],
    }, indent=2, sort_keys=True))
    return exit_code


def main() -> int:
    """Plan or perform the purge described by the verified-upload index.

    For every index record the local file is purged only when it resolves
    inside the data directory, is not the index itself, still exists, is older
    than the retention cutoff and still matches the recorded SHA-256. In
    "execute" mode eligible files are unlinked and the index records are
    stamped with ``local_deleted_at_utc``; in any other mode nothing is changed
    on disk apart from the manifest.

    A problem with one file (stat, hashing or unlink failing) is recorded in
    the manifest and turns the gate to FAIL, but the remaining records are
    still processed so the manifest and the index reflect what actually
    happened.

    Returns the process exit code: 0 when the gate is PASS, 1 otherwise.
    """
    manifest = build_manifest()
    counts = manifest["counts"]
    skipped = manifest["skipped_files"]

    if not verified_index_path.exists():
        # Nothing has ever been verified, so nothing can be purged.
        manifest["operation_status"] = "NO_VERIFIED_INDEX"
        return finish(manifest, 0)

    try:
        index_doc = json.loads(verified_index_path.read_text(encoding="utf-8"))
        if not isinstance(index_doc, dict):
            raise ValueError("index root is not a JSON object")
        records = index_doc.get("records", [])
        if not isinstance(records, list):
            raise ValueError("index 'records' is not a JSON array")
    except Exception as exc:
        manifest["operation_status"] = "INDEX_READ_FAILED"
        manifest["gate_status"] = "FAIL"
        manifest["warnings"].append(f"failed to read verified-upload index: {exc}")
        return finish(manifest, 1)

    def note_failure(relative_path, local_path, reason, exc) -> None:
        """Record a per-file error and fail the gate without aborting the run."""
        counts["failed"] += 1
        manifest["gate_status"] = "FAIL"
        skipped.append({
            "relative_path": relative_path,
            "local_path": str(local_path),
            "reason": reason,
            "error": str(exc),
        })
        manifest["warnings"].append(f"{reason} for {relative_path}: {exc}")

    manifest["verified_index"]["record_count_before"] = len(records)
    protected_path = verified_index_path.resolve()
    index_changed = False
    # Every deletion of this run carries the same timestamp.
    deleted_at = ended_at

    for record in records:
        is_mapping = isinstance(record, dict)
        relative_path = record.get("relative_path") if is_mapping else None
        sha256_expected = record.get("sha256") if is_mapping else None
        if not (
            isinstance(relative_path, str) and relative_path
            and isinstance(sha256_expected, str) and sha256_expected
        ):
            counts["invalid_records"] += 1
            skipped.append({
                "relative_path": relative_path,
                "reason": "invalid_index_record",
            })
            continue

        # Never follow an index entry out of the data directory: an absolute
        # path or "../" segment must not be able to delete unrelated files.
        local_path = resolve_inside(data_dir, relative_path)
        if local_path is None:
            counts["invalid_records"] += 1
            skipped.append({
                "relative_path": relative_path,
                "reason": "path_outside_data_dir",
            })
            continue

        if local_path == protected_path:
            counts["protected"] += 1
            continue

        try:
            stat = local_path.stat()
        except (FileNotFoundError, NotADirectoryError):
            stat = None
        except OSError as exc:
            note_failure(relative_path, local_path, "stat_failed", exc)
            continue

        if stat is None:
            counts["already_absent"] += 1
            if mode == "execute" and record.get("local_deleted_at_utc") is None:
                record["local_deleted_at_utc"] = deleted_at
                index_changed = True
            continue

        mtime = dt.datetime.fromtimestamp(stat.st_mtime, dt.timezone.utc)
        if mtime >= cutoff:
            counts["within_retention"] += 1
            continue

        try:
            sha256_actual = sha256_file(local_path)
        except OSError as exc:
            note_failure(relative_path, local_path, "hash_failed", exc)
            continue

        if sha256_actual != sha256_expected:
            # The local bytes differ from what was verified as uploaded: keep them.
            counts["sha256_mismatch"] += 1
            skipped.append({
                "relative_path": relative_path,
                "local_path": str(local_path),
                "kind": record.get("kind"),
                "reason": "sha256_mismatch",
                "expected_sha256": sha256_expected,
                "actual_sha256": sha256_actual,
            })
            continue

        candidate = {
            "relative_path": relative_path,
            "local_path": str(local_path),
            "kind": record.get("kind"),
            "bytes": stat.st_size,
            "mtime_utc": iso_z_from_ts(stat.st_mtime),
            "sha256": sha256_actual,
            "first_verified_at_utc": record.get("first_verified_at_utc"),
            "last_verified_at_utc": record.get("last_verified_at_utc"),
            "last_verified_by_manifest": record.get("last_verified_by_manifest"),
        }
        manifest["candidate_files"].append(candidate)
        counts["eligible"] += 1

        if mode == "execute":
            try:
                local_path.unlink()
            except OSError as exc:
                note_failure(relative_path, local_path, "delete_failed", exc)
                continue
            record["local_deleted_at_utc"] = deleted_at
            index_changed = True
            manifest["deleted_local_files"].append({**candidate, "deleted_at_utc": deleted_at})

    counts["deleted"] = len(manifest["deleted_local_files"])
    manifest["verified_index"]["record_count_after"] = len(records)

    if mode == "execute" and index_changed:
        index_doc["updated_at_utc"] = ended_at
        try:
            write_atomic_json(verified_index_path, index_doc)
        except OSError as exc:
            # Files are already gone; make sure the manifest still says so.
            manifest["gate_status"] = "FAIL"
            manifest["operation_status"] = "INDEX_WRITE_FAILED"
            manifest["warnings"].append(f"failed to update verified-upload index: {exc}")

    if manifest["operation_status"] is None:
        if manifest["gate_status"] != "PASS":
            manifest["operation_status"] = "PURGE_FAILED" if mode == "execute" else "DRY_RUN_FAILED"
        elif counts["eligible"] == 0:
            manifest["operation_status"] = "NO_ELIGIBLE_FILES"
        elif mode == "dry-run":
            manifest["operation_status"] = "DRY_RUN_PASS"
        else:
            manifest["operation_status"] = "PURGE_PASS"

    return finish(manifest, 0 if manifest["gate_status"] == "PASS" else 1)


# The program is fed to "python3 -" on stdin, where __name__ is "__main__".
if __name__ == "__main__":
    sys.exit(main())
|
|
PY
|