From 38c78112521e8247bbb83325693564749707ea59 Mon Sep 17 00:00:00 2001 From: philipp Date: Sat, 18 Apr 2026 11:33:23 +0200 Subject: [PATCH] Fix Kubernetes smoke pod restart selection --- scripts/k8s_runtime_smoke_check.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/k8s_runtime_smoke_check.sh b/scripts/k8s_runtime_smoke_check.sh index 9bff5d5..08495cb 100755 --- a/scripts/k8s_runtime_smoke_check.sh +++ b/scripts/k8s_runtime_smoke_check.sh @@ -107,20 +107,27 @@ def run_json(command, input_text=None, timeout=None): def pod_ready(pod): + if pod.get('metadata', {}).get('deletionTimestamp'): + return False if pod.get('status', {}).get('phase') != 'Running': return False statuses = pod.get('status', {}).get('containerStatuses') or [] return bool(statuses) and all(status.get('ready') for status in statuses) -def get_collector_pod(): +def get_collector_pod(exclude_names=None): + exclude_names = set(exclude_names or []) selector = 'app.kubernetes.io/name=orderbooks,app.kubernetes.io/component=collector' deadline = time.time() + wait_seconds last = None while time.time() <= deadline: pods = run_json([kubectl, '-n', namespace, 'get', 'pods', '-l', selector, '-o', 'json']) items = pods.get('items', []) - ready = [pod for pod in items if pod_ready(pod)] + ready = [ + pod + for pod in items + if pod_ready(pod) and pod.get('metadata', {}).get('name') not in exclude_names + ] if ready: ready.sort(key=lambda pod: pod.get('metadata', {}).get('creationTimestamp', '')) return ready[-1]['metadata']['name'], ready[-1] @@ -396,6 +403,7 @@ summary = { 'cronjob': cronjob, 'raw_dir': raw_dir, 'manifest_dir': manifest_dir, + 'wait_seconds': wait_seconds, 'upload_min_age_seconds': upload_min_age_seconds, 'checks': checks, 'failures': failures, @@ -416,7 +424,7 @@ try: rollout_after = run([kubectl, '-n', namespace, 'rollout', 'status', f'deployment/{deployment}', f'--timeout={wait_seconds}s']) if rollout_after['exit_code'] != 0: raise RuntimeError('collector deployment did not recover after pod delete') - new_pod, new_pod_obj = get_collector_pod() + new_pod, new_pod_obj = get_collector_pod(exclude_names={pod_name}) old_check = exec_python(new_pod, raw_check_code, [old_file['path'], old_file['actual_sha256'], str(old_file['rows_parsed'])]) if not old_check.get('sha256_matches') or not old_check.get('row_count_matches'): raise RuntimeError('old raw file changed or stopped parsing after pod restart')