Fix Kubernetes smoke pod restart selection
All checks were successful
deploy / deploy (push) Successful in 16s
All checks were successful
deploy / deploy (push) Successful in 16s
This commit is contained in:
parent
e86de1b6a9
commit
38c7811252
1 changed file with 11 additions and 3 deletions
|
|
@@ -107,20 +107,27 @@ def run_json(command, input_text=None, timeout=None):
|
|||
|
||||
|
||||
def pod_ready(pod):
    """Return True when *pod* looks ready to serve.

    *pod* is a pod object as a dict (e.g. one entry of the ``items`` list
    from ``kubectl get pods -o json``).  The pod counts as ready only when
    all of the following hold:

    * ``metadata.deletionTimestamp`` is absent or empty,
    * ``status.phase`` equals ``'Running'``,
    * ``status.containerStatuses`` is non-empty and every entry has a
      truthy ``ready`` field.

    Missing or null ``metadata`` / ``status`` / ``containerStatuses`` are
    treated as empty, so a sparse pod object yields False instead of raising.
    """
    # `or {}` (not a .get default) so an explicit null value is also handled,
    # mirroring the `or []` guard used for containerStatuses below.
    metadata = pod.get('metadata') or {}
    status = pod.get('status') or {}

    # A pod carrying a deletionTimestamp must not be selected even if its
    # containers still report ready.
    if metadata.get('deletionTimestamp'):
        return False
    if status.get('phase') != 'Running':
        return False

    statuses = status.get('containerStatuses') or []
    # bool(statuses) guards against all([]) being vacuously True.
    return bool(statuses) and all(entry.get('ready') for entry in statuses)
|
||||
|
||||
|
||||
def get_collector_pod():
|
||||
def get_collector_pod(exclude_names=None):
|
||||
exclude_names = set(exclude_names or [])
|
||||
selector = 'app.kubernetes.io/name=orderbooks,app.kubernetes.io/component=collector'
|
||||
deadline = time.time() + wait_seconds
|
||||
last = None
|
||||
while time.time() <= deadline:
|
||||
pods = run_json([kubectl, '-n', namespace, 'get', 'pods', '-l', selector, '-o', 'json'])
|
||||
items = pods.get('items', [])
|
||||
ready = [pod for pod in items if pod_ready(pod)]
|
||||
ready = [
|
||||
pod
|
||||
for pod in items
|
||||
if pod_ready(pod) and pod.get('metadata', {}).get('name') not in exclude_names
|
||||
]
|
||||
if ready:
|
||||
ready.sort(key=lambda pod: pod.get('metadata', {}).get('creationTimestamp', ''))
|
||||
return ready[-1]['metadata']['name'], ready[-1]
|
||||
|
|
@@ -396,6 +403,7 @@ summary = {
|
|||
'cronjob': cronjob,
|
||||
'raw_dir': raw_dir,
|
||||
'manifest_dir': manifest_dir,
|
||||
'wait_seconds': wait_seconds,
|
||||
'upload_min_age_seconds': upload_min_age_seconds,
|
||||
'checks': checks,
|
||||
'failures': failures,
|
||||
|
|
@@ -416,7 +424,7 @@ try:
|
|||
rollout_after = run([kubectl, '-n', namespace, 'rollout', 'status', f'deployment/{deployment}', f'--timeout={wait_seconds}s'])
|
||||
if rollout_after['exit_code'] != 0:
|
||||
raise RuntimeError('collector deployment did not recover after pod delete')
|
||||
new_pod, new_pod_obj = get_collector_pod()
|
||||
new_pod, new_pod_obj = get_collector_pod(exclude_names={pod_name})
|
||||
old_check = exec_python(new_pod, raw_check_code, [old_file['path'], old_file['actual_sha256'], str(old_file['rows_parsed'])])
|
||||
if not old_check.get('sha256_matches') or not old_check.get('row_count_matches'):
|
||||
raise RuntimeError('old raw file changed or stopped parsing after pod restart')
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue