From 70205e53bad2f27347af5986dde7d37db35bf38f Mon Sep 17 00:00:00 2001 From: Ying Chun Guo Date: Tue, 30 Jul 2024 10:58:47 +0800 Subject: [PATCH] improve chart e2e test workflow and scripts (#239) Signed-off-by: Yingchun Guo --- .github/workflows/chart-e2e.yaml | 21 +++++------- .github/workflows/scripts/e2e/chart_test.sh | 38 +++++++++++++++++---- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/.github/workflows/chart-e2e.yaml b/.github/workflows/chart-e2e.yaml index efbfc72c7..49352b988 100644 --- a/.github/workflows/chart-e2e.yaml +++ b/.github/workflows/chart-e2e.yaml @@ -44,12 +44,16 @@ jobs: cut -d'/' -f3 | sort -u ) run_matrix="{\"include\":[" for chart in ${e2e_charts}; do - run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\"}," + if [ -f $CHARTS_DIR/$chart/gaudi-values.yaml ]; then + run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\"}," + fi run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"xeon\"}," done for chart in ${common_charts}; do - run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\",\"directory\":\"common\"}," - run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"xeon\",\"directory\":\"common\"}," + if [ -f $CHARTS_DIR/common/$chart/gaudi-values.yaml ]; then + run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\",\"directory\":\"common\"}," + fi + run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"xeon\",\"directory\":\"common\"}," done run_matrix=$run_matrix"]}" echo "run_matrix=${run_matrix}" @@ -119,14 +123,7 @@ jobs: helm-charts/update_dependency.sh && helm dependency update ${{ env.CHART_FOLDER}} value_file="values.yaml" if [ "${{ matrix.hardware }}" == "gaudi" ]; then - if [ -f ${{ env.CHART_FOLDER}}/gaudi-values.yaml ]; then - value_file="gaudi-values.yaml" - else - echo "No Gaudi version for chart ${{ matrix.example }}, skip test" - echo "skip_validate=true" >> $GITHUB_ENV - echo "should_cleanup=false" >> $GITHUB_ENV - exit 0 - fi + value_file="gaudi-values.yaml" fi if ! helm install --create-namespace --namespace $NAMESPACE --wait \ --timeout "$ROLLOUT_TIMEOUT_SECONDS" \ @@ -161,7 +158,7 @@ jobs: if [[ -f $LOG_PATH/charts-${chart}.log ]] && \ [[ $(grep -c "^Phase:.*Failed" $LOG_PATH/charts-${chart}.log) != 0 ]]; then teststatus=false - .github/workflows/scripts/e2e/chart_test.sh dump_failed_pod_logs $NAMESPACE $LOG_PATH/charts-${chart}.log + .github/workflows/scripts/e2e/chart_test.sh dump_all_pod_logs $NAMESPACE else teststatus=true fi diff --git a/.github/workflows/scripts/e2e/chart_test.sh b/.github/workflows/scripts/e2e/chart_test.sh index 3dd7b13c9..bef37587f 100755 --- a/.github/workflows/scripts/e2e/chart_test.sh +++ b/.github/workflows/scripts/e2e/chart_test.sh @@ -4,6 +4,18 @@ #set -xe +function dump_pod_log() { + pod_name=$1 + namespace=$2 + echo "-----------Pod: $pod_name---------" + echo "#kubectl describe pod $pod_name -n $namespace" + kubectl describe pod $pod_name -n $namespace + echo "-----------------------------------" + echo "#kubectl logs $pod_name -n $namespace" + kubectl logs $pod_name -n $namespace + echo "-----------------------------------" +} + function dump_pods_status() { namespace=$1 echo "-----DUMP POD STATUS in NS $namespace------" @@ -28,10 +40,7 @@ function dump_pods_status() { # Check if the pod is not in "Running" status or READY count is less than required if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then - echo "Pod: $pod_name" - echo "Details:" - kubectl describe pod $pod_name -n $namespace - echo "-----------------------------------" + dump_pod_log $pod_name $namespace fi done } @@ -45,12 +54,24 @@ function dump_failed_pod_logs() { if [[ -n $failed_svc_name ]]; then # Get the exact pod name - pod_name=$(kubectl get pods -n $namespace | grep -v 'testpod' | grep $failed_svc_name | awk '{print $1}') - echo "------DUMP POD $pod_name LOG in NS $namespace---------" - kubectl logs $pod_name -n $namespace + pods=$(kubectl get pods -n $namespace | grep -v 'testpod' | grep $failed_svc_name | awk '{print $1}') + for pod_name in $pods + do + dump_pod_log $pod_name $namespace + done fi } +function dump_all_pod_logs() { + namespace=$1 + echo "-----DUMP POD STATUS AND LOG in NS $namespace------" + + pods=$(kubectl get pods -n $namespace -o jsonpath='{.items[*].metadata.name}') + for pod_name in $pods + do + dump_pod_log $pod_name $namespace + done +} if [ $# -eq 0 ]; then echo "Usage: $0 " @@ -64,6 +85,9 @@ case "$1" in dump_failed_pod_logs) dump_failed_pod_logs $2 $3 ;; + dump_all_pod_logs) + dump_all_pod_logs $2 + ;; *) echo "Unknown function: $1" ;;