From 70205e53bad2f27347af5986dde7d37db35bf38f Mon Sep 17 00:00:00 2001
From: Ying Chun Guo <yingchun.guo@intel.com>
Date: Tue, 30 Jul 2024 10:58:47 +0800
Subject: [PATCH] improve chart e2e test workflow and scripts (#239)

Signed-off-by: Yingchun Guo <yingchun.guo@intel.com>
---
 .github/workflows/chart-e2e.yaml            | 21 +++++-------
 .github/workflows/scripts/e2e/chart_test.sh | 38 +++++++++++++++++----
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/chart-e2e.yaml b/.github/workflows/chart-e2e.yaml
index efbfc72c7..49352b988 100644
--- a/.github/workflows/chart-e2e.yaml
+++ b/.github/workflows/chart-e2e.yaml
@@ -44,12 +44,16 @@ jobs:
           cut -d'/' -f3 | sort -u )
           run_matrix="{\"include\":["
           for chart in ${e2e_charts}; do
-            run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\"},"
+            if [ -f $CHARTS_DIR/$chart/gaudi-values.yaml ]; then
+              run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\"},"
+            fi
             run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"xeon\"},"
           done
           for chart in ${common_charts}; do
-           run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\",\"directory\":\"common\"},"
-           run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"xeon\",\"directory\":\"common\"},"
+            if [ -f $CHARTS_DIR/common/$chart/gaudi-values.yaml ]; then
+              run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"gaudi\",\"directory\":\"common\"},"
+            fi
+            run_matrix="${run_matrix}{\"example\":\"${chart}\",\"hardware\":\"xeon\",\"directory\":\"common\"},"
           done
           run_matrix=$run_matrix"]}"
           echo "run_matrix=${run_matrix}"
@@ -119,14 +123,7 @@ jobs:
           helm-charts/update_dependency.sh && helm dependency update ${{ env.CHART_FOLDER}}
           value_file="values.yaml"
           if [ "${{ matrix.hardware }}" == "gaudi" ]; then
-            if [ -f ${{ env.CHART_FOLDER}}/gaudi-values.yaml ]; then
-              value_file="gaudi-values.yaml"
-            else
-              echo "No Gaudi version for chart ${{ matrix.example }}, skip test"
-              echo "skip_validate=true" >> $GITHUB_ENV
-              echo "should_cleanup=false" >> $GITHUB_ENV
-              exit 0
-            fi
+            value_file="gaudi-values.yaml"
           fi
           if ! helm install --create-namespace --namespace $NAMESPACE --wait \
               --timeout "$ROLLOUT_TIMEOUT_SECONDS" \
@@ -161,7 +158,7 @@ jobs:
             if [[ -f $LOG_PATH/charts-${chart}.log ]] && \
             [[ $(grep -c "^Phase:.*Failed" $LOG_PATH/charts-${chart}.log) != 0 ]]; then
                 teststatus=false
-                .github/workflows/scripts/e2e/chart_test.sh dump_failed_pod_logs $NAMESPACE $LOG_PATH/charts-${chart}.log
+                .github/workflows/scripts/e2e/chart_test.sh dump_all_pod_logs $NAMESPACE
             else
                 teststatus=true
             fi
diff --git a/.github/workflows/scripts/e2e/chart_test.sh b/.github/workflows/scripts/e2e/chart_test.sh
index 3dd7b13c9..bef37587f 100755
--- a/.github/workflows/scripts/e2e/chart_test.sh
+++ b/.github/workflows/scripts/e2e/chart_test.sh
@@ -4,6 +4,18 @@
 
 #set -xe
 
+function dump_pod_log() {  # Print `kubectl describe` output followed by `kubectl logs` output for one pod.
+    local pod_name=$1   # $1: name of the pod to dump; local so the caller's variables are not clobbered
+    local namespace=$2  # $2: namespace that contains the pod
+    echo "-----------Pod: $pod_name---------"
+    echo "#kubectl describe pod $pod_name -n $namespace"
+    kubectl describe pod "$pod_name" -n "$namespace"  # quoted to prevent word-splitting/globbing of the arguments
+    echo "-----------------------------------"
+    echo "#kubectl logs $pod_name -n $namespace"
+    kubectl logs "$pod_name" -n "$namespace"
+    echo "-----------------------------------"
+}
+
 function dump_pods_status() {
     namespace=$1
     echo "-----DUMP POD STATUS in NS $namespace------"
@@ -28,10 +40,7 @@ function dump_pods_status() {
 
         # Check if the pod is not in "Running" status or READY count is less than required
         if [[ "$status" != "Running" || "$ready_count" -lt "$required_count" ]]; then
-            echo "Pod: $pod_name"
-            echo "Details:"
-            kubectl describe pod $pod_name -n $namespace
-            echo "-----------------------------------"
+            dump_pod_log $pod_name $namespace
         fi
     done
 }
@@ -45,12 +54,24 @@ function dump_failed_pod_logs() {
 
     if [[ -n $failed_svc_name ]]; then
         # Get the exact pod name
-        pod_name=$(kubectl get pods -n $namespace | grep -v 'testpod' | grep $failed_svc_name | awk '{print $1}')
-        echo "------DUMP POD $pod_name LOG in NS $namespace---------"
-        kubectl logs $pod_name -n $namespace
+        pods=$(kubectl get pods -n $namespace | grep -v 'testpod' | grep $failed_svc_name | awk '{print $1}')
+        for pod_name in $pods
+        do
+            dump_pod_log $pod_name $namespace
+        done
     fi
 }
 
+function dump_all_pod_logs() {  # Dump describe output and logs for every pod listed in a namespace.
+    local namespace=$1 pods pod_name  # $1: namespace to inspect; all working variables kept function-local
+    echo "-----DUMP POD STATUS AND LOG in NS $namespace------"
+
+    pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}')
+    for pod_name in $pods  # intentionally unquoted: the name list is word-split into one pod per iteration
+    do
+        dump_pod_log "$pod_name" "$namespace"
+    done
+}
 
 if [ $# -eq 0 ]; then
     echo "Usage: $0 <function_name>"
@@ -64,6 +85,9 @@ case "$1" in
     dump_failed_pod_logs)
         dump_failed_pod_logs $2 $3
         ;;
+    dump_all_pod_logs)
+        dump_all_pod_logs $2
+        ;;
     *)
         echo "Unknown function: $1"
         ;;