Add helm chart for deploying GMC itself (#249)

Signed-off-by: Lianhao Lu <[email protected]>
opea-project · Aug 2, 2024 · a76c90f · a76c90f
1 parent 497ff61
commit a76c90f
Show file tree

Hide file tree

Showing 17 changed files with 551 additions and 24 deletions.
diff --git a/.github/workflows/gmc-helm.yaml b/.github/workflows/gmc-helm.yaml
@@ -8,13 +8,13 @@ on:
     branches: [main]
     types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
     paths:
-      - manifests/common/**
       - microservices-connector/config/crd/bases/gmc.opea.io_gmconnectors.yaml
       - microservices-connector/config/gmcrouter/gmc-router.yaml
       - microservices-connector/helm/**
       - "!**.md"
       - "!**.txt"
       - "!**.png"
+      - .github/workflows/scripts/e2e/gmc_xeon_test.sh
   workflow_dispatch:
 
 # If there is a new commit, the previous jobs will be canceled
@@ -56,14 +56,34 @@ jobs:
       - name: Install GMC
         run: |
           echo "should_cleanup=true" >> $GITHUB_ENV
-          ## IMPORTANT NOTICE: need to replace gmc images with {{env.DOCKER_REGISTRY}} and {{env.VERSION}}
-          cd microservices-connector
-          if ! helm install --create-namespace --namespace {{ env.SYSTEM_NAMESPACE}} --wait \
+          USER_ID=$(whoami)
+          HFTOKEN=$(cat /home/$USER_ID/.cache/huggingface/token)
+          pushd microservices-connector
+          mkdir -p $(pwd)/config/manifests
+          cp -f -p $(dirname $(pwd))/manifests/common/*.yaml $(pwd)/config/manifests/
+          popd
+          pushd microservices-connector/helm
+          # replace image for gmc-router
+          sed -i "s|image:.*|image: ${DOCKER_REGISTRY}opea/gmcrouter:$VERSION|" gmc-router.yaml
+          # replace the pull policy "IfNotPresent" with "Always" for gmc-router
+          sed -i "s|imagePullPolicy:.*|imagePullPolicy: Always|" gmc-router.yaml
+          # replace image for GenAI component manifests
+          find manifests_common/ -type f -name "*.yaml" -exec sed -i "s|image: \"opea|image: \"${DOCKER_REGISTRY}opea|" {} \;
+          # replace the pull policy "IfNotPresent" with "Always" for GenAI component manifests
+          find manifests_common/ -type f -name "*.yaml" -exec sed -i "s|imagePullPolicy: IfNotPresent|imagePullPolicy: Always|" {} \;
+          # replace the mount dir "path: /mnt/opea-models" with "path: $KIND_MOUNT_DIR"
+          find manifests_common/ -type f -name "*.yaml" -exec sed -i "s|path: /mnt/opea-models|path: $KIND_MOUNT_DIR|" {} \;
+          # replace huggingface token
+          find manifests_common/ -type f -name '*.yaml' -exec sed -i "s#insert-your-huggingface-token-here#${HFTOKEN}#g" {} \;
+          popd
+          # Install GMC
+          if ! helm install --create-namespace --namespace ${{ env.SYSTEM_NAMESPACE }} --wait \
               --timeout "$ROLLOUT_TIMEOUT_SECONDS" \
-              $RELEASE_NAME helm ; then
+              $RELEASE_NAME microservices-connector/helm \
+               --set image.tag=$VERSION --set image.repository=${DOCKER_REGISTRY}opea/gmcmanager; then
             echo "Failed to install GMC by helm"
             echo "skip_validate=true" >> $GITHUB_ENV
-            .github/workflows/scripts/e2e/chart_test.sh dump_pods_status {{ env.SYSTEM_NAMESPACE}}
+            .github/workflows/scripts/e2e/chart_test.sh dump_pods_status ${{ env.SYSTEM_NAMESPACE }}
             exit 1
           fi
 
@@ -80,5 +100,10 @@ jobs:
         run: |
           if $should_cleanup; then
             .github/workflows/scripts/e2e/gmc_xeon_test.sh cleanup_apps
-            helm uninstall $RELEASE_NAME --namespace {{ env.SYSTEM_NAMESPACE}}
+            helm uninstall $RELEASE_NAME --namespace ${{ env.SYSTEM_NAMESPACE }}
+            if kubectl get namespace ${{ env.SYSTEM_NAMESPACE }} > /dev/null 2>&1; then
+              echo "Deleting namespace: $SYSTEM_NAMESPACE"
+              kubectl delete namespace "$SYSTEM_NAMESPACE"
+            fi
+            kubectl delete crd gmconnectors.gmc.opea.io
           fi
diff --git a/.github/workflows/go-unittests.yaml b/.github/workflows/go-unittests.yaml
@@ -9,6 +9,10 @@ on:
     types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
     paths:
       - microservices-connector/**
+      - "!microservices-connector/helm/**"
+      - "!**.md"
+      - "!**.txt"
+      - "!**.png"
   workflow_dispatch:
 
 # If there is a new commit, the previous jobs will be canceled

diff --git a/.github/workflows/scripts/e2e/gmc_xeon_test.sh b/.github/workflows/scripts/e2e/gmc_xeon_test.sh
@@ -46,6 +46,8 @@ function cleanup_apps() {
 function validate_chatqna() {
    kubectl create ns $CHATQNA_NAMESPACE
    sed -i "s|namespace: chatqa|namespace: $CHATQNA_NAMESPACE|g"  $(pwd)/config/samples/chatQnA_xeon.yaml
+   # workaround for issue #268
+   yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_xeon.yaml
    kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml
 
    # Wait until the router service is ready
@@ -54,17 +56,16 @@ function validate_chatqna() {
    output=$(kubectl get pods -n $CHATQNA_NAMESPACE)
    echo $output
 
-  # Wait until the tgi pod is ready
-  TGI_POD_NAME=$(kubectl get pods --namespace=$CHATQNA_NAMESPACE | grep ^tgi-service | awk '{print $1}')
-  kubectl describe pod $TGI_POD_NAME -n $CHATQNA_NAMESPACE
-  kubectl wait --for=condition=ready pod/$TGI_POD_NAME --namespace=$CHATQNA_NAMESPACE --timeout=300s
-
-
    # deploy client pod for testing
    kubectl create deployment client-test -n $CHATQNA_NAMESPACE --image=python:3.8.13 -- sleep infinity
 
-   # wait for client pod ready
-   wait_until_pod_ready "client-test" $CHATQNA_NAMESPACE "client-test"
+   # Wait until all pods are ready
+   wait_until_all_pod_ready $CHATQNA_NAMESPACE 300s
+   if [ $? -ne 0 ]; then
+       echo "Error Some pods are not ready!"
+       exit 1
+   fi
+
    # giving time to populating data
    sleep 90
 
@@ -108,12 +109,16 @@ function validate_codegen() {
    output=$(kubectl get pods -n $CODEGEN_NAMESPACE)
    echo $output
 
-
    # deploy client pod for testing
    kubectl create deployment client-test -n $CODEGEN_NAMESPACE --image=python:3.8.13 -- sleep infinity
 
-   # wait for client pod ready
-   wait_until_pod_ready "client-test" $CODEGEN_NAMESPACE "client-test"
+   # Wait until all pods are ready
+   wait_until_all_pod_ready $CODEGEN_NAMESPACE 300s
+   if [ $? -ne 0 ]; then
+       echo "Error Some pods are not ready!"
+       exit 1
+   fi
+
    # giving time to populating data
    sleep 60
 
@@ -158,12 +163,16 @@ function validate_codetrans() {
    output=$(kubectl get pods -n $CODETRANS_NAMESPACE)
    echo $output
 
-
    # deploy client pod for testing
    kubectl create deployment client-test -n $CODETRANS_NAMESPACE --image=python:3.8.13 -- sleep infinity
 
-   # wait for client pod ready
-   wait_until_pod_ready "client-test" $CODETRANS_NAMESPACE "client-test"
+   # Wait until all pods are ready
+   wait_until_all_pod_ready $CODETRANS_NAMESPACE 300s
+   if [ $? -ne 0 ]; then
+       echo "Error Some pods are not ready!"
+       exit 1
+   fi
+
    # giving time to populating data
    sleep 60
 
@@ -207,12 +216,16 @@ function validate_docsum() {
    output=$(kubectl get pods -n $DOCSUM_NAMESPACE)
    echo $output
 
-
    # deploy client pod for testing
    kubectl create deployment client-test -n $DOCSUM_NAMESPACE --image=python:3.8.13 -- sleep infinity
 
-   # wait for client pod ready
-   wait_until_pod_ready "client-test" $DOCSUM_NAMESPACE "client-test"
+   # Wait until all pods are ready
+   wait_until_all_pod_ready $DOCSUM_NAMESPACE 300s
+   if [ $? -ne 0 ]; then
+       echo "Error Some pods are not ready!"
+       exit 1
+   fi
+
    # giving time to populating data
    sleep 60
 

diff --git a/.github/workflows/scripts/e2e/utils.sh b/.github/workflows/scripts/e2e/utils.sh
@@ -47,3 +47,25 @@ function get_gmc_controller_logs() {
     echo "Fetching logs for pod $pod_name in namespace $SYSTEM_NAMESPACE..."
     kubectl logs $pod_name -n $SYSTEM_NAMESPACE
 }
+
+
+function wait_until_all_pod_ready() {
+  namespace=$1
+  timeout=$2
+
+  echo "Wait for all pods in NS $namespace to be ready..."
+  pods=$(kubectl get pods -n $namespace --no-headers -o custom-columns=":metadata.name")
+  # Loop through each pod
+  echo "$pods" | while read -r line; do
+    pod_name=$line
+    kubectl wait --for=condition=Ready pod/${pod_name} -n $namespace --timeout=${timeout}
+    if [ $? -ne 0 ]; then
+      echo "Pod $pod_name is not ready after waiting for ${timeout}"
+      echo "Pod $pod_name status:"
+      kubectl describe pod $pod_name -n $namespace
+      echo "Pod $pod_name logs:"
+      kubectl logs $pod_name -n $namespace
+      exit 1
+    fi
+  done
+}
diff --git a/microservices-connector/README.md b/microservices-connector/README.md
@@ -154,3 +154,7 @@ make uninstall
 ```sh
 make undeploy
 ```
+
+### Deploy using helm chart
+
+Please refer to the [helm chart README](./helm/README.md) for deploying GMC using helm chart.
diff --git a/microservices-connector/helm/.helmignore b/microservices-connector/helm/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/microservices-connector/helm/Chart.yaml b/microservices-connector/helm/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: gmc
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.9.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "v0.8"
diff --git a/microservices-connector/helm/README.md b/microservices-connector/helm/README.md
@@ -0,0 +1,82 @@
+# Helm chart for genai-microservices-connector(GMC)
+
+Helm chart for deploying the genai-microservices-connector(GMC) service on a Kubernetes cluster.
+
+## Installing the GMC Helm Chart
+
+To use this GMC helm chart, you need to have a Kubernetes cluster and helm installed. If you don't have helm (version >= 3.15) installed, you can follow the instructions [here](https://helm.sh/docs/intro/install/).
+
+This helm chart will install the following components of GMC:
+
+- GMC CRD
+- GenAI Components and GMC Router manifests
+- GMC Manager
+
+**NOTE: Because helm doesn't support updating/deleting CRD, you need to manually delete the CRD before upgrading the GMC helm chart.**
+
+**Get related manifests for GenAI Components**
+
+```sh
+cd GenAIInfra/microservices-connector
+mkdir -p $(pwd)/config/manifests
+cp $(dirname $(pwd))/manifests/common/*.yaml -p $(pwd)/config/manifests/
+```
+
+**NOTE:**
+Before installing the manifests, please replace the placeholders in the manifests with your own Hugging Face token, Google API key and Google CSE ID:
+
+```sh
+export YOUR_HF_TOKEN=<your hugging face token>
+export YOUR_GOOGLE_API_KEY=<your google api key>
+export YOUR_GOOGLE_CSE_ID=<your google cse id>
+find helm/manifests_common/ -name '*.yaml' -type f -exec sed -i "s#insert-your-huggingface-token-here#$YOUR_HF_TOKEN#g" {} \;
+find helm/manifests_common/ -name '*.yaml' -type f -exec sed -i "s#GOOGLE_API_KEY:.*#GOOGLE_API_KEY: "$YOUR_GOOGLE_API_KEY"#g" {} \;
+find helm/manifests_common/ -name '*.yaml' -type f -exec sed -i "s#GOOGLE_CSE_ID:.*#GOOGLE_CSE_ID: "$YOUR_GOOGLE_CSE_ID"#g" {} \;
+```
+
+If you have a pre-defined directory for saving the models on your cluster hosts, please set its path in the manifests:
+
+```sh
+export MOUNT_DIR=<your model path>
+find helm/manifests_common/ -name '*.yaml' -type f -exec sed -i "s#path: /mnt/opea-models#path: $MOUNT_DIR#g" {} \;
+```
+
+**NOTE:**
+GMC manager, GenAI components and GMC router manifests can be deployed in any namespace. Here we use `system` as an example:
+
+```console
+helm install -n system --create-namespace gmc helm
+```
+
+## Check the installation result
+
+Run the command `kubectl get pod -n system` to make sure all pods are running:
+
+```
+NAME                            READY   STATUS    RESTARTS   AGE
+gmc-contoller-8bcb9d469-l6fsj   1/1     Running   0          55s
+```
+
+## Next step
+
+After the GMC is installed, you can follow the [GMC user guide](../usage_guide.md) for sample use cases.
+
+## Uninstall
+
+**Delete the instances (CRs) from the cluster if you have ever deployed the instances from GMC user guide:**
+
+```sh
+kubectl delete -k config/samples/
+```
+
+**Undeploy the GMC manager, GenAI components and GMC router manifests from the cluster:**
+
+```sh
+helm delete -n system gmc
+```
+
+**Delete the APIs(CRDs) from the cluster:**
+
+```sh
+kubectl delete crd gmconnectors.gmc.opea.io
+```
diff --git a/microservices-connector/helm/crds/gmc.opea.io_gmconnectors.yaml b/microservices-connector/helm/crds/gmc.opea.io_gmconnectors.yaml
@@ -0,0 +1 @@
+../../config/crd/bases/gmc.opea.io_gmconnectors.yaml
diff --git a/microservices-connector/helm/gmc-router.yaml b/microservices-connector/helm/gmc-router.yaml
@@ -0,0 +1 @@
+../config/gmcrouter/gmc-router.yaml
diff --git a/microservices-connector/helm/manifests_common b/microservices-connector/helm/manifests_common
@@ -0,0 +1 @@
+../config/manifests
diff --git a/microservices-connector/helm/templates/NOTES.txt b/microservices-connector/helm/templates/NOTES.txt
@@ -0,0 +1,2 @@
+Please checkout https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/usage_guide.md for usage guide.
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		../../config/crd/bases/gmc.opea.io_gmconnectors.yaml
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Please checkout https://github.com/opea-project/GenAIInfra/blob/main/microservices-connector/usage_guide.md for usage guide.