diff --git a/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml b/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml
index 618262018..ccee60226 100644
--- a/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml
+++ b/helm-charts/common/llm-uservice/templates/tests/test-pod.yaml
@@ -25,6 +25,12 @@ spec:
             -X POST \
             -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
             -H 'Content-Type: application/json' && break;
+          {{- else if contains "llm-faqgen-tgi" .Values.image.repository }}
+          # Try with faqgen endpoint
+          curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \
+            -X POST \
+            -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
+            -H 'Content-Type: application/json' && break;
           {{- else }}
           curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
             -X POST \
diff --git a/helm-charts/common/llm-uservice/variant_docsum-values.yaml b/helm-charts/common/llm-uservice/variant_docsum-values.yaml
new file mode 100644
index 000000000..9e1f33bde
--- /dev/null
+++ b/helm-charts/common/llm-uservice/variant_docsum-values.yaml
@@ -0,0 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/llm-docsum-tgi
+  tag: "latest"
diff --git a/helm-charts/common/llm-uservice/variant_faqgen-values.yaml b/helm-charts/common/llm-uservice/variant_faqgen-values.yaml
new file mode 100644
index 000000000..4e51fdd1d
--- /dev/null
+++ b/helm-charts/common/llm-uservice/variant_faqgen-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/llm-faqgen-tgi
+  tag: "latest"
+
+tgi:
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/helm-charts/update_manifests.sh b/helm-charts/update_manifests.sh
index c0f7ae9cb..8438c2fdc 100755
--- a/helm-charts/update_manifests.sh
+++ b/helm-charts/update_manifests.sh
@@ -20,15 +20,24 @@ function generate_yaml {
     extraparams="--set image.tag=$NEWTAG"
   fi
 
-  helm template $chart ./common/$chart --skip-tests --values ./common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true $extraparams > ${outputdir}/$chart.yaml
+  helm template $chart ./common/$chart --skip-tests --values ./common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH $extraparams > ${outputdir}/$chart.yaml
 
   for f in `ls ./common/$chart/*-values.yaml 2>/dev/null `; do
-    ext=$(basename $f | cut -d'-' -f1)
+    filename=$(basename $f)
+    releasename=$chart
+    if [[ "$filename" =~ ^variant_.*-values.yaml ]]; then
+      ext=$(echo $filename | sed 's/^variant_//' | sed 's/-values.yaml$//')
+      outputfile="$ext-${chart}.yaml"
+      releasename=$ext-$chart
+    else
+      ext=$(echo $filename | sed 's/-values.yaml$//')
+      outputfile="${chart}_${ext}.yaml"
+    fi
     extraparams=""
     if [[ $(grep -c 'tag: "latest"' $f) != 0 ]]; then
       extraparams="--set image.tag=$NEWTAG"
     fi
-    helm template $chart ./common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true $extraparams > ${outputdir}/${chart}_${ext}.yaml
+    helm template $releasename ./common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH $extraparams > ${outputdir}/${outputfile}
   done
 }
@@ -41,7 +50,3 @@ do
   echo "Update manifest for $chartname..."
   generate_yaml $chartname $OUTPUTDIR
 done
-
-# we need special version of docsum-llm-uservice
-echo "Update manifest for docsum-llm-uservice..."
-helm template docsum ./common/llm-uservice --skip-tests --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true,image.repository=opea/llm-docsum-tgi,image.tag=$NEWTAG> ${OUTPUTDIR}/docsum-llm-uservice.yaml
diff --git a/microservices-connector/config/manifests/docsum-llm-uservice.yaml b/microservices-connector/config/manifests/docsum-llm-uservice.yaml
index b0a5a92d3..40e814f96 100644
--- a/microservices-connector/config/manifests/docsum-llm-uservice.yaml
+++ b/microservices-connector/config/manifests/docsum-llm-uservice.yaml
@@ -10,11 +10,11 @@ metadata:
   labels:
     helm.sh/chart: llm-uservice-1.0.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/instance: docsum-llm-uservice
     app.kubernetes.io/version: "v1.0"
     app.kubernetes.io/managed-by: Helm
 data:
-  TGI_LLM_ENDPOINT: "http://docsum-tgi"
+  TGI_LLM_ENDPOINT: "http://docsum-llm-uservice-tgi"
   HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
   HF_HOME: "/tmp/.cache/huggingface"
   http_proxy: ""
@@ -33,7 +33,7 @@ metadata:
   labels:
     helm.sh/chart: llm-uservice-1.0.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/instance: docsum-llm-uservice
     app.kubernetes.io/version: "v1.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -45,7 +45,7 @@ spec:
       name: llm-uservice
   selector:
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/instance: docsum-llm-uservice
 ---
 # Source: llm-uservice/templates/deployment.yaml
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -58,7 +58,7 @@ metadata:
   labels:
     helm.sh/chart: llm-uservice-1.0.0
     app.kubernetes.io/name: llm-uservice
-    app.kubernetes.io/instance: docsum
+    app.kubernetes.io/instance: docsum-llm-uservice
     app.kubernetes.io/version: "v1.0"
     app.kubernetes.io/managed-by: Helm
 spec:
@@ -66,17 +66,17 @@ spec:
   replicas: 1
   selector:
     matchLabels:
       app.kubernetes.io/name: llm-uservice
-      app.kubernetes.io/instance: docsum
+      app.kubernetes.io/instance: docsum-llm-uservice
   template:
     metadata:
       labels:
         app.kubernetes.io/name: llm-uservice
-        app.kubernetes.io/instance: docsum
+        app.kubernetes.io/instance: docsum-llm-uservice
     spec:
       securityContext:
         {}
       containers:
-        - name: docsum
+        - name: docsum-llm-uservice
           envFrom:
             - configMapRef:
                 name: docsum-llm-uservice-config
diff --git a/microservices-connector/config/manifests/faqgen-llm-uservice.yaml b/microservices-connector/config/manifests/faqgen-llm-uservice.yaml
new file mode 100644
index 000000000..3b4515e19
--- /dev/null
+++ b/microservices-connector/config/manifests/faqgen-llm-uservice.yaml
@@ -0,0 +1,129 @@
+---
+# Source: llm-uservice/templates/configmap.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: faqgen-llm-uservice-config
+  labels:
+    helm.sh/chart: llm-uservice-1.0.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: faqgen-llm-uservice
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+data:
+  TGI_LLM_ENDPOINT: "http://faqgen-llm-uservice-tgi"
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+  HF_HOME: "/tmp/.cache/huggingface"
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  LOGFLAG: ""
+---
+# Source: llm-uservice/templates/service.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: faqgen-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-1.0.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: faqgen-llm-uservice
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  type: ClusterIP
+  ports:
+    - port: 9000
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: faqgen-llm-uservice
+---
+# Source: llm-uservice/templates/deployment.yaml
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: faqgen-llm-uservice
+  labels:
+    helm.sh/chart: llm-uservice-1.0.0
+    app.kubernetes.io/name: llm-uservice
+    app.kubernetes.io/instance: faqgen-llm-uservice
+    app.kubernetes.io/version: "v1.0"
+    app.kubernetes.io/managed-by: Helm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llm-uservice
+      app.kubernetes.io/instance: faqgen-llm-uservice
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llm-uservice
+        app.kubernetes.io/instance: faqgen-llm-uservice
+    spec:
+      securityContext:
+        {}
+      containers:
+        - name: faqgen-llm-uservice
+          envFrom:
+            - configMapRef:
+                name: faqgen-llm-uservice-config
+            - configMapRef:
+                name: extra-env-config
+                optional: true
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: false
+            runAsNonRoot: true
+            runAsUser: 1000
+            seccompProfile:
+              type: RuntimeDefault
+          image: "opea/llm-faqgen-tgi:latest"
+          imagePullPolicy: IfNotPresent
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          livenessProbe:
+            failureThreshold: 24
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          startupProbe:
+            failureThreshold: 120
+            httpGet:
+              path: v1/health_check
+              port: llm-uservice
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          resources:
+            {}
+      volumes:
+        - name: tmp
+          emptyDir: {}
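
Reviewer note, not part of the patch: a minimal local check of the new variant handling, assuming helm-charts/ is the working directory and /mnt/opea-models stands in as a placeholder for $MODELPATH. Per the updated generate_yaml, variant_faqgen-values.yaml should yield release name "faqgen-llm-uservice" and output file faqgen-llm-uservice.yaml, matching the new manifest added in this PR:

  helm template faqgen-llm-uservice ./common/llm-uservice --skip-tests \
    --values ./common/llm-uservice/variant_faqgen-values.yaml \
    --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=/mnt/opea-models \
    > faqgen-llm-uservice.yaml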