diff --git a/cli/deploy-custom-container-torchserve-huggingface-textgen.sh b/cli/deploy-custom-container-torchserve-huggingface-textgen.sh
deleted file mode 100644
index bddc55f8b6f..00000000000
--- a/cli/deploy-custom-container-torchserve-huggingface-textgen.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/bin/bash
-
-set -e
-
-#
-ENDPOINT_NAME=hf-tg-`echo $RANDOM`
-IMAGE_TAG=azureml-examples/huggingface-textgen:1
-
-BASE_PATH=endpoints/online/custom-container/torchserve/huggingface-textgen
-SERVE_PATH=$BASE_PATH/serve/examples/Huggingface_Transformers
-ROOT_PATH=$PWD
-ACR_NAME=$(az ml workspace show --query container_registry -o tsv | cut -d'/' -f9-)
-#
-
-#
-# Helper function to parameterize YAML
-change_vars() {
-  for FILE in "$@"; do
-    TMP="${FILE}_"
-    cp $FILE $TMP
-    readarray -t VARS < <(cat $TMP | grep -oP '{{.*?}}' | sed -e 's/[}{]//g');
-    for VAR in "${VARS[@]}"; do
-      sed -i "s#{{${VAR}}}#${!VAR}#g" $TMP
-    done
-  done
-}
-
-cleanup () {
-  sudo rm -rf $BASE_PATH/serve || true
-  rm $BASE_PATH/ts-hf-tg-deployment.yml_
-  az ml online-endpoint delete -y -n $ENDPOINT_NAME
-}
-#
-
-#
-az acr login -n $ACR_NAME
-az acr build -t $IMAGE_TAG -f $BASE_PATH/ts-hf-tg.dockerfile -r $ACR_NAME $BASE_PATH
-#
-
-#
-cd $BASE_PATH
-rm -rf serve
-git init serve
-cd serve
-git remote add -f origin https://github.com/pytorch/serve
-git config core.sparseCheckout true
-echo "examples/Huggingface_Transformers" >> .git/info/sparse-checkout
-git pull origin master
-cd $ROOT_PATH
-#
-
-#
-if [[ ! -f $SERVE_PATH/setup_config.json_ ]]; then
-  cp $BASE_PATH/ts-hf-tg-setup_config.json $SERVE_PATH/setup_config.json_
-fi
-cp $BASE_PATH/ts-hf-tg-setup_config.json $SERVE_PATH/setup_config.json
-chmod -R o+w $SERVE_PATH
-cd $SERVE_PATH
-docker run --rm -v $PWD:/tmp/wd:z -w /tmp/wd -t "$ACR_NAME.azurecr.io/$IMAGE_TAG" "python Download_Transformer_models.py; \
-  sed -i 's#\"max_length\": 50#\"max_length\": 300#g' ./Transformer_model/config.json; \
-  torch-model-archiver --model-name Textgeneration --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files Transformer_model/config.json,./setup_config.json; \
-  exit;"
-cp setup_config.json_ setup_config.json
-rm setup_config.json_
-cd $ROOT_PATH
-#
-
-#
-docker run --name huggingface-textgen --rm -d -p 8080:8080 -v "$PWD/$SERVE_PATH":/tmp/wd -e AZUREML_MODEL_DIR=/tmp/wd -e TORCHSERVE_MODELS="textgeneration=Textgeneration.mar" -t "$ACR_NAME.azurecr.io/$IMAGE_TAG"
-sleep 30
-curl -X POST http://127.0.0.1:8080/predictions/textgeneration -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt"
-docker stop huggingface-textgen
-#
-
-#
-az ml online-endpoint create -n $ENDPOINT_NAME
-#
-
-#
-endpoint_status=`az ml online-endpoint show --name $ENDPOINT_NAME --query "provisioning_state" -o tsv`
-echo $endpoint_status
-if [[ $endpoint_status == "Succeeded" ]]
-then
-  echo "Endpoint created successfully"
-else
-  echo "Endpoint creation failed"
-  exit 1
-fi
-#
-
-#
-change_vars $BASE_PATH/ts-hf-tg-deployment.yml
-az ml online-deployment create -f $BASE_PATH/ts-hf-tg-deployment.yml_ --all-traffic
-#
-
-#
-deploy_status=`az ml online-deployment show --endpoint-name $ENDPOINT_NAME --name textgeneration --query "provisioning_state" -o tsv`
-echo $deploy_status
-if [[ $deploy_status == "Succeeded" ]]
-then
-  echo "Deployment completed successfully"
-else
-  echo "Deployment failed"
-  exit 1
-fi
-#
-
-#
-# Get key
-echo "Getting access key..."
-KEY=$(az ml online-endpoint get-credentials -n $ENDPOINT_NAME --query primaryKey -o tsv)
-
-# Get scoring url
-echo "Getting scoring url..."
-SCORING_URL=$(az ml online-endpoint show -n $ENDPOINT_NAME --query scoring_uri -o tsv)
-echo "Scoring url is $SCORING_URL"
-#
-
-#
-curl -X POST -H "Authorization: Bearer $KEY" -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt" $SCORING_URL
-#
-
-cleanup
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md
deleted file mode 100644
index c8d8e36797b..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Deploy Huggingface models using TorchServe
-
-This example demonstrates how to deploy Huggingface models to a managed online endpoint and follows along with the [Serving Huggingface Transformers using TorchServe](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers) example from the TorchServe repository.
-
-In this example we deploy a GPT-2 model for text generation.
-
-## How to deploy
-
-This example can be run end-to-end using the `deploy-custom-container-torchserve-huggingface-textgen.sh` script in the `cli` folder. TorchServe does not need to be installed locally.
-
-## Image
-
-The image used for this example is defined in the file `ts-hf-tg.dockerfile`. It uses `pytorch/torchserve` as a base image and overrides the default `CMD` so that at startup the `model-store` points to the location of the mounted model (via the `AZUREML_MODEL_DIR` env var) and the `models` to load are taken from the custom env var `TORCHSERVE_MODELS`.
-
-## Model
-
-To prepare the model, the [Huggingface_Transformers](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers) directory is cloned from the `pytorch/serve` GitHub repo via a sparse checkout. We use the same image built for the deployment above to prepare the model per the instructions in the Huggingface example.
-
-## Environment
-The environment is defined inline in the deployment YAML and references the ACR URL of the image. The ACR must be associated with the workspace (or be accessible via a user-assigned managed identity that enables ACRPull) for the deployment to succeed.
-
-We define an additional env var called `TORCHSERVE_MODELS`, which is consumed by the image upon initialization.
-
-The environment also contains an `inference_config` block that defines the `liveness`, `readiness`, and `scoring` routes by path and port. These map onto TorchServe's inference API: port 8080, `/ping` for the health probes, and `/predictions/textgeneration` for scoring. The block must be stated explicitly because we are using a custom image.
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml
deleted file mode 100644
index 227115b6b36..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
-name: textgeneration
-endpoint_name: {{ENDPOINT_NAME}}
-model:
-  name: hf-tg
-  path: serve/examples/Huggingface_Transformers/Textgeneration.mar
-environment_variables:
-  TORCHSERVE_MODELS: "textgeneration=Textgeneration.mar"
-environment:
-  name: hf-tg
-  image: {{ACR_NAME}}.azurecr.io/{{IMAGE_TAG}}
-  inference_config:
-    liveness_route:
-      port: 8080
-      path: /ping
-    readiness_route:
-      port: 8080
-      path: /ping
-    scoring_route:
-      port: 8080
-      path: /predictions/textgeneration
-instance_type: Standard_DS3_v2
-instance_count: 1
-
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json
deleted file mode 100644
index 6b717b161bd..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "model_name":"gpt2",
-    "mode":"text_generation",
-    "do_lower_case":true,
-    "num_labels":"0",
-    "save_mode":"pretrained",
-    "max_length":"20",
-    "captum_explanation":false,
-    "FasterTransformer":false,
-    "BetterTransformer":false,
-    "embedding_name": "gpt2",
-    "model_parallel":false
- }
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile
deleted file mode 100644
index 0b8622f02e4..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM pytorch/torchserve:latest-cpu
-
-RUN pip install transformers==4.6.0
-
-CMD ["torchserve","--start", "--disable-token-auth", "--model-store","$AZUREML_MODEL_DIR","--models","$TORCHSERVE_MODELS","--ncs"]
\ No newline at end of file
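
A note on the deleted dockerfile: Docker's exec (JSON-array) form of `CMD` does not pass the command through a shell, so `$AZUREML_MODEL_DIR` and `$TORCHSERVE_MODELS` are not expanded by Docker itself; the command appears to work only because the `pytorch/torchserve` base image's entrypoint script `eval`s its arguments and keeps the container alive after `torchserve --start` daemonizes. If that entrypoint behavior cannot be relied on, a shell-form `CMD` makes the expansion explicit. A minimal, untested sketch assuming the same base image and env vars:

```dockerfile
FROM pytorch/torchserve:latest-cpu

RUN pip install transformers==4.6.0

# Shell form: /bin/sh expands the env vars at container start instead of
# relying on the entrypoint to eval exec-form arguments. Assumes the base
# image's entrypoint still keeps the container alive after torchserve
# backgrounds itself.
CMD torchserve --start --disable-token-auth \
    --model-store "$AZUREML_MODEL_DIR" \
    --models "$TORCHSERVE_MODELS" \
    --ncs
```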
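
Relatedly, the `inference_config` routes in the deleted deployment YAML correspond to TorchServe's standard inference API: `GET /ping` on port 8080 for health, `POST /predictions/<model_name>` for scoring. When smoke-testing the container locally, as the deleted script does, both routes can be exercised directly; a short sketch, assuming the container started by the script is listening on localhost:8080 and `SERVE_PATH` is set as in the script:

```bash
# Liveness/readiness route: TorchServe reports {"status": "Healthy"}
# once the .mar file has loaded.
curl -s http://127.0.0.1:8080/ping

# Scoring route, matching scoring_route in the deployment YAML.
curl -s -X POST http://127.0.0.1:8080/predictions/textgeneration \
  -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt"
```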