diff --git a/cli/deploy-custom-container-torchserve-huggingface-textgen.sh b/cli/deploy-custom-container-torchserve-huggingface-textgen.sh
deleted file mode 100644
index bddc55f8b6f..00000000000
--- a/cli/deploy-custom-container-torchserve-huggingface-textgen.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/bin/bash
-
-set -e
-
-#
-ENDPOINT_NAME=hf-tg-$RANDOM
-IMAGE_TAG=azureml-examples/huggingface-textgen:1
-
-BASE_PATH=endpoints/online/custom-container/torchserve/huggingface-textgen
-SERVE_PATH=$BASE_PATH/serve/examples/Huggingface_Transformers
-ROOT_PATH=$PWD
-ACR_NAME=$(az ml workspace show --query container_registry -o tsv | cut -d'/' -f9-)
-#
-
-#
-# Helper function that substitutes {{VAR}} placeholders in files with the values of matching environment variables
-change_vars() {
- for FILE in "$@"; do
- TMP="${FILE}_"
- cp $FILE $TMP
-        readarray -t VARS < <(grep -oP '{{.*?}}' "$TMP" | sed -e 's/[}{]//g')
- for VAR in "${VARS[@]}"; do
- sed -i "s#{{${VAR}}}#${!VAR}#g" $TMP
- done
- done
-}
-
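-# Remove the cloned repo and generated files, and delete the endpoint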
-cleanup () {
- sudo rm -rf $BASE_PATH/serve || true
- rm $BASE_PATH/ts-hf-tg-deployment.yml_
- az ml online-endpoint delete -y -n $ENDPOINT_NAME
-}
-#
-
-#
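-# Build the custom TorchServe image with ACR Build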
-az acr login -n $ACR_NAME
-az acr build -t $IMAGE_TAG -f $BASE_PATH/ts-hf-tg.dockerfile -r $ACR_NAME $BASE_PATH
-#
-
-#
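-# Sparse-checkout only the Huggingface_Transformers example from pytorch/serve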
-cd $BASE_PATH
-rm -rf serve
-git init serve
-cd serve
-git remote add -f origin https://github.com/pytorch/serve
-git config core.sparseCheckout true
-echo "examples/Huggingface_Transformers" >> .git/info/sparse-checkout
-git pull origin master
-cd $ROOT_PATH
-#
-
-#
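-# Swap in our setup_config.json (backing up any original), then use the image
-# built above to download the pretrained model, raise its max_length, and
-# package everything into Textgeneration.mar with torch-model-archiver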
-if [[ ! -f $SERVE_PATH/setup_config.json_ ]]; then
- cp $BASE_PATH/ts-hf-tg-setup_config.json $SERVE_PATH/setup_config.json_
-fi
-cp $BASE_PATH/ts-hf-tg-setup_config.json $SERVE_PATH/setup_config.json
-chmod -R o+w $SERVE_PATH
-cd $SERVE_PATH
-docker run --rm -v $PWD:/tmp/wd:z -w /tmp/wd -t "$ACR_NAME.azurecr.io/$IMAGE_TAG" "python Download_Transformer_models.py; \
- sed -i 's#\"max_length\": 50#\"max_length\": 300#g' ./Transformer_model/config.json; \
- torch-model-archiver --model-name Textgeneration --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files Transformer_model/config.json,./setup_config.json; \
- exit;"
-cp setup_config.json_ setup_config.json
-rm setup_config.json_
-cd $ROOT_PATH
-#
-
-#
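-# Smoke-test locally: serve the archive from this image and send a sample request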
-docker run --name huggingface-textgen --rm -d -p 8080:8080 -v "$PWD/$SERVE_PATH":/tmp/wd -e AZUREML_MODEL_DIR=/tmp/wd -e TORCHSERVE_MODELS="textgeneration=Textgeneration.mar" -t "$ACR_NAME.azurecr.io/$IMAGE_TAG"
-sleep 30
-curl -X POST http://127.0.0.1:8080/predictions/textgeneration -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt"
-docker stop huggingface-textgen
-#
-
-#
-az ml online-endpoint create -n $ENDPOINT_NAME
-#
-
-#
-endpoint_status=$(az ml online-endpoint show --name $ENDPOINT_NAME --query "provisioning_state" -o tsv)
-echo $endpoint_status
-if [[ $endpoint_status == "Succeeded" ]]
-then
- echo "Endpoint created successfully"
-else
- echo "Endpoint creation failed"
- exit 1
-fi
-#
-
-#
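-# Fill in the {{VAR}} placeholders in the deployment YAML, then create the
-# deployment and route all traffic to it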
-change_vars $BASE_PATH/ts-hf-tg-deployment.yml
-az ml online-deployment create -f $BASE_PATH/ts-hf-tg-deployment.yml_ --all-traffic
-#
-
-#
-deploy_status=$(az ml online-deployment show --endpoint-name $ENDPOINT_NAME --name textgeneration --query "provisioning_state" -o tsv)
-echo $deploy_status
-if [[ $deploy_status == "Succeeded" ]]
-then
- echo "Deployment completed successfully"
-else
- echo "Deployment failed"
- exit 1
-fi
-#
-
-#
-# Get key
-echo "Getting access key..."
-KEY=$(az ml online-endpoint get-credentials -n $ENDPOINT_NAME --query primaryKey -o tsv)
-
-# Get scoring url
-echo "Getting scoring url..."
-SCORING_URL=$(az ml online-endpoint show -n $ENDPOINT_NAME --query scoring_uri -o tsv)
-echo "Scoring url is $SCORING_URL"
-#
-
-#
-curl -X POST -H "Authorization: Bearer $KEY" -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt" $SCORING_URL
-#
-
-cleanup
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md
deleted file mode 100644
index c8d8e36797b..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Deploy Hugging Face models using TorchServe
-
-This example demonstrates how to deploy Hugging Face models to a managed online endpoint and follows along with the [Serving Huggingface Transformers using TorchServe](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers) example from the TorchServe repository.
-
-In this example we deploy a GPT-2 model for text generation.
-
-## How to deploy
-
-This example can be run end-to-end using the `deploy-custom-container-torchserve-huggingface-textgen.sh` script in the `cli` folder. TorchServe does not need to be installed locally.
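-
-For instance, from the repository root (assuming the Azure CLI `ml` extension is installed and a default workspace is configured):
-
-```bash
-cd cli
-bash deploy-custom-container-torchserve-huggingface-textgen.sh
-```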
-
-## Image
-
-The image used for this example is defined in the file `ts-hf-tg.dockerfile`. It uses `pytorch/torchserve` as a base image and overrides the default `CMD` so that, at startup, the `model-store` points to the mounted model location (via the `AZUREML_MODEL_DIR` env var) and the `models` to load are read from the custom env var `TORCHSERVE_MODELS`.
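-
-The override boils down to a single line (a sketch of `ts-hf-tg.dockerfile`; note that an exec-form `CMD` does not expand env vars by itself, so this relies on the base image's entrypoint eval-ing its arguments at container start):
-
-```dockerfile
-CMD ["torchserve","--start","--disable-token-auth","--model-store","$AZUREML_MODEL_DIR","--models","$TORCHSERVE_MODELS","--ncs"]
-```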
-
-## Model
-
-To prepare the model, the [Huggingface_Transformers](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers) directory is sparse-checked-out from the `pytorch/serve` GitHub repo. We use the same image built for deployment above to prepare the model per the instructions in the Hugging Face example.
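-
-Inside the container, model preparation amounts to the following (a sketch of the commands the deployment script runs; paths are relative to the example directory, and a `sed` step that raises the generated `max_length` is omitted for brevity):
-
-```bash
-python Download_Transformer_models.py
-torch-model-archiver --model-name Textgeneration --version 1.0 \
-    --serialized-file Transformer_model/pytorch_model.bin \
-    --handler ./Transformer_handler_generalized.py \
-    --extra-files Transformer_model/config.json,./setup_config.json
-```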
-
-## Environment
-The environment is defined inline in the deployment YAML and references the ACR URL of the image. The ACR must be associated with the workspace (or the endpoint must use a user-assigned managed identity with the `AcrPull` role) in order to deploy successfully.
-
-We define an additional env var, `TORCHSERVE_MODELS`, which the image reads at startup.
-
-The environment also contains an `inference_config` block that defines the `liveness`, `readiness`, and `scoring` routes by path and port. Here these point at TorchServe's own routes: `/ping` for liveness and readiness and `/predictions/textgeneration` for scoring, all on port 8080. The block must be included whenever a custom image is used.
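-
-For reference, the relevant block from `ts-hf-tg-deployment.yml`:
-
-```yaml
-inference_config:
-  liveness_route:
-    port: 8080
-    path: /ping
-  readiness_route:
-    port: 8080
-    path: /ping
-  scoring_route:
-    port: 8080
-    path: /predictions/textgeneration
-```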
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml
deleted file mode 100644
index 227115b6b36..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
-name: textgeneration
-endpoint_name: {{ENDPOINT_NAME}}
-model:
- name: hf-tg
- path: serve/examples/Huggingface_Transformers/Textgeneration.mar
-environment_variables:
- TORCHSERVE_MODELS: "textgeneration=Textgeneration.mar"
-environment:
- name: hf-tg
- image: {{ACR_NAME}}.azurecr.io/{{IMAGE_TAG}}
- inference_config:
- liveness_route:
- port: 8080
- path: /ping
- readiness_route:
- port: 8080
- path: /ping
- scoring_route:
- port: 8080
- path: /predictions/textgeneration
-instance_type: Standard_DS3_v2
-instance_count: 1
-
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json
deleted file mode 100644
index 6b717b161bd..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "model_name": "gpt2",
-    "mode": "text_generation",
-    "do_lower_case": true,
-    "num_labels": "0",
-    "save_mode": "pretrained",
-    "max_length": "20",
-    "captum_explanation": false,
-    "FasterTransformer": false,
-    "BetterTransformer": false,
-    "embedding_name": "gpt2",
-    "model_parallel": false
-}
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile
deleted file mode 100644
index 0b8622f02e4..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM pytorch/torchserve:latest-cpu
-
-RUN pip install transformers==4.6.0
-
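-# Exec-form CMD does not expand env vars on its own; this relies on the base
-# image's entrypoint eval-ing its arguments so that $AZUREML_MODEL_DIR and
-# $TORCHSERVE_MODELS resolve at container start (an assumption about the
-# pytorch/torchserve entrypoint).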
-CMD ["torchserve","--start", "--disable-token-auth", "--model-store","$AZUREML_MODEL_DIR","--models","$TORCHSERVE_MODELS","--ncs"]
\ No newline at end of file