diff --git a/cli/deploy-custom-container-torchserve-huggingface-textgen.sh b/cli/deploy-custom-container-torchserve-huggingface-textgen.sh
deleted file mode 100644
index bddc55f8b6f..00000000000
--- a/cli/deploy-custom-container-torchserve-huggingface-textgen.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/bin/bash
-
-set -e
-
-#
-ENDPOINT_NAME=hf-tg-`echo $RANDOM`
-IMAGE_TAG=azureml-examples/huggingface-textgen:1
-
-BASE_PATH=endpoints/online/custom-container/torchserve/huggingface-textgen
-SERVE_PATH=$BASE_PATH/serve/examples/Huggingface_Transformers
-ROOT_PATH=$PWD
-ACR_NAME=$(az ml workspace show --query container_registry -o tsv | cut -d'/' -f9-)
-#
-
-#
-# Helper function to parameterize YAML
-change_vars() {
-  for FILE in "$@"; do
-    TMP="${FILE}_"
-    cp $FILE $TMP
-    readarray -t VARS < <(cat $TMP | grep -oP '{{.*?}}' | sed -e 's/[}{]//g');
-    for VAR in "${VARS[@]}"; do
-      sed -i "s#{{${VAR}}}#${!VAR}#g" $TMP
-    done
-  done
-}
-
-cleanup () {
-  sudo rm -rf $BASE_PATH/serve || true
-  rm $BASE_PATH/ts-hf-tg-deployment.yml_
-  az ml online-endpoint delete -y -n $ENDPOINT_NAME
-}
-#
-
-#
-az acr login -n $ACR_NAME
-az acr build -t $IMAGE_TAG -f $BASE_PATH/ts-hf-tg.dockerfile -r $ACR_NAME $BASE_PATH
-#
-
-#
-cd $BASE_PATH
-rm -rf serve
-git init serve
-cd serve
-git remote add -f origin https://github.com/pytorch/serve
-git config core.sparseCheckout true
-echo "examples/Huggingface_Transformers" >> .git/info/sparse-checkout
-git pull origin master
-cd $ROOT_PATH
-#
-
-#
-if [[ ! -f $SERVE_PATH/setup_config.json_ ]]; then
-  cp $BASE_PATH/ts-hf-tg-setup_config.json $SERVE_PATH/setup_config.json_
-fi
-cp $BASE_PATH/ts-hf-tg-setup_config.json $SERVE_PATH/setup_config.json
-chmod -R o+w $SERVE_PATH
-cd $SERVE_PATH
-docker run --rm -v $PWD:/tmp/wd:z -w /tmp/wd -t "$ACR_NAME.azurecr.io/$IMAGE_TAG" "python Download_Transformer_models.py; \
-  sed -i 's#\"max_length\": 50#\"max_length\": 300#g' ./Transformer_model/config.json; \
-  torch-model-archiver --model-name Textgeneration --version 1.0 --serialized-file Transformer_model/pytorch_model.bin --handler ./Transformer_handler_generalized.py --extra-files Transformer_model/config.json,./setup_config.json; \
-  exit;"
-cp setup_config.json_ setup_config.json
-rm setup_config.json_
-cd $ROOT_PATH
-#
-
-#
-docker run --name huggingface-textgen --rm -d -p 8080:8080 -v "$PWD/$SERVE_PATH":/tmp/wd -e AZUREML_MODEL_DIR=/tmp/wd -e TORCHSERVE_MODELS="textgeneration=Textgeneration.mar" -t "$ACR_NAME.azurecr.io/$IMAGE_TAG"
-sleep 30
-curl -X POST http://127.0.0.1:8080/predictions/textgeneration -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt"
-docker stop huggingface-textgen
-#
-
-#
-az ml online-endpoint create -n $ENDPOINT_NAME
-#
-
-#
-endpoint_status=`az ml online-endpoint show --name $ENDPOINT_NAME --query "provisioning_state" -o tsv`
-echo $endpoint_status
-if [[ $endpoint_status == "Succeeded" ]]
-then
-  echo "Endpoint created successfully"
-else
-  echo "Endpoint creation failed"
-  exit 1
-fi
-#
-
-#
-change_vars $BASE_PATH/ts-hf-tg-deployment.yml
-az ml online-deployment create -f $BASE_PATH/ts-hf-tg-deployment.yml_ --all-traffic
-#
-
-#
-deploy_status=`az ml online-deployment show --endpoint-name $ENDPOINT_NAME --name textgeneration --query "provisioning_state" -o tsv`
-echo $deploy_status
-if [[ $deploy_status == "Succeeded" ]]
-then
-  echo "Deployment completed successfully"
-else
-  echo "Deployment failed"
-  exit 1
-fi
-#
-
-#
-# Get key
-echo "Getting access key..."
-KEY=$(az ml online-endpoint get-credentials -n $ENDPOINT_NAME --query primaryKey -o tsv)
-
-# Get scoring url
-echo "Getting scoring url..."
-SCORING_URL=$(az ml online-endpoint show -n $ENDPOINT_NAME --query scoring_uri -o tsv)
-echo "Scoring url is $SCORING_URL"
-#
-
-#
-curl -X POST -H "Authorization: Bearer $KEY" -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt" $SCORING_URL
-#
-
-cleanup
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md
deleted file mode 100644
index c8d8e36797b..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Deploy Huggingface models using TorchServe
-
-This example demonstrates how to deploy Huggingface models to a managed online endpoint and follows along with the [Serving Huggingface Transformers using TorchServe](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers) example from the TorchServe repository.
-
-In this example we deploy a GPT-2 model for text generation.
-
-## How to deploy
-
-This example can be run end-to-end using the `deploy-custom-container-torchserve-huggingface-textgen.sh` script in the `cli` folder. TorchServe does not need to be installed locally.
-
-## Image
-
-The image used for this example is defined in the file `ts-hf-tg.dockerfile`. It uses `pytorch/torchserve` as a base image and overrides the default `CMD` so that at startup the `model-store` points to the location of the mounted model (via the `AZUREML_MODEL_DIR` env var) and the `models` to load are taken from the custom env var `TORCHSERVE_MODELS`.
-
-## Model
-
-To prepare the model, the [Huggingface_Transformers](https://github.com/pytorch/serve/tree/master/examples/Huggingface_Transformers) directory is cloned from the `pytorch/serve` GitHub repo via a sparse checkout. We use the same image built for the deployment above to prepare the model per the instructions in the Huggingface example.
-
-## Environment
-The environment is defined inline in the deployment YAML and references the ACR URL of the image. The ACR must be associated with the workspace (or be accessible via a user-assigned managed identity that enables ACRPull) for the deployment to succeed.
-
-We define an additional env var called `TORCHSERVE_MODELS`, which is consumed by the image upon initialization.
-
-The environment also contains an `inference_config` block that defines the `liveness`, `readiness`, and `scoring` routes by path and port. These map onto TorchServe's inference API: port 8080, `/ping` for the health probes, and `/predictions/textgeneration` for scoring. The block must be stated explicitly because we are using a custom image.
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml
deleted file mode 100644
index 227115b6b36..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-deployment.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
-name: textgeneration
-endpoint_name: {{ENDPOINT_NAME}}
-model:
-  name: hf-tg
-  path: serve/examples/Huggingface_Transformers/Textgeneration.mar
-environment_variables:
-  TORCHSERVE_MODELS: "textgeneration=Textgeneration.mar"
-environment:
-  name: hf-tg
-  image: {{ACR_NAME}}.azurecr.io/{{IMAGE_TAG}}
-  inference_config:
-    liveness_route:
-      port: 8080
-      path: /ping
-    readiness_route:
-      port: 8080
-      path: /ping
-    scoring_route:
-      port: 8080
-      path: /predictions/textgeneration
-instance_type: Standard_DS3_v2
-instance_count: 1
-
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json
deleted file mode 100644
index 6b717b161bd..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg-setup_config.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "model_name":"gpt2",
-    "mode":"text_generation",
-    "do_lower_case":true,
-    "num_labels":"0",
-    "save_mode":"pretrained",
-    "max_length":"20",
-    "captum_explanation":false,
-    "FasterTransformer":false,
-    "BetterTransformer":false,
-    "embedding_name": "gpt2",
-    "model_parallel":false
- }
\ No newline at end of file
diff --git a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile b/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile
deleted file mode 100644
index 0b8622f02e4..00000000000
--- a/cli/endpoints/online/custom-container/torchserve/huggingface-textgen/ts-hf-tg.dockerfile
+++ /dev/null
@@ -1,5 +0,0 @@
-FROM pytorch/torchserve:latest-cpu
-
-RUN pip install transformers==4.6.0
-
-CMD ["torchserve","--start", "--disable-token-auth", "--model-store","$AZUREML_MODEL_DIR","--models","$TORCHSERVE_MODELS","--ncs"]
\ No newline at end of file
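
A note on the deleted dockerfile: Docker's exec (JSON-array) form of `CMD` does not pass the command through a shell, so `$AZUREML_MODEL_DIR` and `$TORCHSERVE_MODELS` are not expanded by Docker itself; the command appears to work only because the `pytorch/torchserve` base image's entrypoint script `eval`s its arguments and keeps the container alive after `torchserve --start` daemonizes. If that entrypoint behavior cannot be relied on, a shell-form `CMD` makes the expansion explicit. A minimal, untested sketch assuming the same base image and env vars:

```dockerfile
FROM pytorch/torchserve:latest-cpu

RUN pip install transformers==4.6.0

# Shell form: /bin/sh expands the env vars at container start instead of
# relying on the entrypoint to eval exec-form arguments. Assumes the base
# image's entrypoint still keeps the container alive after torchserve
# backgrounds itself.
CMD torchserve --start --disable-token-auth \
    --model-store "$AZUREML_MODEL_DIR" \
    --models "$TORCHSERVE_MODELS" \
    --ncs
```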
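
Relatedly, the `inference_config` routes in the deleted deployment YAML correspond to TorchServe's standard inference API: `GET /ping` on port 8080 for health, `POST /predictions/<model_name>` for scoring. When smoke-testing the container locally, as the deleted script does, both routes can be exercised directly; a short sketch, assuming the container started by the script is listening on localhost:8080 and `SERVE_PATH` is set as in the script:

```bash
# Liveness/readiness route: TorchServe reports {"status": "Healthy"}
# once the .mar file has loaded.
curl -s http://127.0.0.1:8080/ping

# Scoring route, matching scoring_route in the deployment YAML.
curl -s -X POST http://127.0.0.1:8080/predictions/textgeneration \
  -T "$SERVE_PATH/Text_gen_artifacts/sample_text.txt"
```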