Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix clean-eks script bug #1991

Merged
merged 1 commit into from
Mar 19, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 65 additions & 20 deletions ci/aws-clean-eks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,52 @@ function get_stacks() {
aws cloudformation list-stacks --stack-status-filter CREATE_COMPLETE DELETE_FAILED --query 'StackSummaries[*].StackName' --output text
}

function delete_security_group() {
local sgId="$1"
echo "info: deleting security group '$sgId'"
for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$sgId" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do
echo "info: clear leaked network interfaces '$eni'"
aws ec2 delete-network-interface --network-interface-id "$eni"
done
aws ec2 delete-security-group --group-id "$sgId"
if [ $? -eq 0 ]; then
echo "info: succesfully deleted security group '$sgId'"
else
echo "error: failed to deleted security group '$sgId'"
fi
}

function delete_vpc() {
local vpcId="$1"
echo "info: deleting vpc '$vpcId'"
for sgId in $(aws ec2 describe-security-groups --filters "Name=vpc-id,Values=$vpcId" --query "SecurityGroups[?GroupName != 'default'].GroupId" --output text); do
delete_security_group "$sgId"
done
aws ec2 delete-vpc --vpc-id "$vpcId"
if [ $? -eq 0 ]; then
echo "info: succesfully deleted vpc '$vpcId'"
else
echo "error: failed to deleted vpc '$vpcId'"
fi
}

function fix_eks_mng_deletion_issues() {
local cluster="$1"
local mng="$2"
while IFS=$'\n' read -r line; do
read -r code resourceIds <<< $line
if [ "$code" == "Ec2SecurityGroupDeletionFailure" ]; then
echo "info: clear security group '$resourceIds'"
for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$resourceIds" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do
echo "info: clear leaked network interfaces '$eni'"
aws ec2 delete-network-interface --network-interface-id "$eni"
IFS=',' read -ra sgIds <<< "$resourceIds"
for sgId in ${sgIds[@]}; do
delete_security_group "$sgId"
done
aws ec2 delete-security-group --group-id $resourceIds
fi
done <<< $(aws eks describe-nodegroup --cluster-name "$cluster" --nodegroup-name "$mng" --query 'nodegroup.health.issues' --output json | jq -r '.[].resourceIds |= join(",") | .[] | "\(.code)\t\(.resourceIds)"')
}

function clean_eks() {
local CLUSTER="$1"
echo "info: deleting mng stack"
echo "info: searching mng stack"
local regex='^'$CLUSTER'-mng-[0-9]+$'
local mngStack=
for stackName in $(get_stacks); do
Expand All @@ -53,24 +80,15 @@ function clean_eks() {
break
done
if [ -n "$mngStack" ]; then
echo "info: mng stack found '$mngStack', deleting it"
aws cloudformation delete-stack --stack-name $mngStack
aws cloudformation wait stack-delete-complete --stack-name $mngStack
if [ $? -ne 0 ]; then
echo "error: failed to delete mng stack '$mngStack', delete related resource first"
for mngName in $(aws eks list-nodegroups --cluster-name jenkins-tidb-operator-e2e2 --query 'nodegroups[*]' --output text); do
fix_eks_mng_deletion_issues "$CLUSTER" $mngName
done
aws cloudformation delete-stack --stack-name $mngStack
aws cloudformation wait stack-delete-complete --stack-name $mngStack
fi
echo "info: mng stack found '$mngStack'"
else
echo "info: mng stack not found, skipped"
echo "info: mng stack not found"
fi

echo "info: deleting cluster/cluster-role/mng-role/vpc stacks"
echo "info: deleting mng/cluster/cluster-role/mng-role/vpc stacks"
local stacks=(
$CLUSTER-cluster
$mngStack
$CLUSTER-cluster
$CLUSTER-role-cluster
$CLUSTER-role-mng
$CLUSTER-vpc
Expand All @@ -79,6 +97,33 @@ function clean_eks() {
echo "info: deleting stack $stack"
aws cloudformation delete-stack --stack-name $stack
aws cloudformation wait stack-delete-complete --stack-name $stack
if [ $? -ne 0 ]; then
echo "error: failed to delete stack '$stack'"
if [ "$stack" == "$mngStack" ]; then
echo "info: try to fix mng stack '$stack'"
for mngName in $(aws eks list-nodegroups --cluster-name "$CLUSTER" --query 'nodegroups[*]' --output text); do
fix_eks_mng_deletion_issues "$CLUSTER" $mngName
done
elif [ "$stack" == "$CLUSTER-vpc" ]; then
echo "info: try to fix vpc stack '$stack'"
while IFS=$'\n' read -r sgId; do
delete_security_group "$sgId"
done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "ControlPlaneSecurityGroupID") | .OutputValue')
while IFS=$'\n' read -r vpcId; do
delete_vpc "$vpcId"
done <<< $(aws cloudformation describe-stacks --stack-name "$stack" --query 'Stacks[*].Outputs[*]' --output json | jq -r '.[] | .[] | select(.OutputKey == "VPCID") | .OutputValue')
else
echo "fatal: unable to delete stack $stack"
exit 1
fi
echo "info: try to delete the stack '$stack' again"
aws cloudformation delete-stack --stack-name $stack
aws cloudformation wait stack-delete-complete --stack-name $stack
if [ $? -ne 0 ]; then
echo "fatal: unable to delete stack $stack"
exit 1
fi
fi
done
}

Expand Down