From 91bd163179f4161a8a8f14282718cf243cdfe04b Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Wed, 9 Oct 2024 14:40:02 +0800 Subject: [PATCH] Optimize path and link validity check. (#462) * Optimize path and link validity check. Signed-off-by: ZePan110 * debug Signed-off-by: ZePan110 * debug2 Signed-off-by: ZePan110 * Fix issue Signed-off-by: ZePan110 * Remove debug output Signed-off-by: ZePan110 * test Signed-off-by: ZePan110 * Fix issue. Signed-off-by: ZePan110 * Fix issue Signed-off-by: ZePan110 * Restore test file Signed-off-by: ZePan110 --------- Signed-off-by: ZePan110 --- .github/workflows/pr-path-detection.yml | 87 +++++++++++++++---------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index 408a84461..9d43d0e90 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -17,28 +17,38 @@ jobs: - name: Checkout Repo GenAIInfra uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Check the Validity of Hyperlinks run: | cd ${{github.workspace}} fail="FALSE" - url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'GenAIEval/blob/main') - if [ -n "$url_lines" ]; then - for url_line in $url_lines; do - url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') - path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successfully*****" - else - echo "Invalid link from ${{github.workspace}}/$path: $url" - fail="TRUE" - fi + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" + if [ -n "changed_files" ]; then + for changed_file in $changed_files; do + url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIEval/blob/main') + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successful*****" + else + echo "Invalid link from ${{github.workspace}}/$path: $url" + fail="TRUE" + fi + fi + done fi done + else + echo "No changed .md file." fi if [[ "$fail" == "TRUE" ]]; then @@ -56,6 +66,8 @@ jobs: - name: Checkout Repo GenAIInfra uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Checking Relative Path Validity run: | @@ -69,33 +81,33 @@ jobs: branch="https://github.com/opea-project/GenAIInfra/blob/${{ github.event.pull_request.head.ref }}" fi link_head="https://github.com/opea-project/GenAIInfra/blob/main" + + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http') if [ -n "$png_lines" ]; then for png_line in $png_lines; do refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) if [[ "${png_path:0:1}" == "/" ]]; then - check_path=${{github.workspace}}$png_path - elif [[ "${png_path:0:1}" == "#" ]]; then - check_path=${{github.workspace}}/$refer_path$png_path + check_path=$png_path + elif [[ "$png_path" == *#* ]]; then + relative_path=$(echo "$png_path" | cut -d '#' -f1) + if [ -n "$relative_path" ]; then + check_path=$(dirname "$refer_path")/$relative_path + png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') + else + check_path=$refer_path + fi else - check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path + check_path=$(dirname "$refer_path")/$png_path fi - real_path=$(realpath $check_path) - if [ $? -ne 0 ]; then - echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist" - fail="TRUE" - else - url=$link_head$(echo "$real_path" | sed 's|.*/GenAIInfra||') - response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successfully*****" - else - echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}" - url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIInfra||') + + if [ -e "$check_path" ]; then + real_path=$(realpath $check_path) + if [[ "$png_line" == *#* ]]; then + if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then + url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIComps||')$png_path response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") if [ "$response" -ne 200 ]; then echo "**********Validation failed, try again**********" @@ -103,14 +115,17 @@ jobs: if [ "$response_retry" -eq 200 ]; then echo "*****Retry successfully*****" else - echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path" + echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" fail="TRUE" fi else - echo "Check branch ${{ github.event.pull_request.head.ref }} successfully." + echo "Validation succeed $png_line" fi fi fi + else + echo "$check_path does not exist" + fail="TRUE" fi done fi