diff --git a/newdocs/src/changelog.md b/newdocs/src/changelog.md
index 4d74f90c..c325c251 100644
--- a/newdocs/src/changelog.md
+++ b/newdocs/src/changelog.md
@@ -8,6 +8,8 @@
 - [Issue 1289](https://github.com/jackdewinter/pymarkdown/issues/1289)
     - added documentation under Pre-Commit for how to write the configuration file
       for Pre-Commit if an alternate extension is needed for the files being scanned
+- [Issue 1318](https://github.com/jackdewinter/pymarkdown/issues/1318)
+    - ported perf_*.cmd scripts to perf_*.sh scripts
 
 ### Fixed
 
diff --git a/perf_sample.sh b/perf_sample.sh
new file mode 100644
index 00000000..9092da62
--- /dev/null
+++ b/perf_sample.sh
@@ -0,0 +1,271 @@
+#!/usr/bin/env bash
+
+# Set the script mode to "strict".
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/ without the fail fast.
+set -uo pipefail
+
+# Set up any project based local script variables.
+SCRIPT_NAME=$( basename -- "${BASH_SOURCE[0]}" )
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+TEMP_FILE=$(mktemp "/tmp/${SCRIPT_NAME}.XXXXXXXXX")
+TEMP_PERF_OUTPUT=$(mktemp "/tmp/${SCRIPT_NAME}.XXXXXXXXX")
+DID_PUSHD=0
+
+SCRIPT_TITLE="Profiling application constructed sample file"
+
+TEST_FILE_DIRECTORY="${SCRIPT_DIR}/build/ptest"
+
+# Perform any cleanup required by the script.  Invoked through the EXIT trap.
+cleanup_function() {
+
+    # If the temp files were created, get rid of them.
+    if [[ -f "$TEMP_FILE" ]]; then
+        rm -- "$TEMP_FILE"
+    fi
+    if [[ -f "$TEMP_PERF_OUTPUT" ]]; then
+        rm -- "$TEMP_PERF_OUTPUT"
+    fi
+
+    # Restore the current directory, but only if start_process saved it.
+    if [[ "${DID_PUSHD:-0}" -eq 1 ]]; then
+        popd > /dev/null 2>&1 || exit
+    fi
+}
+
+# Register the cleanup right away so that early exits (for example, show_usage
+# during argument parsing) still remove the temp files created above.
+trap cleanup_function EXIT
+
+# Start the main part of the script off with a title, saving the current directory.
+start_process() {
+    if [[ "$VERBOSE_MODE" -ne 0 ]]; then
+        echo "Saving current directory prior to execution."
+    fi
+    if ! pushd . >"$TEMP_FILE" 2>&1; then
+        cat "$TEMP_FILE"
+        complete_process 1 "Script cannot save the current directory before proceeding."
+    fi
+    DID_PUSHD=1
+
+    if [[ "$VERBOSE_MODE" -ne 0 ]]; then
+        echo "$SCRIPT_TITLE..."
+    fi
+}
+
+# Stop the script with exit code $1, printing the optional reason in $2 first.
+complete_process() {
+    local SCRIPT_RETURN_CODE=$1
+    local COMPLETE_REASON=${2:-}
+
+    if [[ -n "$COMPLETE_REASON" ]] ; then
+        echo "$COMPLETE_REASON"
+    fi
+
+    if [[ "$SCRIPT_RETURN_CODE" -ne 0 ]]; then
+        echo "$SCRIPT_TITLE failed."
+    else
+        if [[ "$VERBOSE_MODE" -ne 0 ]]; then
+            echo "$SCRIPT_TITLE succeeded."
+        fi
+    fi
+
+    exit "$SCRIPT_RETURN_CODE"
+}
+
+# Give the user hints on how the script can be used, then exit with code 1.
+show_usage() {
+    local SCRIPT_NAME=$0
+
+    echo "Usage:"
+    echo " $(basename "$SCRIPT_NAME") [flags]"
+    echo ""
+    echo "Summary:"
+    echo " Executes a scan of a constructed document, capturing timing measurements."
+    echo ""
+    echo "Flags:"
+    echo " -c,--csv-file {file} Append results to file in CSV format."
+    echo " -r,--repeats {num} Number of repetitions of the test document to merge together. Default 10."
+    echo " -nr,--no-rules Take measurements without processing any rules {Parser only.}"
+    echo " -nc,--no-clear-cache Do not clear the Python cache. Only recommended for repeated calls of this script."
+    echo " -v,--view View the measured performance metrics."
+    echo " -x,--debug Display debug information about the script as it executes."
+    echo " -q,--quiet Do not display detailed information during execution."
+    echo " -h,--help Display this help text."
+    echo ""
+    exit 1
+}
+
+# Parse the command line into the *_MODE, NUM_REPEATS, CSV_OUTPUT and PARAMS globals.
+parse_command_line() {
+
+    NO_RULES_MODE=0
+    NO_CLEAR_MODE=0
+    DEBUG_MODE=0
+    VIEW_MODE=0
+    VERBOSE_MODE=1
+    NUM_REPEATS=10
+    CSV_OUTPUT=
+    PARAMS=()
+    while (( "$#" )); do
+        case "$1" in
+            -c|--csv-file)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by the file to write to." >&2
+                    show_usage
+                fi
+                CSV_OUTPUT=$2
+                shift
+                shift
+            ;;
+            -r|--repeats)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by the number of repetitions to use." >&2
+                    show_usage
+                fi
+                NUM_REPEATS=$2
+                if ! [[ $NUM_REPEATS =~ ^[1-9][0-9]*$ ]]; then
+                    echo "${NUM_REPEATS} is not an integer"
+                    echo "Error: Argument $1 is not followed by a valid number: ${NUM_REPEATS}" >&2
+                    show_usage
+                fi
+                shift
+                shift
+            ;;
+            -v|--view)
+                VIEW_MODE=1
+                shift
+            ;;
+            -nr|--no-rules)
+                NO_RULES_MODE=1
+                shift
+            ;;
+            -nc|--no-clear-cache)
+                NO_CLEAR_MODE=1
+                shift
+            ;;
+            -x|--debug)
+                DEBUG_MODE=1
+                shift
+            ;;
+            -q|--quiet)
+                VERBOSE_MODE=0
+                shift
+            ;;
+            -h|--help)
+                show_usage
+            ;;
+            -*) # unsupported flags
+                echo "Error: Unsupported flag $1" >&2
+                show_usage
+            ;;
+            *) # preserve positional arguments
+                PARAMS+=("$1")
+                shift
+            ;;
+        esac
+    done
+
+    if [[ $DEBUG_MODE -ne 0 ]] ; then
+        set -x
+    fi
+}
+
+# Get the executable path for the current bash shell (Windows path under MINGW).
+BASH_EXEC=${BASH}
+if [[ "${MSYSTEM:-}" =~ ^MINGW(64|32)$ ]] ; then
+    WINPID=$(ps -p $$ | awk 'NR ==2{print $4}')
+    if [[ -z "$WINPID" ]] ; then
+        echo "Cannot get Windows PID for Bash shell."
+        exit 1
+    fi
+    BASH_EXEC="$(wmic process where "ProcessID=$WINPID" get ExecutablePath | sed -n 2p | sed 's/\\/\\\\/g')"
+fi
+
+# Parse any command line values.
+parse_command_line "$@"
+
+# Clean entrance into the script.
+start_process
+
+SINGLE_TEST_SOURCE_FILE=test/resources/performance/sample.md
+SINGLE_TEST_DESTINATION_FILE="${TEST_FILE_DIRECTORY}"/test.md
+
+# Make sure we have a directory to create the test files for profiling in, and ensure
+# that it is empty before we start processing.
+if ! mkdir -p "${TEST_FILE_DIRECTORY}" > "${TEMP_FILE}" 2>&1 ; then
+    cat "${TEMP_FILE}"
+    complete_process 1 "{Creating test report directory failed.}"
+fi
+rm -r -- "${TEST_FILE_DIRECTORY:?}"/* > /dev/null 2>&1
+
+# Create a composite document with NUM_REPEATS copies of the source document.
+echo "Creating single document with ${NUM_REPEATS} copies of '${SINGLE_TEST_SOURCE_FILE}'."
+for (( i = 0; i < NUM_REPEATS; i++ )); do
+    cat "${SINGLE_TEST_SOURCE_FILE}" >> "${SINGLE_TEST_DESTINATION_FILE}"
+done
+
+# Remove any __pycache__ related files unless asked not to.
+if [[ $NO_CLEAR_MODE -eq 0 ]] ; then
+    echo "Resetting Python caches..."
+    PYTHONPYCACHEPREFIX="${SCRIPT_DIR}"/.pycache
+    rm -r -- "${PYTHONPYCACHEPREFIX:?}" > /dev/null 2>&1
+    if ! python3 -Bc "import pathlib; [p.unlink() for p in pathlib.Path('.').rglob('*.py[co]')]" > "${TEMP_FILE}" 2>&1 ; then
+        cat "${TEMP_FILE}"
+        complete_process 1 "{Removing compiled Python files failed.}"
+    fi
+    if ! python3 -Bc "import pathlib; [p.rmdir() for p in pathlib.Path('.').rglob('__pycache__')]" > "${TEMP_FILE}" 2>&1 ; then
+        cat "${TEMP_FILE}"
+        complete_process 1 "{Removing Python cache directories failed.}"
+    fi
+
+    # ... and then rebuild the caches by compiling the project and running one scan pass.
+    if ! python -m compileall pymarkdown > "${TEMP_FILE}" 2>&1 ; then
+        cat "${TEMP_FILE}"
+        complete_process 1 "{Pre-compilation of project failed.}"
+    fi
+    if ! python -OO -c "import subprocess; subprocess.run(['${BASH_EXEC}','run.sh','scan','$SINGLE_TEST_SOURCE_FILE'])" > "${TEMP_FILE}" 2>&1 ; then
+        cat "${TEMP_FILE}"
+        complete_process 1 "{Pre-measurement pass of project failed.}"
+    fi
+else
+    echo "Resetting Python caches skipped by request."
+fi
+
+NO_RULES_ARGS=
+if [[ $NO_RULES_MODE -ne 0 ]] ; then
+    NO_RULES_ARGS="'--disable-rules','*',"
+fi
+
+echo "Scanning created document..."
+python -OO -c "import subprocess,os,time; \
+    my_env = os.environ.copy();\
+    my_env['PYMARKDOWNLINT__PERFRUN'] = '1';\
+    start_time = time.time();\
+    subprocess.run(['${BASH_EXEC}','run.sh',${NO_RULES_ARGS}'scan','$SINGLE_TEST_DESTINATION_FILE'], env=my_env);\
+    value = time.time() - start_time;\
+    print(f'{value:.3f}');\
+    " > "${TEMP_PERF_OUTPUT}" 2>&1
+echo "Document scanning completed."
+
+EXECUTION_TIME=$( tail -n 1 "${TEMP_PERF_OUTPUT}" )
+LINES_IN_PROF_OUTPUT=$(sed -n '$=' "${TEMP_PERF_OUTPUT}" )
+
+if [[ -n "$CSV_OUTPUT" ]] ; then
+    echo "${NUM_REPEATS},${LINES_IN_PROF_OUTPUT},$EXECUTION_TIME" >> "$CSV_OUTPUT"
+else
+    echo ""
+    echo "Repeats in File: ${NUM_REPEATS}"
+    echo "Lines in output: $LINES_IN_PROF_OUTPUT"
+    echo "Execution time: $EXECUTION_TIME"
+    echo ""
+fi
+
+
+# If in view mode, use SnakeViz to visualize.
+if [[ $VIEW_MODE -ne 0 ]] ; then
+    echo ""
+    echo "Starting SnakeViz to view performance profile..."
+    pipenv run snakeviz p0.prof
+fi
+
+complete_process 0
diff --git a/perf_series.sh b/perf_series.sh
new file mode 100644
index 00000000..dc9dd241
--- /dev/null
+++ b/perf_series.sh
@@ -0,0 +1,286 @@
+#!/usr/bin/env bash
+
+# Set the script mode to "strict".
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/ without the fail fast.
+set -uo pipefail
+
+# Set up any project based local script variables.
+SCRIPT_NAME=$( basename -- "${BASH_SOURCE[0]}" )
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+TEMP_FILE=$(mktemp "/tmp/${SCRIPT_NAME}.XXXXXXXXX")
+DID_PUSHD=0
+
+SCRIPT_TITLE="Batch profiling of application"
+
+TEST_FILE_DIRECTORY="${SCRIPT_DIR}/build/ptest"
+
+# Perform any cleanup required by the script.  Invoked through the EXIT trap.
+cleanup_function() {
+
+    # If the temp file was created, get rid of it.
+    if [[ -f "$TEMP_FILE" ]]; then
+        rm -- "$TEMP_FILE"
+    fi
+
+    # Restore the current directory, but only if start_process saved it.
+    if [[ "${DID_PUSHD:-0}" -eq 1 ]]; then
+        popd > /dev/null 2>&1 || exit
+    fi
+}
+
+# Register the cleanup right away so that early exits (for example, show_usage
+# during argument parsing) still remove the temp file created above.
+trap cleanup_function EXIT
+
+# Start the main part of the script off with a title, saving the current directory.
+start_process() {
+    if [[ "$VERBOSE_MODE" -ne 0 ]]; then
+        echo "Saving current directory prior to execution."
+    fi
+    if ! pushd . >"$TEMP_FILE" 2>&1; then
+        cat "$TEMP_FILE"
+        complete_process 1 "Script cannot save the current directory before proceeding."
+    fi
+    DID_PUSHD=1
+
+    if [[ "$VERBOSE_MODE" -ne 0 ]]; then
+        echo "$SCRIPT_TITLE..."
+    fi
+}
+
+# Stop the script with exit code $1, printing the optional reason in $2 first.
+complete_process() {
+    local SCRIPT_RETURN_CODE=$1
+    local COMPLETE_REASON=${2:-}
+
+    if [[ -n "$COMPLETE_REASON" ]] ; then
+        echo "$COMPLETE_REASON"
+    fi
+
+    if [[ "$SCRIPT_RETURN_CODE" -ne 0 ]]; then
+        echo "$SCRIPT_TITLE failed."
+    else
+        if [[ "$VERBOSE_MODE" -ne 0 ]]; then
+            echo "$SCRIPT_TITLE succeeded."
+        fi
+    fi
+
+    exit "$SCRIPT_RETURN_CODE"
+}
+
+# Give the user hints on how the script can be used, then exit with code 1.
+show_usage() {
+    local SCRIPT_NAME=$0
+
+    echo "Usage:"
+    echo " $(basename "$SCRIPT_NAME") [flags]"
+    echo ""
+    echo "Summary:"
+    echo " Executes a scan of a constructed document, capturing timing measurements."
+    echo ""
+    echo "Flags:"
+    echo " -s,--start Repeat count to start at."
+    echo " -e,--end Repeat count to end at."
+    echo " -l,--list List of comma separated repeat counts to use instead of -s and -e."
+    echo " -c,--count Count of times for each series of repeats."
+    echo " -o,--only-first Only clear the Python cache at the start of the series."
+    echo " -t,--tag Tag to associate with this series of tests."
+    echo " -nr,--no-rules Take measurements without processing any rules {Parser only.}"
+    echo " -x,--debug Display debug information about the script as it executes."
+    echo " -q,--quiet Do not display detailed information during execution."
+    echo " -h,--help Display this help text."
+    echo ""
+    echo "Example:"
+    echo " To run a series of tests, from 10 to 15 repeats:"
+    echo " $(basename "$SCRIPT_NAME") -s 10 -e 15"
+    echo " To run a series of tests, from 10 to 15 repeats, twice:"
+    echo " $(basename "$SCRIPT_NAME") -s 10 -e 15 --count 2"
+    echo " To run a series of tests, only 10 and 15 repeats, twice:"
+    echo " $(basename "$SCRIPT_NAME") -l 10,15 --count 2"
+    exit 1
+}
+
+# Parse the command line.
+parse_command_line() {
+
+    NO_RULES_MODE=0
+    DEBUG_MODE=0
+    ONLY_FIRST=0
+    VERBOSE_MODE=1
+    TEST_SERIES_TAG=
+    NUM_COUNT=1
+    NUM_MINIMUM=1
+    NUM_MAXIMUM=2
+    ALTERNATE_REPEAT_LIST=
+    PARAMS=()
+    while (( "$#" )); do
+        case "$1" in
+            -s|--start)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by the number of repeats to start at." >&2
+                    show_usage
+                fi
+                NUM_MINIMUM=$2
+                if ! [[ $NUM_MINIMUM =~ ^[1-9][0-9]*$ ]]; then
+                    echo "${NUM_MINIMUM} is not an integer"
+                    echo "Error: Argument $1 is not followed by a valid number: ${NUM_MINIMUM}" >&2
+                    show_usage
+                fi
+                shift
+                shift
+            ;;
+            -e|--end)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by the number of repeats to end at." >&2
+                    show_usage
+                fi
+                NUM_MAXIMUM=$2
+                if ! [[ $NUM_MAXIMUM =~ ^[1-9][0-9]*$ ]]; then
+                    echo "${NUM_MAXIMUM} is not an integer"
+                    echo "Error: Argument $1 is not followed by a valid number: ${NUM_MAXIMUM}" >&2
+                    show_usage
+                fi
+                shift
+                shift
+            ;;
+            -c|--count)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by the number of series to execute." >&2
+                    show_usage
+                fi
+                NUM_COUNT=$2
+                if ! [[ $NUM_COUNT =~ ^[1-9][0-9]*$ ]]; then
+                    echo "${NUM_COUNT} is not an integer"
+                    echo "Error: Argument $1 is not followed by a valid number: ${NUM_COUNT}" >&2
+                    show_usage
+                fi
+                shift
+                shift
+            ;;
+            -l|--list)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by a list of repeat counts." >&2
+                    show_usage
+                fi
+                ALTERNATE_REPEAT_LIST=$2
+                shift
+                shift
+            ;;
+            -t|--tag)
+                if [[ -z "${2:-}" ]] ; then
+                    echo "Error: Argument $1 must be followed by the tag to use." >&2
+                    show_usage
+                fi
+                TEST_SERIES_TAG=$2
+                shift
+                shift
+            ;;
+            -o|--only-first)
+                ONLY_FIRST=1
+                shift
+            ;;
+            -nr|--no-rules)
+                NO_RULES_MODE=1
+                shift
+            ;;
+            -x|--debug)
+                DEBUG_MODE=1
+                shift
+            ;;
+            -q|--quiet)
+                VERBOSE_MODE=0
+                shift
+            ;;
+            -h|--help)
+                show_usage
+            ;;
+            -*) # unsupported flags
+                echo "Error: Unsupported flag $1" >&2
+                show_usage
+            ;;
+            *) # preserve positional arguments
+                PARAMS+=("$1")
+                shift
+            ;;
+        esac
+    done
+
+    if [[ $DEBUG_MODE -ne 0 ]] ; then
+        set -x
+    fi
+}
+
+# Parse any command line values.
+parse_command_line "$@"
+
+# Clean entrance into the script.
+start_process
+
+# Determine which CSV file will be written to and make sure that the directory exists.
+if [[ -n "$TEST_SERIES_TAG" ]] ; then
+    DEST_FILE="build/series-${TEST_SERIES_TAG}.csv"
+else
+    DEST_FILE=build/series.csv
+fi
+
+if ! mkdir -p "$(dirname "${DEST_FILE}")" > "${TEMP_FILE}" 2>&1 ; then
+    cat "${TEMP_FILE}"
+    complete_process 1 "{Creating test report directory failed.}"
+fi
+rm -f -- "${DEST_FILE}" > /dev/null 2>&1
+
+# Collect, as an array, the extra arguments to pass to every perf_sample.sh call.
+PERF_SAMPLE_ARGS=()
+if [[ $NO_RULES_MODE -ne 0 ]] ; then
+    PERF_SAMPLE_ARGS+=(--no-rules)
+fi
+
+# If asked to only clear the python cache for the first sample, do so before
+# any samples are taken, and make sure any following calls do not clear the cache.
+if [[ $ONLY_FIRST -ne 0 ]] ; then
+    if ! "${SCRIPT_DIR}/perf_sample.sh" -nr --repeats 1 > "${TEMP_FILE}" 2>&1 ; then
+        cat "${TEMP_FILE}"
+        complete_process 1 "Executing warmup run before series failed."
+    fi
+    PERF_SAMPLE_ARGS+=(--no-clear-cache)
+fi
+
+# Repeat the samples NUM_COUNT times.
+for (( i = 1; i <= NUM_COUNT; i++ )); do
+    echo "Times Through Series: $i"
+
+    # If a comma separated repeat list, cycle through it...
+    if [[ -n $ALTERNATE_REPEAT_LIST ]] ; then
+
+        IFS=',' read -r -a repeat_array <<< "$ALTERNATE_REPEAT_LIST"
+        for REPEAT_COUNT in "${repeat_array[@]}"; do
+
+            echo " Repeat Count: $REPEAT_COUNT"
+            if ! "${SCRIPT_DIR}/perf_sample.sh" ${PERF_SAMPLE_ARGS[@]+"${PERF_SAMPLE_ARGS[@]}"} --repeats "$REPEAT_COUNT" -c "${DEST_FILE}" > "${TEMP_FILE}" 2>&1 ; then
+                cat "${TEMP_FILE}"
+                complete_process 1 "Executing profile run for $REPEAT_COUNT repeats failed."
+            fi
+
+        done
+
+    # ... otherwise, walk every repeat count from NUM_MINIMUM to NUM_MAXIMUM inclusive.
+    else
+        REPEAT_COUNT=$NUM_MINIMUM
+        while (( REPEAT_COUNT <= NUM_MAXIMUM )) ; do
+
+            echo " Repeat Count: $REPEAT_COUNT"
+            if ! "${SCRIPT_DIR}/perf_sample.sh" ${PERF_SAMPLE_ARGS[@]+"${PERF_SAMPLE_ARGS[@]}"} --repeats "$REPEAT_COUNT" -c "${DEST_FILE}" > "${TEMP_FILE}" 2>&1 ; then
+                cat "${TEMP_FILE}"
+                complete_process 1 "Executing profile run for $REPEAT_COUNT repeats failed."
+            fi
+
+            ((REPEAT_COUNT++))
+        done
+    fi
+done
+
+
+echo ""
+echo "CSV file '${DEST_FILE}' written with sample timings."
+
+complete_process 0