From 9209f1522d2407820d3ec6a7ddcc78b7ccb076c6 Mon Sep 17 00:00:00 2001
From: Andy Linfoot <78757007+andy-neuma@users.noreply.github.com>
Date: Thu, 22 Feb 2024 22:43:40 -0500
Subject: [PATCH] additional updates to "bump-to-v0.3.2" (#39)

SUMMARY
* update `TORCH_CUDA_ARCH_LIST` to match `magic_wand`
* update "test vllm" action to run tests serially
* add helper script to find *.py tests, run them serially, and output
JUnit formatted xml

TEST
working through changes manually on debug instance

---------

Co-authored-by: andy-neuma <andy@neuralmagic.com>
---
 .github/actions/nm-build-vllm/action.yml |  2 -
 .github/actions/nm-set-env/action.yml    | 13 +++--
 .github/actions/nm-test-vllm/action.yml  | 12 ++---
 .github/pull_request_template.md         |  6 +++
 .github/scripts/run-tests                | 66 ++++++++++++++++++++++++
 .github/workflows/build-test.yml         | 24 ++++++---
 .github/workflows/remote-push.yml        |  5 +-
 7 files changed, 106 insertions(+), 22 deletions(-)
 create mode 100644 .github/pull_request_template.md
 create mode 100755 .github/scripts/run-tests

diff --git a/.github/actions/nm-build-vllm/action.yml b/.github/actions/nm-build-vllm/action.yml
index 780c2f99de3c6..5218078ba1704 100644
--- a/.github/actions/nm-build-vllm/action.yml
+++ b/.github/actions/nm-build-vllm/action.yml
@@ -19,8 +19,6 @@ runs:
   steps:
   - id: build
     run: |
-      # TODO: this is a hack ... fix it later
-      # pyenv hardcoded ... python version hardcoded ...
       COMMIT=${{ github.sha }}
       VENV="${{ inputs.venv }}-${COMMIT:0:7}"
       source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
diff --git a/.github/actions/nm-set-env/action.yml b/.github/actions/nm-set-env/action.yml
index d5b108d97ba4a..863354f35dd0b 100644
--- a/.github/actions/nm-set-env/action.yml
+++ b/.github/actions/nm-set-env/action.yml
@@ -1,15 +1,20 @@
 name: set neuralmagic env
 description: 'sets environment variables for neuralmagic'
 inputs:
-  hf_home:
-    description: 'Hugging Face home'
+  hf_token:
+    description: 'Hugging Face token'
     required: true
+  Gi_per_thread:
+    description: 'requested GiB to reserve per thread'
+    required: true
 runs:
   using: composite
   steps:
   - run: |
-      echo "HF_HOME=${HF_HOME_TOKEN}" >> $GITHUB_ENV
-      echo "TORCH_CUDA_ARCH_LIST=8.0+PTX" >> $GITHUB_ENV
+      echo "HF_TOKEN=${HF_TOKEN_SECRET}" >> $GITHUB_ENV
+      NUM_THREADS=$(./.github/scripts/determine-threading -G ${{ inputs.Gi_per_thread }})
+      echo "MAX_JOBS=${NUM_THREADS}" >> $GITHUB_ENV
+      echo "VLLM_INSTALL_PUNICA_KERNELS=1" >> $GITHUB_ENV
       echo "PYENV_ROOT=/usr/local/apps/pyenv" >> $GITHUB_ENV
       echo "XDG_CONFIG_HOME=/usr/local/apps" >> $GITHUB_ENV
       WHOAMI=$(whoami)
@@ -17,5 +22,5 @@ runs:
       echo "LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64::/usr/local/cuda-12.1/lib64:" >> $GITHUB_ENV
       echo "PROJECT_ID=12" >> $GITHUB_ENV
     env:
-        HF_HOME_TOKEN: ${{ inputs.hf_home }}
+        HF_TOKEN_SECRET: ${{ inputs.hf_token }}
     shell: bash
diff --git a/.github/actions/nm-test-vllm/action.yml b/.github/actions/nm-test-vllm/action.yml
index 27dae15df0332..7d05450e4e1c2 100644
--- a/.github/actions/nm-test-vllm/action.yml
+++ b/.github/actions/nm-test-vllm/action.yml
@@ -4,8 +4,8 @@ inputs:
   test_directory:
     description: 'test directory, path is relative to neuralmagic-vllm'
     required: true
-  test_xml:
-    description: 'filename for xml test results'
+  test_results:
+    description: 'top-level directory for xml test results'
     required: true
   python:
     description: 'python version, e.g. 3.10.12'
@@ -22,15 +22,15 @@ runs:
   steps:
   - id: test
     run: |
-      SUCCESS=0
-      # TODO: this is a hack ... fix it later
-      # pyenv hardcoded ... python version hardcoded ...
       COMMIT=${{ github.sha }}
       VENV="${{ inputs.venv }}-${COMMIT:0:7}"
       source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
       pip3 install --index-url http://192.168.201.226:8080/ --trusted-host 192.168.201.226 magic-wand
       pip3 install -r requirements-dev.txt
-      pytest --junitxml=${{ inputs.test_xml }} ${{ inputs.test_directory }} || SUCCESS=$?
+      # run tests via runner script (serially)
+      SUCCESS=0
+      ./.github/scripts/run-tests -t ${{ inputs.test_directory }} -r ${{ inputs.test_results }} || SUCCESS=$?
+      echo "was this a SUCCESS? ${SUCCESS}"
       echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
       exit ${SUCCESS}
     shell: bash
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000000000..e871931956390
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,6 @@
+SUMMARY:
+"please provide a brief summary"
+
+TEST PLAN:
+"please outline how the changes were tested"
+
diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests
new file mode 100755
index 0000000000000..2c5aeb1d9826e
--- /dev/null
+++ b/.github/scripts/run-tests
@@ -0,0 +1,89 @@
+#!/bin/bash -e
+
+# simple helper script to manage concurrency while running tests:
+# finds every *.py file (except __init__.py) under the test directory, runs
+# pytest on each file serially, and writes one JUnit xml report per file.
+# exits 0 when every pytest run passed (or collected no tests), 1 otherwise.
+
+usage() {
+    echo "Usage: ${0} <options>"
+    echo
+    echo "  -t    - test directory, i.e. location of *.py test files. (default 'tests/')"
+    echo "  -r    - desired results base directory. xml results will mirror provided tests directory structure. (default 'test-results/')"
+    echo "  -h    - this list of options"
+    echo
+    echo "note: all paths are relative to 'neuralmagic-vllm' root"
+    echo
+    exit 1
+}
+
+TEST_DIR=tests
+RESULTS_DIR=test-results
+
+while getopts "ht:r:" OPT; do
+    case "${OPT}" in
+        h)
+            usage
+            ;;
+        t)
+            TEST_DIR="${OPTARG}"
+            ;;
+        r)
+            RESULTS_DIR="${OPTARG}"
+            ;;
+        *)
+            # unknown option or missing argument (getopts already reported it)
+            usage
+            ;;
+    esac
+done
+
+# check if variables are valid
+if [ -z "${RESULTS_DIR}" ]; then
+    echo "please set desired results base directory"
+    usage
+fi
+
+if [ -z "${TEST_DIR}" ]; then
+    echo "please set test directory"
+    usage
+fi
+
+if [ ! -d "${TEST_DIR}" ]; then
+    echo "specified test directory, '${TEST_DIR}' does not exist ..."
+    usage
+fi
+
+# collect test files NUL-delimited so paths containing whitespace stay intact
+TESTS_TO_RUN=()
+while IFS= read -r -d '' TEST; do
+    TESTS_TO_RUN+=("${TEST}")
+done < <(find "${TEST_DIR}" -type f -name "*.py" -not -name "__init__.py" -print0)
+
+# run tests serially
+SUCCESS=0
+for TEST in "${TESTS_TO_RUN[@]}"
+do
+    LOCAL_SUCCESS=0
+    # mirror the test path under the results directory: strip the test
+    # directory prefix and swap the trailing ".py" for ".xml". parameter
+    # expansion is used instead of sed so that '/' and '.' in the paths are
+    # taken literally.
+    RELATIVE_TEST="${TEST#"${TEST_DIR}"}"
+    RELATIVE_TEST="${RELATIVE_TEST#/}"
+    RESULT_XML="${RESULTS_DIR%/}/${RELATIVE_TEST%.py}.xml"
+    pytest --junitxml="${RESULT_XML}" "${TEST}" || LOCAL_SUCCESS=$?
+    # pytest exits with 5 when no tests were collected (e.g. conftest.py or
+    # helper modules); that is not a test failure.
+    if [ "${LOCAL_SUCCESS}" -eq 5 ]; then
+        echo "no tests collected from '${TEST}', ignoring"
+        LOCAL_SUCCESS=0
+    fi
+    SUCCESS=$((SUCCESS + LOCAL_SUCCESS))
+done
+
+if [ "${SUCCESS}" -eq "0" ]; then
+    exit 0
+else
+    exit 1
+fi
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 26a9b5cb89bcd..7d571b50adf14 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -15,6 +15,10 @@ on:
         description: "git commit hash or branch name"
         type: string
         required: true
+      Gi_per_thread:
+        description: 'requested GiB to reserve per thread'
+        type: string
+        required: true
       python:
         description: "python version, e.g. 3.10.12"
         type: string
@@ -35,6 +39,10 @@ on:
         description: "git commit hash or branch name"
         type: string
         required: true
+      Gi_per_thread:
+        description: 'requested GiB to reserve per thread'
+        type: string
+        required: true
       python:
         description: "python version, e.g. 3.10.12"
         type: string
@@ -61,7 +69,8 @@ jobs:
               id: setenv
               uses: ./.github/actions/nm-set-env/
               with:
-                hf_home: ${{ secrets.NM_HF_HOME }}
+                hf_token: ${{ secrets.NM_HF_TOKEN }}
+                Gi_per_thread: ${{ inputs.Gi_per_thread }}
 
             - name: set python
               id: set_python
@@ -88,7 +97,7 @@ jobs:
               id: build
               uses: ./.github/actions/nm-build-vllm/
               with:
-                Gi_per_thread: 1
+                Gi_per_thread: ${{ inputs.Gi_per_thread }}
                 python: ${{ inputs.python }}
                 venv: TEST
 
@@ -97,7 +106,7 @@ jobs:
               uses: ./.github/actions/nm-test-vllm/
               with:
                 test_directory: tests
-                test_xml: test-results/all_tests.xml
+                test_results: test-results
                 python: ${{ inputs.python }}
                 venv: TEST
 
@@ -134,12 +143,13 @@ jobs:
                 TEST_STATUS: ${{ steps.test.outputs.status }}
               run: |
                   echo "checkout status: ${CHECKOUT}"
-                  if [[ "${CHECKOUT}" != *"success"* ]]; then exit 1; fi
-                  if [ ${LINT_STATUS} -ne 0 ]; then exit 1; fi
-                  if [ ${BUILD_STATUS} -ne 0 ]; then exit 1; fi
+                  echo "lint status: ${LINT_STATUS}"
                   echo "build status: ${BUILD_STATUS}"
-                  if [ ${TEST_STATUS} -ne 0 ]; then exit 1; fi
                   echo "test status: ${TEST_STATUS}"
+                  if [[ "${CHECKOUT}" != *"success"* ]]; then exit 1; fi
+                  if [ -z "${LINT_STATUS}" ] || [ "${LINT_STATUS}" -ne "0" ]; then exit 1; fi
+                  if [ -z "${BUILD_STATUS}" ] || [ "${BUILD_STATUS}" -ne "0" ]; then exit 1; fi
+                  if [ -z "${TEST_STATUS}" ] || [ "${TEST_STATUS}" -ne "0" ]; then exit 1; fi
 
             - name: complete testmo run
               uses: ./.github/actions/nm-testmo-run-complete/
diff --git a/.github/workflows/remote-push.yml b/.github/workflows/remote-push.yml
index c10b386ceb23e..800db24fde970 100644
--- a/.github/workflows/remote-push.yml
+++ b/.github/workflows/remote-push.yml
@@ -13,8 +13,6 @@ jobs:
 
     # TODO: expand python matrix later, once CI system has
     #       matured.
-    # TODO: adjust timeout after we get a bit more experience.
-    #       making it 60 is a bit permissive.
 
     # TODO: enable this later
     AWS-AVX2-32G-A10G-24G:
@@ -24,7 +22,8 @@ jobs:
         uses: ./.github/workflows/build-test.yml
         with:
             label: aws-avx2-32G-a10g-24G
-            timeout: 60
+            timeout: 180
             gitref: '${{ github.ref }}'
+            Gi_per_thread: '4'
             python: ${{ matrix.python }}
         secrets: inherit