Skip to content

Fix aarch64 build issue(WIP) #3604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions .github/scripts/install-cuda-aarch64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,53 @@ install_cuda_aarch64() {
echo "install cuda ${CU_VERSION}"
# CU_VERSION: cu128 --> CU_VER: 12-8
CU_VER=${CU_VERSION:2:2}-${CU_VERSION:4:1}
# CU_VERSION: cu128 --> CU_DOT_VER: 12.8
CU_DOT_VER=${CU_VERSION:2:2}.${CU_VERSION:4:1}
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
dnf -y install cuda-compiler-${CU_VER}.aarch64 \

nvidia_drivers=$(dnf list installed | grep nvidia-driver)
echo "before install: dnf list installed | grep nvidia-driver: ${nvidia_drivers}"

dnf -y install nvidia-driver nvidia-driver-cuda \
cuda-compiler-${CU_VER}.aarch64 \
cuda-libraries-${CU_VER}.aarch64 \
cuda-libraries-devel-${CU_VER}.aarch64
cuda-libraries-devel-${CU_VER}.aarch64 \
libnccl-2.26.5-1+cuda${CU_DOT_VER} libnccl-devel-2.26.5-1+cuda${CU_DOT_VER} libnccl-static-2.26.5-1+cuda${CU_DOT_VER}
dnf clean all
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

nvidia_drivers=$(dnf list installed | grep nvidia-driver)
echo "after install: dnf list installed | grep nvidia-driver: ${nvidia_drivers}"
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib64:$LD_LIBRARY_PATH
ls -lart /usr/local/
nvcc --version
# Check if nvidia-smi is available on PATH; run it when present, otherwise
# print a notice. Purely diagnostic output for the build log.
if command -v nvidia-smi &> /dev/null; then
    nvidia-smi
else
    # ASCII double quotes are required here: typographic quotes are ordinary
    # characters to the shell and would be echoed as part of the message.
    echo "nvidia-smi not found - no NVIDIA GPU or drivers installed"
fi
# Check for NVIDIA device files in /dev (nvidia0 .. nvidia9).
# The pattern must be wrapped in ASCII double quotes so compgen receives the
# bare glob; typographic quotes are not shell quoting and would become part of
# the pattern, which then can never match a device node.
if compgen -G "/dev/nvidia[0-9]" >/dev/null; then
    echo "NVIDIA GPU devices found:"
    # Intentionally unquoted glob: list every nvidia* node (nvidiactl, nvidia-uvm, ...).
    ls -la /dev/nvidia*
else
    echo "No NVIDIA GPU devices found in /dev"
fi
# Check for NVIDIA GPU controllers in PCI devices.
# The regex is wrapped in ASCII single quotes; typographic quotes would be
# passed to grep as literal characters of the pattern and never match lspci
# output. Only grep's output is silenced; an lspci error (e.g. command not
# installed) still reaches stderr and the pipeline falls through to "else".
if lspci -v | grep -e 'controller.*NVIDIA' >/dev/null 2>&1; then
    echo "NVIDIA GPU found"
    lspci | grep -i nvidia
else
    echo "No NVIDIA GPU found"
fi

# Check if NVIDIA kernel module is loaded: any lsmod line containing "nvidia"
# counts. Matching lines are printed for the build log.
if lsmod | grep -q nvidia; then
    # ASCII quotes so the message is printed without stray typographic quote
    # characters.
    echo "NVIDIA kernel module is loaded"
    lsmod | grep nvidia
else
    echo "NVIDIA kernel module not loaded"
fi
echo "cuda ${CU_VER} installed successfully"
}

1 change: 1 addition & 0 deletions .github/scripts/install-torch-tensorrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ fi

# Install all the dependencies required for Torch-TensorRT
# Paths and URLs are quoted so a workspace path containing spaces or glob
# characters is passed to pip as a single argument.
pip install --pre -r "${PWD}/tests/py/requirements.txt"
# NOTE(review): presumably this removes whatever torch/torchvision the
# requirements file pulled in, so the pinned builds below start from a clean
# state - confirm.
pip uninstall -y torch torchvision
# TORCH / TORCHVISION are left unquoted on purpose: their definitions are not
# visible here and they may expand to more than one pip argument.
# shellcheck disable=SC2086
pip install --force-reinstall --pre ${TORCH} --index-url "${INDEX_URL}"
# shellcheck disable=SC2086
pip install --force-reinstall --pre ${TORCHVISION} --index-url "${INDEX_URL}"

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-test-linux-aarch64-jetpack.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Build and test Linux aarch64 wheels for Jetpack

on:
pull_request:
#pull_request:
push:
branches:
- main
Expand Down
Loading
Loading