chore: Bump to CUDA 12.8 and TRT 10.8 for Blackwell support (#3405)

pytorch · Feb 28, 2025 · ac1f324 · ac1f324
1 parent 4f0bb6f
commit ac1f324
Show file tree

Hide file tree

Showing 32 changed files with 97 additions and 116 deletions.
diff --git a/.github/scripts/generate-release-matrix.py b/.github/scripts/generate-release-matrix.py
@@ -5,16 +5,16 @@
 import sys
 
 RELEASE_CUDA_VERSION = {
-    "wheel": ["cu124"],
-    "tarball": ["cu124"],
+    "wheel": ["cu128"],
+    "tarball": ["cu128"],
 }
 RELEASE_PYTHON_VERSION = {
-    "wheel": ["3.8", "3.9", "3.10", "3.11", "3.12"],
-    "tarball": ["3.10"],
+    "wheel": ["3.9", "3.10", "3.11", "3.12"],
+    "tarball": ["3.11"],
 }
 
 CXX11_TARBALL_CONTAINER_IMAGE = {
-    "cu124": "pytorch/libtorch-cxx11-builder:cuda12.4-main",
+    "cu128": "pytorch/libtorch-cxx11-builder:cuda12.8-main",
 }
 
 

diff --git a/.github/scripts/generate-tensorrt-test-matrix.py b/.github/scripts/generate-tensorrt-test-matrix.py
@@ -11,31 +11,23 @@
 # channel: nightly if the future tensorRT version test workflow is triggered from the main branch or your personal branch
 # channel: test if the future tensorRT version test workflow is triggered from the release branch(release/2.5 etc....)
 CUDA_VERSIONS_DICT = {
-    "nightly": ["cu126"],
-    "test": ["cu124", "cu126"],
-    "release": ["cu124", "cu126"],
+    "nightly": ["cu128"],
+    "test": ["cu118", "cu126", "cu128"],
+    "release": ["cu118", "cu126", "cu128"],
 }
 
 # please update the python version you want to test with the future tensorRT version here
 # channel: nightly if the future tensorRT version test workflow is triggered from the main branch or your personal branch
 # channel: test if the future tensorRT version test workflow is triggered from the release branch(release/2.5 etc....)
 PYTHON_VERSIONS_DICT = {
-    "nightly": ["3.9"],
+    "nightly": ["3.11"],
     "test": ["3.9", "3.10", "3.11", "3.12"],
     "release": ["3.9", "3.10", "3.11", "3.12"],
 }
 
 # please update the future tensorRT version you want to test here
 TENSORRT_VERSIONS_DICT = {
     "windows": {
-        "10.4.0": {
-            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip",
-            "strip_prefix": "TensorRT-10.4.0.26",
-        },
-        "10.5.0": {
-            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip",
-            "strip_prefix": "TensorRT-10.5.0.18",
-        },
         "10.6.0": {
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip",
             "strip_prefix": "TensorRT-10.6.0.26",
@@ -44,16 +36,12 @@
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
             "strip_prefix": "TensorRT-10.7.0.23",
         },
+        "10.8.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/zip/TensorRT-10.8.0.43.Windows.win10.cuda-12.8.zip",
+            "strip_prefix": "TensorRT-10.8.0.43",
+        },
     },
     "linux": {
-        "10.4.0": {
-            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
-            "strip_prefix": "TensorRT-10.4.0.26",
-        },
-        "10.5.0": {
-            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz",
-            "strip_prefix": "TensorRT-10.5.0.18",
-        },
         "10.6.0": {
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz",
             "strip_prefix": "TensorRT-10.6.0.26",
@@ -62,6 +50,10 @@
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
             "strip_prefix": "TensorRT-10.7.0.23",
         },
+        "10.8.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/tars/TensorRT-10.8.0.43.Linux.x86_64-gnu.cuda-12.8.tar.gz",
+            "strip_prefix": "TensorRT-10.8.0.43",
+        },
     },
 }
 
@@ -87,7 +79,7 @@ def check_file_availability(url: str) -> bool:
     # calculate the next minor version
     minor = int(list(TENSORRT_VERSIONS_DICT["linux"].keys())[-1].split(".")[1]) + 1
     trt_version = f"{major}.{minor}.0"
-    for patch in range(patch_from, 50):
+    for patch in range(patch_from, 80):
         for cuda_minor in range(4, 11):
             trt_linux_release_url_candidate = f"https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/{trt_version}/tars/TensorRT-{trt_version}.{patch}.Linux.x86_64-gnu.cuda-12.{cuda_minor}.tar.gz"
             if check_file_availability(trt_linux_release_url_candidate):

diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
@@ -24,23 +24,16 @@
     "release": ["3.9", "3.10", "3.11", "3.12"],
 }
 CUDA_ARCHES_DICT = {
-    "nightly": ["11.8", "12.4", "12.6"],
-    "test": ["11.8", "12.1", "12.4"],
-    "release": ["11.8", "12.1", "12.4"],
+    "nightly": ["11.8", "12.6", "12.8"],
+    "test": ["11.8", "12.6", "12.8"],
+    "release": ["11.8", "12.6", "12.8"],
 }
 ROCM_ARCHES_DICT = {
     "nightly": ["6.1", "6.2"],
     "test": ["6.1", "6.2"],
     "release": ["6.1", "6.2"],
 }
 
-CUDA_CUDDN_VERSIONS = {
-    "11.8": {"cuda": "11.8.0", "cudnn": "9"},
-    "12.1": {"cuda": "12.1.1", "cudnn": "9"},
-    "12.4": {"cuda": "12.4.1", "cudnn": "9"},
-    "12.6": {"cuda": "12.6.2", "cudnn": "9"},
-}
-
 PACKAGE_TYPES = ["wheel", "conda", "libtorch"]
 PRE_CXX11_ABI = "pre-cxx11"
 CXX11_ABI = "cxx11-abi"
@@ -151,6 +144,7 @@ def initialize_globals(channel: str, build_python_only: bool) -> None:
         "12.1": "pytorch/manylinux2_28-builder:cuda12.1",
         "12.4": "pytorch/manylinux2_28-builder:cuda12.4",
         "12.6": "pytorch/manylinux2_28-builder:cuda12.6",
+        "12.8": "pytorch/manylinux2_28-builder:cuda12.8",
         **{
             gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
             for gpu_arch in ROCM_ARCHES
@@ -278,7 +272,7 @@ def get_wheel_install_command(
             return f"{WHL_INSTALL_BASE} {PACKAGES_TO_INSTALL_WHL} --index-url {get_base_download_url_for_repo('whl', channel, gpu_arch_type, desired_cuda)}_pypi_pkg"  # noqa: E501
         else:
             raise ValueError(
-                "Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6 on Linux nightly builds."  # noqa: E501
+                "Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6, 12.8 on Linux nightly builds."  # noqa: E501
             )
     if (
         channel == RELEASE

diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml
@@ -23,6 +23,7 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
+      python-versions: '["3.11"]'
 
   filter-matrix:
     needs: [generate-matrix]
@@ -32,7 +33,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - uses: actions/checkout@v4
         with:
           repository: pytorch/tensorrt

diff --git a/.github/workflows/build-test-tensorrt-linux.yml b/.github/workflows/build-test-tensorrt-linux.yml
@@ -20,6 +20,7 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
+      python-versions: '["3.11"]'
 
   generate-tensorrt-matrix:
     needs: [generate-matrix]
@@ -29,7 +30,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - uses: actions/checkout@v4
         with:
           repository: pytorch/tensorrt

diff --git a/.github/workflows/build-test-tensorrt-windows.yml b/.github/workflows/build-test-tensorrt-windows.yml
@@ -20,6 +20,7 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
+      python-versions: '["3.11"]'
 
   generate-tensorrt-matrix:
     needs: [generate-matrix]
@@ -29,7 +30,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - uses: actions/checkout@v4
         with:
           repository: pytorch/tensorrt

diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml
@@ -23,6 +23,7 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
+      python-versions: '["3.11"]'
 
   substitute-runner:
     needs: generate-matrix

diff --git a/.github/workflows/docgen.yml b/.github/workflows/docgen.yml
@@ -14,12 +14,12 @@ jobs:
         if: ${{ ! contains(github.actor, 'pytorchbot') }}
         environment: pytorchbot-env
         container:
-            image: docker.io/pytorch/manylinux2_28-builder:cuda12.6
+            image: docker.io/pytorch/manylinux2_28-builder:cuda12.8
             options: --gpus all
         env:
-            CUDA_HOME: /usr/local/cuda-12.6
-            VERSION_SUFFIX: cu126
-            CU_VERSION: cu126
+            CUDA_HOME: /usr/local/cuda-12.8
+            VERSION_SUFFIX: cu128
+            CU_VERSION: cu128
             CHANNEL: nightly
             CI_BUILD: 1
         steps:

diff --git a/.github/workflows/generate_binary_build_matrix.yml b/.github/workflows/generate_binary_build_matrix.yml
@@ -72,7 +72,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - name: Checkout test-infra repository
         uses: actions/checkout@v4
         with:

diff --git a/.github/workflows/release-linux.yml b/.github/workflows/release-linux.yml
@@ -24,6 +24,7 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
+      python-versions: '["3.11"]'
 
   generate-release-tarball-matrix:
     needs: [generate-matrix]
@@ -33,7 +34,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - uses: actions/checkout@v4
         with:
           repository: pytorch/tensorrt
@@ -83,7 +84,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - uses: actions/checkout@v4
         with:
           repository: pytorch/tensorrt

diff --git a/.github/workflows/release-wheel-linux.yml b/.github/workflows/release-wheel-linux.yml
@@ -241,7 +241,7 @@ jobs:
           name: ${{ env.ARTIFACT_NAME }}
           path: ${{ inputs.repository }}/release/wheel/
       - name: Upload pre-cxx11 tarball to GitHub
-        if: ${{ inputs.cxx11-tarball-release != 'true' && env.PYTHON_VERSION == '3.10' }}
+        if: ${{ inputs.cxx11-tarball-release != 'true' && env.PYTHON_VERSION == '3.11' }}
         continue-on-error: true
         uses: actions/upload-artifact@v4
         with:

diff --git a/.github/workflows/release-windows.yml b/.github/workflows/release-windows.yml
@@ -24,6 +24,7 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
+      python-versions: '["3.11"]'
 
   generate-release-matrix:
     needs: [generate-matrix]
@@ -33,7 +34,7 @@ jobs:
     steps:
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
+          python-version: '3.11'
       - uses: actions/checkout@v4
         with:
           repository: pytorch/tensorrt

diff --git a/MODULE.bazel b/MODULE.bazel
@@ -36,13 +36,13 @@ new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.
 new_local_repository(
     name = "cuda",
     build_file = "@//third_party/cuda:BUILD",
-    path = "/usr/local/cuda-12.6/",
+    path = "/usr/local/cuda-12.8/",
 )
 
 new_local_repository(
     name = "cuda_win",
     build_file = "@//third_party/cuda:BUILD",
-    path = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.6/",
+    path = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.8/",
 )
 
 http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
@@ -55,21 +55,21 @@ http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu126/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
 )
 
 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu126/libtorch-shared-with-deps-latest.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-shared-with-deps-latest.zip"],
 )
 
 http_archive(
     name = "libtorch_win",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu126/libtorch-win-shared-with-deps-latest.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-win-shared-with-deps-latest.zip"],
 )
 
 # Download these tarballs manually from the NVIDIA website
@@ -79,18 +79,18 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.7.0.23",
+    strip_prefix = "TensorRT-10.8.0.43",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/tars/TensorRT-10.8.0.43.Linux.x86_64-gnu.cuda-12.8.tar.gz",
     ],
 )
 
 http_archive(
     name = "tensorrt_win",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    strip_prefix = "TensorRT-10.7.0.23",
+    strip_prefix = "TensorRT-10.8.0.43",
     urls = [
-        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
+        "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.8.0/zip/TensorRT-10.8.0.43.Windows.win10.cuda-12.8.zip",
     ],
 )
 

diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ Torch-TensorRT
 [![Documentation](https://img.shields.io/badge/docs-master-brightgreen)](https://nvidia.github.io/Torch-TensorRT/)
 [![pytorch](https://img.shields.io/badge/PyTorch-2.4-green)](https://www.python.org/downloads/release/python-31013/)
 [![cuda](https://img.shields.io/badge/CUDA-12.4-green)](https://developer.nvidia.com/cuda-downloads)
-[![trt](https://img.shields.io/badge/TensorRT-10.7.0-green)](https://github.com/nvidia/tensorrt-llm)
+[![trt](https://img.shields.io/badge/TensorRT-10.8.0-green)](https://github.com/nvidia/tensorrt-llm)
 [![license](https://img.shields.io/badge/license-BSD--3--Clause-blue)](./LICENSE)
 [![linux_tests](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml/badge.svg)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux.yml)
 [![windows_tests](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml/badge.svg)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-windows.yml)
@@ -117,9 +117,9 @@ auto results = trt_mod.forward({input_tensor});
 These are the following dependencies used to verify the testcases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.
 
 - Bazel 6.3.2
-- Libtorch 2.5.0.dev (latest nightly) (built with CUDA 12.4)
-- CUDA 12.4
-- TensorRT 10.7.0.23
+- Libtorch 2.7.0.dev (latest nightly) (built with CUDA 12.8)
+- CUDA 12.8
+- TensorRT 10.8.0.43
 
 ## Deprecation Policy
 

diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml
@@ -1,2 +1,2 @@
-__cuda_version__: "12.6"
-__tensorrt_version__: "10.7.0.post1"
+__cuda_version__: "12.8"
+__tensorrt_version__: "10.8.0"
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,15 +1,16 @@
 # syntax=docker/dockerfile:1
 
 # Base image starts with CUDA
-ARG BASE_IMG=nvidia/cuda:12.4.1-devel-ubuntu22.04
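+# TODO: revisit the hard-coded CUDA version of the base image; it is pinned in both the ARG and ENV BASE_IMG lines below and must be kept in sync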
+ARG BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04
 FROM ${BASE_IMG} as base
-ENV BASE_IMG=nvidia/cuda:12.4.1-devel-ubuntu22.04
+ENV BASE_IMG=nvidia/cuda:12.8.0-devel-ubuntu22.04
 
 ARG TENSORRT_VERSION
 ENV TENSORRT_VERSION=${TENSORRT_VERSION}
 RUN test -n "$TENSORRT_VERSION" || (echo "No tensorrt version specified, please use --build-arg TENSORRT_VERSION=x.y to specify a version." && exit 1)
 
-ARG PYTHON_VERSION=3.10
+ARG PYTHON_VERSION=3.11
 ENV PYTHON_VERSION=${PYTHON_VERSION}
 
 ARG USE_PRE_CXX11_ABI