Skip to content

add nvptx_target_feature #138689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
// Filter out features that are not supported by the current LLVM version
("aarch64", "fpmr") => None, // only existed in 18
("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
// NVPTX targets added in LLVM 20
("nvptx64", "sm_100") if get_version().0 < 20 => None,
("nvptx64", "sm_100a") if get_version().0 < 20 => None,
("nvptx64", "sm_101") if get_version().0 < 20 => None,
("nvptx64", "sm_101a") if get_version().0 < 20 => None,
("nvptx64", "sm_120") if get_version().0 < 20 => None,
("nvptx64", "sm_120a") if get_version().0 < 20 => None,
("nvptx64", "ptx86") if get_version().0 < 20 => None,
("nvptx64", "ptx87") if get_version().0 < 20 => None,
// Filter out features that are not supported by the current LLVM version
("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq")
if get_version().0 < 20 =>
Expand Down Expand Up @@ -324,15 +333,12 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
///
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
pub(crate) fn target_config(sess: &Session) -> TargetConfig {
// Add base features for the target.
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
// the target CPU, that is still expanded to target features (with all their implied features)
// by LLVM.
let target_machine = create_informational_target_machine(sess, true);

let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| {
// This closure determines whether the target CPU has the feature according to LLVM. We do
// *not* consider the `-Ctarget-feature`s here, as that will be handled later in
// `cfg_target_feature`.
if let Some(feat) = to_llvm_features(sess, feature) {
// All the LLVM features this expands to must be enabled.
for llvm_feature in feat {
Expand Down
15 changes: 13 additions & 2 deletions compiler/rustc_codegen_ssa/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,10 @@ fn parse_rust_feature_flag<'a>(
/// 2nd component of the return value, respectively).
///
/// `target_base_has_feature` should check whether the given feature (a Rust feature name!) is
/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`.
/// enabled in the "base" target machine, i.e., without applying `-Ctarget-feature`. Note that LLVM
/// may consider features to be implied that we do not and vice-versa. We want `cfg` to be entirely
/// consistent with Rust feature implications, and thus only consult LLVM to expand the target CPU
/// to target features.
///
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled elsewhere.
pub fn cfg_target_feature(
Expand All @@ -238,7 +241,15 @@ pub fn cfg_target_feature(
.rust_target_features()
.iter()
.filter(|(feature, _, _)| target_base_has_feature(feature))
.map(|(feature, _, _)| Symbol::intern(feature))
.flat_map(|(base_feature, _, _)| {
// Expand the direct base feature into all transitively-implied features. Note that we
// cannot simply use the `implied` field of the tuple since that only contains
// directly-implied features.
//
// Iteration order is irrelevant because we're collecting into an `UnordSet`.
#[allow(rustc::potential_query_instability)]
sess.target.implied_target_features(base_feature).into_iter().map(|f| Symbol::intern(f))
})
.collect();

// Add enabled and remove disabled features.
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_feature/src/unstable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ declare_features! (
(unstable, m68k_target_feature, "1.85.0", Some(134328)),
(unstable, mips_target_feature, "1.27.0", Some(44839)),
(unstable, movrs_target_feature, "1.88.0", Some(137976)),
(unstable, nvptx_target_feature, "CURRENT_RUSTC_VERSION", Some(44839)),
(unstable, powerpc_target_feature, "1.27.0", Some(44839)),
(unstable, prfchw_target_feature, "1.78.0", Some(44839)),
(unstable, riscv_target_feature, "1.45.0", Some(44839)),
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1509,6 +1509,7 @@ symbols! {
not,
notable_trait,
note,
nvptx_target_feature,
object_safe_for_dispatch,
of,
off,
Expand Down
69 changes: 68 additions & 1 deletion compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,71 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-end
];

/// Rust target features for NVPTX, as `(name, stability, directly implied features)` tuples.
///
/// All entries are unstable behind `nvptx_target_feature`. The table holds two independent
/// families: `sm_*` and `ptx*`. Within each family an entry directly implies at most one other
/// entry; `sm_20` and `ptx32` imply nothing. The `a`-suffixed entries (`sm_90a`, `sm_100a`,
/// `sm_101a`, `sm_120a`) imply their unsuffixed base but are not implied by any other entry here.
///
/// Only *direct* implications are listed; no entry names more than its immediate predecessor.
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
("sm_86", Unstable(sym::nvptx_target_feature), &["sm_80"]),
("sm_87", Unstable(sym::nvptx_target_feature), &["sm_86"]),
("sm_89", Unstable(sym::nvptx_target_feature), &["sm_87"]),
("sm_90", Unstable(sym::nvptx_target_feature), &["sm_89"]),
("sm_90a", Unstable(sym::nvptx_target_feature), &["sm_90"]),
// tidy-alphabetical-end
// NOTE(review): the three-digit `sm_*` entries sit in their own tidy block, presumably because
// they would otherwise sort before `sm_20` and break the ordering check -- confirm before merging
// the blocks. `sm_100` continues from `sm_90`, not from `sm_90a`.
// tidy-alphabetical-start
("sm_100", Unstable(sym::nvptx_target_feature), &["sm_90"]),
("sm_100a", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101", Unstable(sym::nvptx_target_feature), &["sm_100"]),
("sm_101a", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120", Unstable(sym::nvptx_target_feature), &["sm_101"]),
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
// tidy-alphabetical-end
// `ptx*` entries: a single linear chain from `ptx32` up to `ptx87`.
// tidy-alphabetical-start
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),
("ptx74", Unstable(sym::nvptx_target_feature), &["ptx73"]),
("ptx75", Unstable(sym::nvptx_target_feature), &["ptx74"]),
("ptx76", Unstable(sym::nvptx_target_feature), &["ptx75"]),
("ptx77", Unstable(sym::nvptx_target_feature), &["ptx76"]),
("ptx78", Unstable(sym::nvptx_target_feature), &["ptx77"]),
("ptx80", Unstable(sym::nvptx_target_feature), &["ptx78"]),
("ptx81", Unstable(sym::nvptx_target_feature), &["ptx80"]),
("ptx82", Unstable(sym::nvptx_target_feature), &["ptx81"]),
("ptx83", Unstable(sym::nvptx_target_feature), &["ptx82"]),
("ptx84", Unstable(sym::nvptx_target_feature), &["ptx83"]),
("ptx85", Unstable(sym::nvptx_target_feature), &["ptx84"]),
("ptx86", Unstable(sym::nvptx_target_feature), &["ptx85"]),
("ptx87", Unstable(sym::nvptx_target_feature), &["ptx86"]),
// tidy-alphabetical-end
];

static RISCV_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
// tidy-alphabetical-start
("a", Stable, &["zaamo", "zalrsc"]),
Expand Down Expand Up @@ -782,6 +847,7 @@ pub fn all_rust_features() -> impl Iterator<Item = (&'static str, Stability)> {
.chain(HEXAGON_FEATURES.iter())
.chain(POWERPC_FEATURES.iter())
.chain(MIPS_FEATURES.iter())
.chain(NVPTX_FEATURES.iter())
.chain(RISCV_FEATURES.iter())
.chain(WASM_FEATURES.iter())
.chain(BPF_FEATURES.iter())
Expand Down Expand Up @@ -847,6 +913,7 @@ impl Target {
"x86" | "x86_64" => X86_FEATURES,
"hexagon" => HEXAGON_FEATURES,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES,
"nvptx64" => NVPTX_FEATURES,
"powerpc" | "powerpc64" => POWERPC_FEATURES,
"riscv32" | "riscv64" => RISCV_FEATURES,
"wasm32" | "wasm64" => WASM_FEATURES,
Expand All @@ -873,7 +940,7 @@ impl Target {
"sparc" | "sparc64" => SPARC_FEATURES_FOR_CORRECT_VECTOR_ABI,
"hexagon" => HEXAGON_FEATURES_FOR_CORRECT_VECTOR_ABI,
"mips" | "mips32r6" | "mips64" | "mips64r6" => MIPS_FEATURES_FOR_CORRECT_VECTOR_ABI,
"bpf" | "m68k" => &[], // no vector ABI
"nvptx64" | "bpf" | "m68k" => &[], // no vector ABI
"csky" => CSKY_FEATURES_FOR_CORRECT_VECTOR_ABI,
// FIXME: for some tier3 targets, we are overly cautious and always give warnings
// when passing args in vector registers.
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
#![feature(hexagon_target_feature)]
#![feature(loongarch_target_feature)]
#![feature(mips_target_feature)]
#![feature(nvptx_target_feature)]
#![feature(powerpc_target_feature)]
#![feature(riscv_target_feature)]
#![feature(rtm_target_feature)]
Expand Down
40 changes: 40 additions & 0 deletions src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,46 @@ platform.
[@RDambrosio016](https://github.com/RDambrosio016)
[@kjetilkjeka](https://github.com/kjetilkjeka)

## Requirements

This target is `no_std` and will typically be built with crate-type `cdylib` and `-C linker-flavor=llbc`, which generates PTX.
The necessary components for this workflow are:

- `rustup toolchain add nightly`
- `rustup component add llvm-tools --toolchain nightly`
- `rustup component add llvm-bitcode-linker --toolchain nightly`

There are two options for using the core library:

- `rustup component add rust-src --toolchain nightly` and build using `-Z build-std=core`.
- `rustup target add nvptx64-nvidia-cuda --toolchain nightly`

### Target and features

It is generally necessary to specify the target CPU, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target CPU (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
Later PTX versions may allow more efficient code generation.

Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.

## Building Rust kernels

A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

```console
$ RUSTFLAGS='-Ctarget-cpu=sm_89' cargo +nightly rustc --target=nvptx64-nvidia-cuda -Zbuild-std=core --crate-type=cdylib -- -Clinker-flavor=llbc -Zunstable-options
```

Intrinsics in `core::arch::nvptx` may use `#[cfg(target_feature = "...")]`, thus it's necessary to use `-Zbuild-std=core` with appropriate `RUSTFLAGS`. The following components are needed for this workflow:

```console
$ rustup component add rust-src --toolchain nightly
$ rustup component add llvm-tools --toolchain nightly
$ rustup component add llvm-bitcode-linker --toolchain nightly
```


<!-- FIXME: fill this out

## Requirements
Expand Down
56 changes: 56 additions & 0 deletions tests/ui/check-cfg/target_feature.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,35 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`power9-altivec`
`power9-vector`
`prfchw`
`ptx32`
`ptx40`
`ptx41`
`ptx42`
`ptx43`
`ptx50`
`ptx60`
`ptx61`
`ptx62`
`ptx63`
`ptx64`
`ptx65`
`ptx70`
`ptx71`
`ptx72`
`ptx73`
`ptx74`
`ptx75`
`ptx76`
`ptx77`
`ptx78`
`ptx80`
`ptx81`
`ptx82`
`ptx83`
`ptx84`
`ptx85`
`ptx86`
`ptx87`
`quadword-atomics`
`rand`
`ras`
Expand All @@ -222,6 +251,33 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE");
`simd128`
`sm3`
`sm4`
`sm_100`
`sm_100a`
`sm_101`
`sm_101a`
`sm_120`
`sm_120a`
`sm_20`
`sm_21`
`sm_30`
`sm_32`
`sm_35`
`sm_37`
`sm_50`
`sm_52`
`sm_53`
`sm_60`
`sm_61`
`sm_62`
`sm_70`
`sm_72`
`sm_75`
`sm_80`
`sm_86`
`sm_87`
`sm_89`
`sm_90`
`sm_90a`
`sme`
`sme-b16b16`
`sme-f16f16`
Expand Down
1 change: 1 addition & 0 deletions tests/ui/target-feature/gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// gate-test-arm_target_feature
// gate-test-hexagon_target_feature
// gate-test-mips_target_feature
// gate-test-nvptx_target_feature
// gate-test-wasm_target_feature
// gate-test-adx_target_feature
// gate-test-cmpxchg16b_target_feature
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/target-feature/gate.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error[E0658]: the target feature `x87` is currently unstable
--> $DIR/gate.rs:29:18
--> $DIR/gate.rs:30:18
|
LL | #[target_feature(enable = "x87")]
| ^^^^^^^^^^^^^^
Expand Down
28 changes: 28 additions & 0 deletions tests/ui/target-feature/implied-features-nvptx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//@ assembly-output: ptx-linker
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_80 -Z unstable-options -Clinker-flavor=llbc
//@ only-nvptx64
//@ build-pass
// Checks which `target_feature` cfgs are set when building with `-C target-cpu=sm_80`:
// `sm_80` and `ptx70` themselves, earlier `sm_*`/`ptx*` features, and nothing newer than `ptx70`.
// Each expectation is a `compile_error!` behind a `cfg`, so a wrong cfg set fails the build.
#![no_std]
#![allow(dead_code)]

// A `no_std` crate must supply its own panic handler.
#[panic_handler]
pub fn panic(_info: &core::panic::PanicInfo) -> ! {
loop {}
}

// -Ctarget-cpu=sm_80 directly enables sm_80 and ptx70
#[cfg(not(all(target_feature = "sm_80", target_feature = "ptx70")))]
compile_error!("direct target features not enabled");

// -Ctarget-cpu=sm_80 implies all earlier sm_* and ptx* features.
#[cfg(not(all(
target_feature = "sm_60",
target_feature = "sm_70",
target_feature = "ptx50",
target_feature = "ptx60",
)))]
compile_error!("implied target features not enabled");

// -Ctarget-cpu=sm_80 must not enable any PTX version later than ptx70.
#[cfg(target_feature = "ptx71")]
compile_error!("sm_80 requires only ptx70, but ptx71 enabled");
Loading