Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DSV3/R1 fixes #1173

Merged
merged 13 commits into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# `make fmt` runs three formatters in sequence:
#   1. `cargo fmt`   - Rust sources
#   2. `ruff format` - Python sources
#   3. `clang-format -i` on every *.metal, *.c, *.cu, *.hpp, *.h and *.cpp
#      file that `find` locates under the mistralrs-* directories
# `fmt` is declared phony because it does not produce a file named `fmt`.
.PHONY: fmt

fmt:
cargo fmt
ruff format
find mistralrs-* -type f \( -name "*.metal" -o -name "*.c" -o -name "*.cu" -o -name "*.hpp" -o -name "*.h" -o -name "*.cpp" \) -exec clang-format -i {} +
4 changes: 2 additions & 2 deletions examples/server/stream_completion_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def run():
request(stream=False)
finished = datetime.now()

print(f"Duration: {finished-now}")
print(f"Duration: {finished - now}")

print("\nStreaming: ")
print("=" * 15)
Expand All @@ -42,7 +42,7 @@ def run():
pass
finished = datetime.now()

print(f"Duration: {finished-now}")
print(f"Duration: {finished - now}")


if __name__ == "__main__":
Expand Down
8 changes: 6 additions & 2 deletions mistralrs-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,11 @@ fn main() -> anyhow::Result<()> {
#[cfg(feature = "metal")]
let device = Device::new_metal(0)?;
#[cfg(not(feature = "metal"))]
let device = Device::cuda_if_available(0)?;
let device = if cfg!(feature = "nccl") {
Device::Cpu
} else {
Device::cuda_if_available(0)?
};

if let Some(seed) = args.seed {
device.set_seed(seed)?;
Expand Down Expand Up @@ -429,7 +433,7 @@ fn main() -> anyhow::Result<()> {
DeviceMapSetting::Auto(auto_device_map_params)
};

let no_paged_attn = if device.is_cuda() {
let no_paged_attn = if device.is_cuda() || cfg!(feature = "nccl") {
args.no_paged_attn
} else if device.is_metal() {
!args.paged_attn
Expand Down
6 changes: 4 additions & 2 deletions mistralrs-core/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ fn main() {
use std::{path::PathBuf, vec};
println!("cargo:rerun-if-changed=build.rs");
let build_dir = PathBuf::from(std::env::var("OUT_DIR").unwrap());
let lib_files = vec!["src/cuda/nonzero_bitwise.cu"];
let lib_files = vec!["src/cuda/nonzero_bitwise.cu", "src/cuda/sort.cu"];
for lib_file in lib_files.iter() {
println!("cargo:rerun-if-changed={lib_file}");
}
Expand All @@ -23,7 +23,9 @@ fn main() {
.arg("--expt-relaxed-constexpr")
.arg("--expt-extended-lambda")
.arg("--use_fast_math")
.arg("--verbose");
.arg("--verbose")
.arg("--compiler-options")
.arg("-fPIC");

// https://github.com/EricLBuehler/mistral.rs/issues/286
if let Some(cuda_nvcc_flags_env) = CUDA_NVCC_FLAGS {
Expand Down
24 changes: 10 additions & 14 deletions mistralrs-core/src/cublaslt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ static mut CUBLASLT: Option<CublasLtWrapper> = None;
pub static CUBLASLT_HANDLE: Lazy<Mutex<Option<&'static CublasLtWrapper>>> =
Lazy::new(|| Mutex::new(None));

pub fn setup_cublas_lt_wrapper() {
pub fn setup_cublas_lt_wrapper(device: Device) {
unsafe {
INIT.call_once(|| {
#[cfg(not(feature = "cuda"))]
Expand All @@ -34,21 +34,17 @@ pub fn setup_cublas_lt_wrapper() {
// Then check if we can create a device
// Then check that the device is CUDA
use candle_core::cuda_backend::cudarc::driver;
CUBLASLT = driver::result::init()
.ok()
.and_then(|_| Device::cuda_if_available(0).ok())
.and_then(|device| match device {
Device::Cuda(_) => Some(CublasLtWrapper {
cublaslt: CublasLt::new(&device).unwrap(),
}),
_ => None,
});
tracing::info!("Initialized cuBLASlt handle");
CUBLASLT = match device {
Device::Cuda(_) => Some(CublasLtWrapper {
cublaslt: CublasLt::new(&device).unwrap(),
}),
_ => None,
}
}
#[allow(static_mut_refs)]
let cublaslt: Option<&'static CublasLtWrapper> = CUBLASLT.as_ref();
*CUBLASLT_HANDLE.lock().unwrap() = cublaslt;
});
#[allow(static_mut_refs)]
let cublaslt: Option<&'static CublasLtWrapper> = CUBLASLT.as_ref();
*CUBLASLT_HANDLE.lock().unwrap() = cublaslt;
}
}

Expand Down
145 changes: 136 additions & 9 deletions mistralrs-core/src/cuda/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
use std::ffi::c_void;

/// Stream handle type used in the `extern "C"` declarations of this module.
///
/// Without the `cuda` feature there is no CUDA driver binding to name, so the
/// alias falls back to an opaque raw pointer.
#[cfg(not(feature = "cuda"))]
type FfiCudaStream = *const c_void;
/// Stream handle type used in the `extern "C"` declarations of this module.
///
/// With the `cuda` feature enabled this is the driver-level `CUstream` type
/// from the `cudarc` bindings reachable through `candle_core`.
#[cfg(feature = "cuda")]
type FfiCudaStream = candle_core::cuda::cudarc::driver::sys::CUstream;

#[allow(dead_code)]
extern "C" {
pub(crate) fn count_nonzero_bf16(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_f16(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_f32(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_f64(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_u8(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_u32(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_i16(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_i64(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_i32(d_in: *const c_void, N: u32) -> u32;
pub(crate) fn count_nonzero_bf16(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_f16(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_f32(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_f64(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_u8(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_u32(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_i16(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_i64(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn count_nonzero_i32(d_in: *const c_void, N: u32, stream: FfiCudaStream) -> u32;
pub(crate) fn nonzero_bf16(
d_in: *const c_void,
N: u32,
num_nonzero: u32,
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_f16(
d_in: *const c_void,
Expand All @@ -26,6 +32,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_f32(
d_in: *const c_void,
Expand All @@ -34,6 +41,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_f64(
d_in: *const c_void,
Expand All @@ -42,6 +50,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_u8(
d_in: *const c_void,
Expand All @@ -50,6 +59,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_u32(
d_in: *const c_void,
Expand All @@ -58,6 +68,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_i64(
d_in: *const c_void,
Expand All @@ -66,6 +77,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_i16(
d_in: *const c_void,
Expand All @@ -74,6 +86,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);
pub(crate) fn nonzero_i32(
d_in: *const c_void,
Expand All @@ -82,6 +95,7 @@ extern "C" {
dims: *const c_void,
num_dims: u32,
d_out: *mut c_void,
stream: FfiCudaStream,
);

pub(crate) fn bitwise_and_u8(
Expand Down Expand Up @@ -161,4 +175,117 @@ extern "C" {
pub(crate) fn leftshift_u32(d_in1: *const c_void, d_out: *mut c_void, N: u32, k: i32);
pub(crate) fn leftshift_i64(d_in1: *const c_void, d_out: *mut c_void, N: u32, k: i32);
pub(crate) fn leftshift_i32(d_in1: *const c_void, d_out: *mut c_void, N: u32, k: i32);

// Foreign `asort_*` entry points: one declaration per element dtype and sort
// direction, all sharing the same six-argument signature.
// NOTE(review): presumably these are the argsort kernels from
// `src/cuda/sort.cu` (the file added to the build script) — confirm.
//   x       - read-only pointer to the input data
//   dst     - mutable pointer; presumably receives the result — confirm
//   nrows   - presumably the number of independent rows to sort — confirm
//   ncols   - presumably the length of each row — confirm
//   inplace - boolean flag forwarded to the kernel; exact meaning is not
//             visible from this declaration
//   stream  - passed as a plain `i64`, unlike the `count_nonzero_*`/`nonzero_*`
//             declarations which take `FfiCudaStream`; presumably the raw
//             CUDA stream handle cast to an integer — confirm
/// `asort` variant named for ascending order over `f32` elements.
pub(crate) fn asort_asc_f32(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for ascending order over `f16` elements.
pub(crate) fn asort_asc_f16(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for ascending order over `bf16` elements.
///
/// `dst` is declared `*mut c_void`, matching every other `asort_*`
/// declaration in this block (it was previously the lone `*const`). The two
/// pointer types are ABI-identical, so the foreign symbol is unaffected; the
/// change only makes the Rust-side signature reflect that `dst` is a
/// destination buffer (presumably written by the kernel — confirm against
/// the CUDA source).
///
/// * `x` - read-only pointer to the input data.
/// * `dst` - mutable pointer to the destination buffer.
/// * `nrows`, `ncols` - presumably the 2-D extent of the input — confirm.
/// * `inplace` - boolean flag forwarded to the kernel.
/// * `stream` - stream handle passed as a plain `i64`; presumably the raw
///   CUDA stream pointer cast to an integer — confirm.
pub(crate) fn asort_asc_bf16(
    x: *const c_void,
    dst: *mut c_void,
    nrows: i32,
    ncols: i32,
    inplace: bool,
    stream: i64,
);
// Remaining foreign `asort_*` entry points. Each takes the same arguments:
// a read-only input pointer `x`, a mutable pointer `dst`, the `nrows`/`ncols`
// extents, an `inplace` flag and the stream as a plain `i64`.
// NOTE(review): the `asc`/`desc` and dtype meanings below are read off the
// symbol names only; the kernels themselves are not visible here — confirm
// against the CUDA source.
/// `asort` variant named for ascending order over `f64` elements.
pub(crate) fn asort_asc_f64(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for ascending order over `u8` elements.
pub(crate) fn asort_asc_u8(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for ascending order over `u32` elements.
pub(crate) fn asort_asc_u32(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for ascending order over `i64` elements.
pub(crate) fn asort_asc_i64(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `f32` elements.
pub(crate) fn asort_desc_f32(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `f16` elements.
pub(crate) fn asort_desc_f16(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `bf16` elements.
pub(crate) fn asort_desc_bf16(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `f64` elements.
pub(crate) fn asort_desc_f64(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `u8` elements.
pub(crate) fn asort_desc_u8(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `u32` elements.
pub(crate) fn asort_desc_u32(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
/// `asort` variant named for descending order over `i64` elements.
pub(crate) fn asort_desc_i64(
x: *const c_void,
dst: *mut c_void,
nrows: i32,
ncols: i32,
inplace: bool,
stream: i64,
);
}
Loading
Loading