Skip to content

[Clang][Driver] Add jobserver support for --offload-jobs #145131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1243,8 +1243,9 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
HelpText<"Compression level for offload device binaries (HIP only)">;

def offload_jobs_EQ : Joined<["--"], "offload-jobs=">,
HelpText<"Specify the number of threads to use for device offloading tasks"
" during compilation.">;
HelpText<"Specify the number of threads to use for device offloading tasks "
"during compilation. Can be a positive integer or the string "
"'jobserver' to use the make-style jobserver from the environment.">;

defm offload_via_llvm : BoolFOption<"offload-via-llvm",
LangOpts<"OffloadViaLLVM">, DefaultFalse,
Expand Down
22 changes: 14 additions & 8 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9293,14 +9293,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
addOffloadCompressArgs(Args, CmdArgs);

if (Arg *A = Args.getLastArg(options::OPT_offload_jobs_EQ)) {
int NumThreads;
if (StringRef(A->getValue()).getAsInteger(10, NumThreads) ||
NumThreads <= 0)
C.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
else
CmdArgs.push_back(
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
StringRef Val = A->getValue();

if (Val.equals_insensitive("jobserver"))
CmdArgs.push_back(Args.MakeArgString("--wrapper-jobs=jobserver"));
else {
int NumThreads;
if (Val.getAsInteger(10, NumThreads) || NumThreads <= 0) {
C.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << Val;
} else {
CmdArgs.push_back(
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
}
}
}

const char *Exec =
Expand Down
6 changes: 6 additions & 0 deletions clang/test/Driver/hip-options.hip
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,9 @@
// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=0x4 %s 2>&1 | \
// RUN: FileCheck -check-prefix=INVJOBS %s
// INVJOBS: clang: error: invalid integral value '0x4' in '--offload-jobs=0x4'

// RUN: %clang -### -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=jobserver %s 2>&1 | \
// RUN: FileCheck -check-prefix=JOBSV %s
// JOBSV: clang-linker-wrapper{{.*}} "--wrapper-jobs=jobserver"

2 changes: 2 additions & 0 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=jobserver \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR

// CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin

Expand Down
18 changes: 12 additions & 6 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1420,12 +1420,18 @@ int main(int Argc, char **Argv) {

parallel::strategy = hardware_concurrency(1);
if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {
unsigned Threads = 0;
if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0)
reportError(createStringError("%s: expected a positive integer, got '%s'",
Arg->getSpelling().data(),
Arg->getValue()));
parallel::strategy = hardware_concurrency(Threads);
StringRef Val = Arg->getValue();
if (Val.equals_insensitive("jobserver"))
parallel::strategy = jobserver_concurrency();
else {
unsigned Threads = 0;
if (!llvm::to_integer(Val, Threads) || Threads == 0) {
reportError(createStringError(
"%s: expected a positive integer or 'jobserver', got '%s'",
Arg->getSpelling().data(), Val.data()));
} else
parallel::strategy = hardware_concurrency(Threads);
}
}

if (Args.hasArg(OPT_wrapper_time_trace_eq)) {
Expand Down
3 changes: 2 additions & 1 deletion clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def wrapper_time_trace_granularity : Joined<["--"], "wrapper-time-trace-granular

def wrapper_jobs : Joined<["--"], "wrapper-jobs=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">,
HelpText<"Sets the number of parallel jobs to use for device linking">;
HelpText<"Sets the number of parallel jobs for device linking. Can be a "
"positive integer or 'jobserver'.">;

def override_image : Joined<["--"], "override-image=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">,
Expand Down
141 changes: 141 additions & 0 deletions llvm/include/llvm/Support/Jobserver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
//===- llvm/Support/Jobserver.h - Jobserver Client --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a client for the GNU Make jobserver protocol. This allows
// LLVM tools to coordinate parallel execution with a parent `make` process.
//
// The jobserver protocol is a mechanism for GNU Make to share its pool of
// available "job slots" with the subprocesses it invokes. This is particularly
// useful for tools that can perform parallel operations themselves (e.g., a
// multi-threaded linker or compiler). By participating in this protocol, a
// tool can ensure the total number of concurrent jobs does not exceed the
// limit specified by the user (e.g., `make -j8`).
//
// How it works:
//
// 1. Establishment:
// A child process discovers the jobserver by inspecting the `MAKEFLAGS`
// environment variable. If a jobserver is active, this variable will
// contain a `--jobserver-auth=<value>` argument. The format of `<value>`
// determines how to communicate with the server.
//
// 2. The Implicit Slot:
// Every command invoked by `make` is granted one "implicit" job slot. This
// means a tool can always perform at least one unit of work without needing
// to communicate with the jobserver. This implicit slot should NEVER be
// released back to the jobserver.
//
// 3. Acquiring and Releasing Slots:
// On POSIX systems, the jobserver is implemented as a pipe. The
// `--jobserver-auth` value specifies either a path to a named pipe
// (`fifo:PATH`) or a pair of file descriptors (`R,W`). The pipe is
// pre-loaded with single-character tokens, one for each available job slot.
//
// - To acquire an additional slot, a client reads a single-character token
// from the pipe.
// - To release a slot, the client must write the *exact same* character
// token back to the pipe.
//
// It is critical that a client releases all acquired slots before it exits,
// even in cases of error, to avoid deadlocking the build.
//
// Example:
// A multi-threaded linker invoked by `make -j8` wants to use multiple
// threads. It first checks for the jobserver. It knows it has one implicit
// slot, so it can use one thread. It then tries to acquire 7 more slots by
// reading 7 tokens from the jobserver pipe. If it only receives 3 tokens,
// it knows it can use a total of 1 (implicit) + 3 (acquired) = 4 threads.
// Before exiting, it must write the 3 tokens it read back to the pipe.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_JOBSERVER_H
#define LLVM_SUPPORT_JOBSERVER_H

#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <memory>
#include <string>

namespace llvm {

/// A single job slot that can be acquired from, or released to, a jobserver
/// pool.
///
/// A slot is always in exactly one of three states:
///  - invalid:  no slot is held (default-constructed or moved-from);
///  - implicit: the one slot every command invoked by `make` owns for free;
///  - explicit: a slot identified by a one-byte token value.
///
/// The class is move-only so that a held slot cannot be duplicated by
/// accident. Moving from a JobSlot leaves the source in the invalid state.
class JobSlot {
public:
  /// Creates an invalid instance that holds no slot.
  JobSlot() = default;

  // Copy operations are disallowed: a slot has exactly one owner.
  JobSlot(const JobSlot &) = delete;
  JobSlot &operator=(const JobSlot &) = delete;

  /// Takes over the slot held by \p Other and leaves \p Other invalid.
  JobSlot(JobSlot &&Other) noexcept : Value(Other.Value) {
    Other.Value = kInvalidValue;
  }

  /// Replaces this slot with the one held by \p Other and leaves \p Other
  /// invalid. Self-assignment is a no-op. Whatever this object held before
  /// the assignment is simply overwritten; nothing is released here.
  JobSlot &operator=(JobSlot &&Other) noexcept {
    if (this != &Other) {
      Value = Other.Value;
      Other.Value = kInvalidValue;
    }
    return *this;
  }

  /// Creates an explicit slot carrying the one-byte token value \p V.
  /// Every value in [0, 255] yields a valid, explicit slot.
  static JobSlot createExplicit(uint8_t V) {
    return JobSlot(static_cast<int16_t>(V));
  }

  /// Creates the implicit slot.
  static JobSlot createImplicit() { return JobSlot(kImplicitValue); }

  /// Returns true if this instance is valid (either implicit or explicit).
  bool isValid() const { return Value >= 0; }

  /// Returns true if this instance represents the implicit job slot.
  bool isImplicit() const { return Value == kImplicitValue; }

  /// Returns true if this instance is valid and is not the implicit slot.
  bool isExplicit() const { return isValid() && !isImplicit(); }

  /// Returns the token value of an explicit slot. Defined out of line.
  /// NOTE(review): presumably only meaningful when isExplicit() is true;
  /// confirm against the definition.
  uint8_t getExplicitValue() const;

private:
  friend class JobserverClient;
  friend class JobserverClientImpl;

  /// Wraps a raw state value. Marked explicit so that an integer can never
  /// silently convert into a slot, even inside the friend classes.
  explicit JobSlot(int16_t V) : Value(V) {}

  /// State of an invalid (empty or moved-from) slot.
  static constexpr int16_t kInvalidValue = -1;

  /// State of the implicit slot. It lies outside the uint8_t range so it can
  /// never collide with an explicit token value.
  static constexpr int16_t kImplicitValue = 256;

  /// kInvalidValue, kImplicitValue, or an explicit token value in [0, 255].
  int16_t Value = kInvalidValue;
};

/// The public interface for a jobserver client.
///
/// This is an abstract class: every operation below is pure virtual, so the
/// concrete behavior lives in an implementation that is not part of this
/// header. Callers do not construct a client themselves; they obtain the
/// shared instance through getInstance().
///
/// This client is a lazy-initialized singleton that is created on first use.
class JobserverClient {
public:
/// Virtual so an implementation can be destroyed through this interface.
/// Declared here only; the definition lives outside this header.
virtual ~JobserverClient();

/// Tries to acquire a job slot from the pool. On failure (e.g., if the pool
/// is empty), this returns an invalid JobSlot instance, so callers must check
/// JobSlot::isValid() on the result. The first successful call will always
/// return the implicit slot.
virtual JobSlot tryAcquire() = 0;

/// Releases a job slot back to the pool.
/// \p Slot is taken by value and JobSlot is move-only, so the caller has to
/// move the slot in and gives up ownership by doing so.
virtual void release(JobSlot Slot) = 0;

/// Returns the number of job slots available, as determined on first use.
/// This value is cached. Returns 0 if no jobserver is active.
virtual unsigned getNumJobs() const = 0;

/// Returns the singleton instance of the JobserverClient.
/// The instance is created on the first call to this function.
/// Returns a nullptr if no jobserver is configured or an error occurs, so
/// the result must be null-checked before use.
static JobserverClient *getInstance();

/// Resets the singleton instance. For testing purposes only.
static void resetForTesting();
};

} // end namespace llvm

#endif // LLVM_SUPPORT_JOBSERVER_H
4 changes: 4 additions & 0 deletions llvm/include/llvm/Support/ThreadPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Jobserver.h"
#include "llvm/Support/RWMutex.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/thread.h"
Expand Down Expand Up @@ -184,6 +185,7 @@ class LLVM_ABI StdThreadPool : public ThreadPoolInterface {
void grow(int requested);

void processTasks(ThreadPoolTaskGroup *WaitingForGroup);
void processTasksWithJobserver();

/// Threads in flight
std::vector<llvm::thread> Threads;
Expand Down Expand Up @@ -212,6 +214,8 @@ class LLVM_ABI StdThreadPool : public ThreadPoolInterface {

/// Maximum number of threads to potentially grow this pool to.
const unsigned MaxThreadCount;

JobserverClient *TheJobserver = nullptr;
};
#endif // LLVM_ENABLE_THREADS

Expand Down
18 changes: 18 additions & 0 deletions llvm/include/llvm/Support/Threading.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
/// the thread shall remain on the actual CPU socket.
LLVM_ABI std::optional<unsigned>
compute_cpu_socket(unsigned ThreadPoolNum) const;

/// If true, the thread pool will attempt to coordinate with a GNU Make
/// jobserver, acquiring a job slot before processing a task. If no
/// jobserver is found in the environment, this is ignored.
bool UseJobserver = false;
};

/// Build a strategy from a number of threads as a string provided in \p Num.
Expand Down Expand Up @@ -210,6 +215,19 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
return S;
}

/// Builds a thread strategy that asks the thread pool to coordinate with a
/// GNU Make jobserver, so that the number of active threads is bounded by the
/// job slots the jobserver hands out. When no jobserver is detected in the
/// environment, the strategy falls back to the default hardware_concurrency()
/// behavior.
inline ThreadPoolStrategy jobserver_concurrency() {
  ThreadPoolStrategy Strategy;
  // Zero means 'use all available'. Requesting every thread is still fine
  // here: threads simply block waiting for a job slot whenever the jobserver
  // is the limiting factor.
  Strategy.ThreadsRequested = 0;
  Strategy.UseJobserver = true;
  return Strategy;
}

/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ add_llvm_component_library(LLVMSupport
InstructionCost.cpp
IntEqClasses.cpp
IntervalMap.cpp
Jobserver.cpp
JSON.cpp
KnownBits.cpp
KnownFPClass.cpp
Expand Down
Loading
Loading