Skip to content

Commit dd5d65a

Browse files
committedOct 17, 2023
[HIP][Clang][CodeGen] Add CodeGen support for hipstdpar
This patch adds the CodeGen changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. This change relaxes restrictions on what gets emitted on the device path, when compiling in `hipstdpar` mode: 1. Unless a function is explicitly marked `__host__`, it will get emitted, whereas before only `__device__` and `__global__` functions would be emitted; 2. Unsupported builtins are ignored as opposed to being marked as an error, as the decision on their validity is deferred to the `hipstdpar` specific code selection pass; 3. We add a `hipstdpar` specific pass to the opt pipeline, independent of optimisation level: - When compiling for the host, iff the user requested it via the `--hipstdpar-interpose-alloc` flag, we add a pass which replaces canonical allocation / deallocation functions with accelerator aware equivalents. A test to validate that unannotated functions get correctly emitted is added as well. Reviewed by: yaxunl, efriedma Differential Revision: https://reviews.llvm.org/D155850
1 parent be9bc54 commit dd5d65a

9 files changed

+116
-9
lines changed
 

‎clang/lib/CodeGen/BackendUtil.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include "llvm/Transforms/Scalar/EarlyCSE.h"
7979
#include "llvm/Transforms/Scalar/GVN.h"
8080
#include "llvm/Transforms/Scalar/JumpThreading.h"
81+
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
8182
#include "llvm/Transforms/Utils/Debugify.h"
8283
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
8384
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1108,6 +1109,10 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
11081109
return;
11091110
}
11101111

1112+
if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
1113+
LangOpts.HIPStdParInterposeAlloc)
1114+
MPM.addPass(HipStdParAllocationInterpositionPass());
1115+
11111116
// Now that we have all of the passes ready, run them.
11121117
{
11131118
PrettyStackTraceString CrashInfo("Optimizer");

‎clang/lib/CodeGen/CGBuiltin.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -2327,6 +2327,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
23272327
return nullptr;
23282328
}
23292329

2330+
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2331+
const FunctionDecl *FD) {
2332+
auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2333+
auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2334+
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2335+
2336+
SmallVector<Value *, 16> Args;
2337+
for (auto &&FormalTy : FnTy->params())
2338+
Args.push_back(llvm::PoisonValue::get(FormalTy));
2339+
2340+
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2341+
}
2342+
23302343
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
23312344
const CallExpr *E,
23322345
ReturnValueSlot ReturnValue) {
@@ -5765,6 +5778,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
57655778
llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
57665779
}
57675780

5781+
if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
5782+
return EmitHipStdParUnsupportedBuiltin(this, FD);
5783+
57685784
ErrorUnsupported(E, "builtin function");
57695785

57705786
// Unknown builtin, for now just dump it out and return undef.
@@ -5775,6 +5791,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
57755791
unsigned BuiltinID, const CallExpr *E,
57765792
ReturnValueSlot ReturnValue,
57775793
llvm::Triple::ArchType Arch) {
5794+
// When compiling in HipStdPar mode we have to be conservative in rejecting
5795+
// target specific features in the FE, and defer the possible error to the
5796+
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
5797+
// referenced by an accelerator executable function, we emit an error.
5798+
// Returning nullptr here leads to the builtin being handled in
5799+
// EmitStdParUnsupportedBuiltin.
5800+
if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
5801+
Arch != CGF->getTarget().getTriple().getArch())
5802+
return nullptr;
5803+
57785804
switch (Arch) {
57795805
case llvm::Triple::arm:
57805806
case llvm::Triple::armeb:

‎clang/lib/CodeGen/CGStmt.cpp

+33-4
Original file line numberDiff line numberDiff line change
@@ -2420,6 +2420,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
24202420
}
24212421
}
24222422

2423+
static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF,
2424+
const AsmStmt &S) {
2425+
constexpr auto Name = "__ASM__hipstdpar_unsupported";
2426+
2427+
StringRef Asm;
2428+
if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S))
2429+
Asm = GCCAsm->getAsmString()->getString();
2430+
2431+
auto &Ctx = CGF->CGM.getLLVMContext();
2432+
2433+
auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm);
2434+
auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
2435+
{StrTy->getType()}, false);
2436+
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2437+
2438+
CGF->Builder.CreateCall(UBF, {StrTy});
2439+
}
2440+
24232441
void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
24242442
// Pop all cleanup blocks at the end of the asm statement.
24252443
CodeGenFunction::RunCleanupsScope Cleanups(*this);
@@ -2431,27 +2449,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
24312449
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
24322450
SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;
24332451

2434-
for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
2452+
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
2453+
bool IsValidTargetAsm = true;
2454+
for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) {
24352455
StringRef Name;
24362456
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
24372457
Name = GAS->getOutputName(i);
24382458
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
24392459
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
2440-
assert(IsValid && "Failed to parse output constraint");
2460+
if (IsHipStdPar && !IsValid)
2461+
IsValidTargetAsm = false;
2462+
else
2463+
assert(IsValid && "Failed to parse output constraint");
24412464
OutputConstraintInfos.push_back(Info);
24422465
}
24432466

2444-
for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
2467+
for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) {
24452468
StringRef Name;
24462469
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
24472470
Name = GAS->getInputName(i);
24482471
TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
24492472
bool IsValid =
24502473
getTarget().validateInputConstraint(OutputConstraintInfos, Info);
2451-
assert(IsValid && "Failed to parse input constraint"); (void)IsValid;
2474+
if (IsHipStdPar && !IsValid)
2475+
IsValidTargetAsm = false;
2476+
else
2477+
assert(IsValid && "Failed to parse input constraint");
24522478
InputConstraintInfos.push_back(Info);
24532479
}
24542480

2481+
if (!IsValidTargetAsm)
2482+
return EmitHipStdParUnsupportedAsm(this, S);
2483+
24552484
std::string Constraints;
24562485

24572486
std::vector<LValue> ResultRegDests;

‎clang/lib/CodeGen/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS
1111
Extensions
1212
FrontendHLSL
1313
FrontendOpenMP
14+
HIPStdPar
1415
IPO
1516
IRPrinter
1617
IRReader

‎clang/lib/CodeGen/CodeGenFunction.cpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -2594,10 +2594,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
25942594
std::string MissingFeature;
25952595
llvm::StringMap<bool> CallerFeatureMap;
25962596
CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD);
2597+
// When compiling in HipStdPar mode we have to be conservative in rejecting
2598+
// target specific features in the FE, and defer the possible error to the
2599+
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
2600+
// referenced by an accelerator executable function, we emit an error.
2601+
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
25972602
if (BuiltinID) {
25982603
StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
25992604
if (!Builtin::evaluateRequiredTargetFeatures(
2600-
FeatureList, CallerFeatureMap)) {
2605+
FeatureList, CallerFeatureMap) && !IsHipStdPar) {
26012606
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
26022607
<< TargetDecl->getDeclName()
26032608
<< FeatureList;
@@ -2630,7 +2635,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
26302635
return false;
26312636
}
26322637
return true;
2633-
}))
2638+
}) && !IsHipStdPar)
26342639
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
26352640
<< FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
26362641
} else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) {
@@ -2639,7 +2644,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
26392644

26402645
for (const auto &F : CalleeFeatureMap) {
26412646
if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) ||
2642-
!CallerFeatureMap.find(F.getKey())->getValue()))
2647+
!CallerFeatureMap.find(F.getKey())->getValue()) &&
2648+
!IsHipStdPar)
26432649
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
26442650
<< FD->getDeclName() << TargetDecl->getDeclName() << F.getKey();
26452651
}

‎clang/lib/CodeGen/CodeGenModule.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -3526,7 +3526,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject(
35263526
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
35273527
Emitter.finalize(GV);
35283528

3529-
return ConstantAddress(GV, GV->getValueType(), Alignment);
3529+
return ConstantAddress(GV, GV->getValueType(), Alignment);
35303530
}
35313531

35323532
ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
@@ -3585,7 +3585,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
35853585
!Global->hasAttr<CUDAConstantAttr>() &&
35863586
!Global->hasAttr<CUDASharedAttr>() &&
35873587
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
3588-
!Global->getType()->isCUDADeviceBuiltinTextureType())
3588+
!Global->getType()->isCUDADeviceBuiltinTextureType() &&
3589+
!(LangOpts.HIPStdPar &&
3590+
isa<FunctionDecl>(Global) &&
3591+
!Global->hasAttr<CUDAHostAttr>()))
35893592
return;
35903593
} else {
35913594
// We need to emit host-side 'shadows' for all global
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clang_cc1 -x hip -emit-llvm -fcuda-is-device \
2+
// RUN: -o - %s | FileCheck --check-prefix=NO-HIPSTDPAR-DEV %s
3+
4+
// RUN: %clang_cc1 --hipstdpar -emit-llvm -fcuda-is-device \
5+
// RUN: -o - %s | FileCheck --check-prefix=HIPSTDPAR-DEV %s
6+
7+
#define __device__ __attribute__((device))
8+
9+
// NO-HIPSTDPAR-DEV-NOT: define {{.*}} void @foo({{.*}})
10+
// HIPSTDPAR-DEV: define {{.*}} void @foo({{.*}})
11+
extern "C" void foo(float *a, float b) {
12+
*a = b;
13+
}
14+
15+
// NO-HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
16+
// HIPSTDPAR-DEV: define {{.*}} void @bar({{.*}})
17+
extern "C" __device__ void bar(float *a, float b) {
18+
*a = b;
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
2+
// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
3+
4+
#define __global__ __attribute__((global))
5+
6+
__global__ void foo(int i) {
7+
asm ("addl %2, %1; seto %b0" : "=q" (i), "+g" (i) : "r" (i));
8+
}
9+
10+
// CHECK: declare void @__ASM__hipstdpar_unsupported([{{.*}}])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu \
2+
// RUN: --hipstdpar -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
3+
4+
#define __global__ __attribute__((global))
5+
6+
__global__ void foo() { return __builtin_ia32_pause(); }
7+
8+
// CHECK: declare void @__builtin_ia32_pause__hipstdpar_unsupported()

0 commit comments

Comments
 (0)
Please sign in to comment.