Skip to content

Commit cb4f4a8

Browse files
authoredDec 11, 2024
[compiler-rt][AArch64] Rewrite SME routines to all use __aarch64_cpu_features. (#119414)
When #92921 added the `__arm_get_current_vg` functionality, it used the FMV feature bits mechanism rather than the mechanism that was previously added for SME, which called `getauxval` on Linux platforms or used `__aarch64_sme_accessible`, as required for baremetal libraries. It is better to always use `__aarch64_cpu_features`. For baremetal we still need to rely on `__aarch64_sme_accessible` to initialise the struct.
1 parent 40986fe commit cb4f4a8

File tree

6 files changed

+59
-69
lines changed

6 files changed

+59
-69
lines changed
 

‎compiler-rt/lib/builtins/CMakeLists.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,8 @@ set(aarch64_SOURCES
573573
)
574574

575575
if (COMPILER_RT_HAS_AARCH64_SME)
576-
if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
577-
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c)
576+
if (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG)
577+
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c)
578578
message(STATUS "AArch64 SME ABI routines enabled")
579579
set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
580580
else()
@@ -842,6 +842,8 @@ else ()
842842

843843
if(COMPILER_RT_DISABLE_AARCH64_FMV)
844844
list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV)
845+
elseif(COMPILER_RT_BAREMETAL_BUILD)
846+
list(APPEND BUILTIN_DEFS ENABLE_BAREMETAL_AARCH64_FMV)
845847
endif()
846848

847849
append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2+
// See https://llvm.org/LICENSE.txt for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
// We rely on the FMV __aarch64_cpu_features mechanism to determine
6+
// which features are set at runtime.
7+
8+
#include "../cpu_model/AArch64CPUFeatures.inc"
9+
// sme-abi.S hard-codes these bit positions as FEAT_SVE_BIT and FEAT_SME_BIT.
// Fail the build if the definitions pulled in from AArch64CPUFeatures.inc
// ever stop matching those literals.
_Static_assert(FEAT_SVE == 30, "sme-abi.S assumes FEAT_SVE = 30");
10+
_Static_assert(FEAT_SME == 42, "sme-abi.S assumes FEAT_SME = 42");

‎compiler-rt/lib/builtins/aarch64/sme-abi-init.c

-50
This file was deleted.

‎compiler-rt/lib/builtins/aarch64/sme-abi.S

+12-17
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,15 @@
99
#include "../assembly.h"
1010

1111
.set FEAT_SVE_BIT, 30
12+
.set FEAT_SME_BIT, 42
1213
.set SVCR_PSTATE_SM_BIT, 0
1314

1415
#if !defined(__APPLE__)
15-
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
16-
#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
1716
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
1817
#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
1918
#else
2019
// MachO requires @page/@pageoff directives because the global is defined
2120
// in a different file. Otherwise this file may fail to build.
22-
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
23-
#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
2421
#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
2522
#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
2623
#endif
@@ -63,9 +60,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
6360
mov x0, xzr
6461
mov x1, xzr
6562

66-
adrp x16, TPIDR2_SYMBOL
67-
ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
68-
cbz w16, 1f
63+
adrp x16, CPU_FEATS_SYMBOL
64+
ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET]
65+
tbz x16, #FEAT_SME_BIT, 1f
6966
0:
7067
orr x0, x0, #0xC000000000000000
7168
mrs x16, SVCR
@@ -116,9 +113,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
116113
BTI_C
117114
// If the current thread does not have access to TPIDR2_EL0, the subroutine
118115
// does nothing.
119-
adrp x14, TPIDR2_SYMBOL
120-
ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
121-
cbz w14, 1f
116+
adrp x14, CPU_FEATS_SYMBOL
117+
ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
118+
tbz x14, #FEAT_SME_BIT, 1f
122119

123120
// If TPIDR2_EL0 is null, the subroutine does nothing.
124121
mrs x16, TPIDR2_EL0
@@ -157,9 +154,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
157154
BTI_C
158155
// If the current thread does not have access to SME, the subroutine does
159156
// nothing.
160-
adrp x14, TPIDR2_SYMBOL
161-
ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
162-
cbz w14, 0f
157+
adrp x14, CPU_FEATS_SYMBOL
158+
ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET]
159+
tbz x14, #FEAT_SME_BIT, 0f
163160

164161
// Otherwise, the subroutine behaves as if it did the following:
165162
// * Call __arm_tpidr2_save.
@@ -191,11 +188,9 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
191188
BTI_C
192189

193190
adrp x17, CPU_FEATS_SYMBOL
194-
ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
191+
ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET]
195192
tbnz w17, #FEAT_SVE_BIT, 1f
196-
adrp x17, TPIDR2_SYMBOL
197-
ldrb w17, [x17, TPIDR2_SYMBOL_OFFSET]
198-
cbz x17, 2f
193+
tbz x17, #FEAT_SME_BIT, 2f
199194
0:
200195
mrs x17, SVCR
201196
tbz x17, #SVCR_PSTATE_SM_BIT, 2f

‎compiler-rt/lib/builtins/cpu_model/aarch64.c

+2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ struct {
8080
#include "aarch64/fmv/getauxval.inc"
8181
#elif defined(_WIN32)
8282
#include "aarch64/fmv/windows.inc"
83+
#elif defined(ENABLE_BAREMETAL_AARCH64_FMV)
84+
#include "aarch64/fmv/baremetal.inc"
8385
#else
8486
#include "aarch64/fmv/unimplemented.inc"
8587
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// For baremetal platforms, we don't really initialise '__aarch64_cpu_features',
2+
// with the exception of FEAT_SME, which we can get from '__aarch64_sme_accessible'.
3+
4+
// Declaration only: '__aarch64_sme_accessible' is not defined in this file.
// When COMPILER_RT_SHARED_LIB is defined the reference is marked weak;
// otherwise it is an ordinary extern declaration.
#if defined(COMPILER_RT_SHARED_LIB)
5+
__attribute__((weak))
6+
#endif
7+
extern _Bool
8+
__aarch64_sme_accessible(void);
9+
10+
// Returns the result of '__aarch64_sme_accessible()'. In
// COMPILER_RT_SHARED_LIB builds it returns 0 instead when that symbol's
// address is null.
static _Bool has_sme(void) {
11+
#if defined(COMPILER_RT_SHARED_LIB)
12+
// The declaration is weak in this configuration, so check the symbol's
// address before calling through it.
if (!__aarch64_sme_accessible)
13+
return 0;
14+
#endif
15+
return __aarch64_sme_accessible();
16+
}
17+
18+
// No-op in this baremetal variant: the body is empty, so both 'hwcap' and
// 'arg' are ignored and '__aarch64_cpu_features' is left untouched.
void __init_cpu_features_resolver(unsigned long hwcap,
19+
const __ifunc_arg_t *arg) {}
20+
21+
// Fills in '__aarch64_cpu_features.features' for baremetal builds. The only
// bit this function can set is FEAT_SME, and only when has_sme() returns true.
// CONSTRUCTOR_ATTRIBUTE is defined elsewhere; presumably it makes this run
// automatically at startup -- confirm against its definition.
// NOTE(review): when has_sme() is false the value stored is 0, which the
// early-return check below cannot distinguish from "not yet initialised".
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
22+
// A non-zero value means the features were already initialised; do nothing.
if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
23+
return;
24+
25+
unsigned long long feat = 0;
26+
if (has_sme())
27+
feat |= 1ULL << FEAT_SME;
28+
29+
// Publish the computed mask with a relaxed atomic store.
__atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
30+
}

0 commit comments

Comments
 (0)