Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Register bug fixes #363

Merged
merged 3 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/include/simeng/RegisterFileSet.hh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ struct Register {
* architectural register, depending on point of usage. */
uint16_t tag;

/** A boolean identifier for whether the creation of this register has been a
* result of a register renaming scheme. */
bool renamed = false;

/** Check for equality of two register identifiers. */
bool operator==(const Register& other) const;

Expand Down
17 changes: 9 additions & 8 deletions src/include/simeng/arch/aarch64/helpers/sve.hh
Original file line number Diff line number Diff line change
Expand Up @@ -1678,9 +1678,10 @@ class sveHelp {

/** Helper function for SVE store instructions to merge
* consecutive active elements into blocks to be written.
* T represents the type of operands (e.g. for zn.d, T = uint64_t).
* T represents the size of the vector elements (e.g. for zn.d, T = uint64_t).
* C represents the size of the memory elements (e.g. for st1w, C = uint32_t).
* Return a vector of RegisterValues. */
template <typename T>
template <typename T, typename C = T>
static std::vector<RegisterValue> sve_merge_store_data(const T* d,
const uint64_t* p,
uint16_t vl_bits) {
Expand All @@ -1690,26 +1691,26 @@ class sveHelp {
// Determine how many predicate elements are present per uint64_t.
uint16_t predsPer64 = (64 / sizeof(T));

// Determine size of array based on the size of the stored element (This is
// the T specifier in sve instructions)
std::array<T, 256 / sizeof(T)> mData;
// Determine size of array based on the size of the memory access (This is
// the C specifier in sve instructions)
std::array<C, 256 / sizeof(C)> mData;
uint16_t mdSize = 0;

for (uint16_t x = 0; x < numVecElems; x++) {
// Determine mask to get predication for active element.
uint64_t shiftedActive = 1ull << ((x % predsPer64) * sizeof(T));
if (p[x / predsPer64] & shiftedActive) {
mData[mdSize] = d[x];
mData[mdSize] = static_cast<C>(d[x]);
mdSize++;
} else if (mdSize) {
outputData.push_back(
RegisterValue((char*)mData.data(), mdSize * sizeof(T)));
RegisterValue((char*)mData.data(), mdSize * sizeof(C)));
mdSize = 0;
}
}
if (mdSize) {
outputData.push_back(
RegisterValue((char*)mData.data(), mdSize * sizeof(T)));
RegisterValue((char*)mData.data(), mdSize * sizeof(C)));
}
return outputData;
}
Expand Down
3 changes: 2 additions & 1 deletion src/lib/arch/aarch64/Instruction_execute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4740,7 +4740,8 @@ void Instruction::execute() {
const uint64_t* d = operands[0].getAsVector<uint64_t>();
const uint64_t* p = operands[1].getAsVector<uint64_t>();

memoryData = sveHelp::sve_merge_store_data<uint64_t>(d, p, VL_bits);
memoryData =
sveHelp::sve_merge_store_data<uint64_t, uint32_t>(d, p, VL_bits);
break;
}
case Opcode::AArch64_ST1W_IMM: { // st1w {zt.s}, pg, [xn{, #imm, mul vl}]
Expand Down
7 changes: 5 additions & 2 deletions src/lib/pipeline/RegisterAliasTable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Register RegisterAliasTable::getMapping(Register architectural) const {
"Invalid register type. Cannot find RAT mapping.");

auto tag = mappingTable_[architectural.type][architectural.tag];
return {architectural.type, tag};
return {architectural.type, tag, true};
}

bool RegisterAliasTable::canAllocate(uint8_t type,
Expand Down Expand Up @@ -84,7 +84,7 @@ Register RegisterAliasTable::allocate(Register architectural) {
mappingTable_[architectural.type][architectural.tag] = tag;
destinationTable_[architectural.type][tag] = architectural.tag;

return {architectural.type, tag};
return {architectural.type, tag, true};
}

void RegisterAliasTable::commit(Register physical) {
Expand All @@ -94,6 +94,9 @@ void RegisterAliasTable::commit(Register physical) {
freeQueues_[physical.type].push(oldTag);
}
void RegisterAliasTable::rewind(Register physical) {
assert(physical.renamed &&
"Attempted to rewind a physical register which hasn't been subject to "
"the register renaming scheme");
// Find which architectural tag this referred to
auto destinationTag = destinationTable_[physical.type][physical.tag];
// Rewind the mapping table to the old physical tag
Expand Down
3 changes: 2 additions & 1 deletion src/lib/pipeline/ReorderBuffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,8 @@ void ReorderBuffer::flush(uint64_t afterSeqId) {
auto destinations = uop->getDestinationRegisters();
for (int i = destinations.size() - 1; i >= 0; i--) {
const auto& reg = destinations[i];
rat_.rewind(reg);
// Only rewind the register if it was renamed
if (reg.renamed) rat_.rewind(reg);
}
uop->setFlushed();
// If the instruction is a branch, supply address to branch flushing logic
Expand Down
4 changes: 4 additions & 0 deletions test/regression/RegressionTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ void RegressionTest::run(const char* source, const char* triple,
const char* extensions) {
testing::internal::CaptureStdout();

// Zero-out process memory from any prior runs
if (processMemory_ != nullptr)
std::memset(processMemory_, '\0', processMemorySize_);

// Assemble the source to a flat binary
assemble(source, triple, extensions);
if (HasFatalFailure()) return;
Expand Down
119 changes: 53 additions & 66 deletions test/regression/aarch64/instructions/sve.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6540,72 +6540,59 @@ TEST_P(InstSve, st1w) {
EXPECT_EQ(getMemoryValue<uint32_t>((VL / 64) + 16 + (i * 4)), src[i % 4]);
}

// 64-bit
// initialHeapData_.resize(64);
// uint64_t* heap64 = reinterpret_cast<uint64_t*>(initialHeapData_.data());
// heap64[0] = 0xDEADBEEFDEADBEEF;
// heap64[1] = 0x1234567812345678;
// heap64[2] = 0x9876543298765432;
// heap64[3] = 0xABCDEF01ABCDEF01;
// heap64[4] = 0xDEADBEEFDEADBEEF;
// heap64[5] = 0x1234567812345678;
// heap64[6] = 0x9876543298765432;
// heap64[7] = 0xABCDEF01ABCDEF01;

// RUN_AARCH64(R"(
// # Get heap address
// mov x0, 0
// mov x8, 214
// svc #0

// mov x1, #0
// mov x4, #64
// mov x5, #3
// ptrue p0.d
// ld1w {z0.d}, p0/z, [x0, x1, lsl #3]
// ld1w {z2.d}, p0/z, [x0, x1, lsl #3]
// st1w {z0.d}, p0, [sp, x1, lsl #2]
// st1w {z2.d}, p0, [x4, x5, lsl #2]
// )");
// CHECK_NEON(0, uint64_t,
// {0xDEADBEEFDEADBEEFu, 0x1234567812345678u,
// 0x9876543298765432u,
// 0xABCDEF01ABCDEF01u, 0xDEADBEEFDEADBEEFu,
// 0x1234567812345678u, 0x9876543298765432u,
// 0xABCDEF01ABCDEF01u});
// CHECK_NEON(2, uint64_t,
// {0xDEADBEEFDEADBEEFu, 0x1234567812345678u,
// 0x9876543298765432u,
// 0xABCDEF01ABCDEF01u, 0xDEADBEEFDEADBEEFu,
// 0x1234567812345678u, 0x9876543298765432u,
// 0xABCDEF01ABCDEF01u});

// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer()),
// 0xDEADBEEF);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer()
// + 4),
// 0x12345678);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer() + 8),
// 0x98765432);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer() + 12),
// 0xABCDEF01);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer() + 16),
// 0xDEADBEEF);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer() + 20),
// 0x12345678);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer() + 24),
// 0x98765432);
// EXPECT_EQ(getMemoryValue<uint32_t>(process_->getStackPointer() + 28),
// 0xABCDEF01);

// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4)), 0xDEADBEEF);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 4), 0x12345678);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 8), 0x98765432);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 12), 0xABCDEF01);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 16), 0xDEADBEEF);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 20), 0x12345678);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 24), 0x98765432);
// EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 * 4) + 28), 0xABCDEF01);
// 64-bit
initialHeapData_.resize(VL / 8);
uint64_t* heap64 = reinterpret_cast<uint64_t*>(initialHeapData_.data());
std::vector<uint64_t> srcA = {0xDEADBEEF, 0x12345678, 0x98765432, 0xABCDEF01};
std::vector<uint64_t> srcB = {0xDEADBEEFDEADBEEF, 0x1234567812345678,
0x9876543298765432, 0xABCDEF01ABCDEF01};
fillHeapCombined(heap64, srcA, srcB, VL / 64);

RUN_AARCH64(R"(
# Get heap address
mov x0, 0
mov x8, 214
svc #0

sub sp, sp, #4095

ptrue p0.d
mov x2, #0
mov x5, #16
addvl x2, x2, #1
udiv x2, x2, x5
mov x3, #2
whilelo p1.d, xzr, x2

mov x1, #0
mov x6, #64
mov x7, #3

ld1d {z0.d}, p1/z, [x0, x1, lsl #3]
ld1d {z2.d}, p0/z, [x0, x1, lsl #3]
st1w {z0.d}, p1, [sp, x1, lsl #2]
st1w {z2.d}, p0, [x6, x7, lsl #2]
)");

CHECK_NEON(0, uint64_t, fillNeonCombined<uint64_t>(srcA, {0ull}, VL / 8));
CHECK_NEON(2, uint64_t, fillNeonCombined<uint64_t>(srcA, srcB, VL / 8));

std::array<uint32_t, (256 / sizeof(uint32_t))> srcC =
fillNeonCombined<uint32_t>(
{0xDEADBEEF, 0x12345678, 0x98765432, 0xABCDEF01}, {0ul}, VL / 16);
for (int i = 0; i < (VL / 64); i++) {
EXPECT_EQ(
getMemoryValue<uint32_t>(process_->getStackPointer() - 4095 + (i * 4)),
srcC[i]);
}

std::array<uint32_t, (256 / sizeof(uint32_t))> srcD =
fillNeonCombined<uint32_t>(
{0xDEADBEEF, 0x12345678, 0x98765432, 0xABCDEF01},
{0xDEADBEEF, 0x12345678, 0x98765432, 0xABCDEF01}, VL / 16);
for (int i = 0; i < (VL / 64); i++) {
EXPECT_EQ(getMemoryValue<uint32_t>(64 + (3 + i) * 4), srcD[i]);
}
}

TEST_P(InstSve, str_predicate) {
Expand Down